#!/usr/bin/env python
# coding: utf-8

# # This project analyzes the US unemployment rate from 1948 to 2016
# --by Lu Tang

# ## Part 1. Step-by-step plotting of line charts for time series data

# In[1]:


# import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# Read the unemployment-rate table from the CSV file
unrate = pd.read_csv('unrate.csv')
# Preview the first rows
unrate.head()


# In[2]:


# Preview the final rows to see where the series ends
unrate.tail()


# In[3]:


# Summarise column dtypes and non-null counts
unrate.info()


# In[4]:


# Parse the DATE column into pandas datetimes, then re-check the dtypes
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
unrate.info()


# In[5]:


# Derive calendar MONTH and YEAR columns through the pandas .dt accessor
unrate = unrate.assign(
    MONTH=unrate['DATE'].dt.month,
    YEAR=unrate['DATE'].dt.year,
)
# Show the last ten rows, now including the derived columns
unrate.tail(10)


# In[6]:


# Overlay one line per calendar year (2012-2016) so the monthly patterns
# of recent years can be compared on a shared January-December axis.
year_colors = {
    2012: 'red',
    2013: 'blue',
    2014: 'green',
    2015: 'orange',
    2016: 'purple',
}

fig, ax = plt.subplots(figsize=(10, 5))
for year, color in year_colors.items():
    # Pick rows through the YEAR column instead of positional slices:
    # slicing rows 0-47 would plot the first four years of the table
    # (the series starts in 1948) under 2012-2015 labels.
    # A partial year (2016 only has data to August) simply yields a shorter line.
    year_rows = unrate[unrate['YEAR'] == year]
    ax.plot(year_rows['MONTH'], year_rows['VALUE'], c=color, label=str(year))

ax.set_xlabel('Month')
ax.set_ylabel('Unemployment Rate, Percent')
ax.set_title('Monthly Unemployment Trends, 2012-2016')
ax.legend(loc='upper left')
plt.show()


# In[7]:


# save the chart
fig.savefig('Monthly Unemployment Trends, 2012-2016.png')


# In[8]:


# Average unemployment rate for each calendar year.
# VALUE is selected before aggregating so that only this column is averaged;
# taking the mean of the whole frame first would also aggregate DATE and MONTH
# and would depend on how the installed pandas treats non-numeric columns.
unrate_year = unrate.groupby('YEAR')['VALUE'].mean()
unrate_year.head()


# In[9]:


# Draw the yearly averages with the pandas plotting wrapper and label the
# y-axis on the Axes object it returns
ax = unrate_year.plot(
    figsize=(10, 5),
    title='Annually Average Unemployment Rate, 1948-2016',
)
ax.set_ylabel('Unemployment Rate, Percent')


# ## Part 2. Analyzing the unemployment trend using the moving average method

# In[10]:


# use seaborn and set different style
import seaborn as sns
sns.set(style='darkgrid', context='talk', palette='Dark2')
# Read the CSV file afresh into a separate frame for the moving-average analysis
df = pd.read_csv('unrate.csv')
df.head()


# In[11]:


# Parse DATE into datetimes and promote it from a regular column to the index
df = (
    df.assign(DATE=pd.to_datetime(df['DATE']))
      .set_index('DATE')
)

# Simple moving averages of VALUE: a short window of 20 rows
# and a long window of 100 rows
df = df.assign(
    short_rolling=df['VALUE'].rolling(window=20).mean(),
    long_rolling=df['VALUE'].rolling(window=100).mean(),
)

df.head(20)


# In[12]:


# Show the final rows, where both rolling columns are populated
df.tail()


# In[13]:


# Overlay the original series and both moving averages on a single set of axes
fig, ax = plt.subplots(figsize=(10, 5))

# Each column is drawn against the DATE index and labelled with its own name
for series_name in ('VALUE', 'short_rolling', 'long_rolling'):
    ax.plot(df[series_name], label=series_name)

ax.set_title('USA Unemployment Trends, 1948-2016')
ax.legend(loc='best')
ax.set_ylabel('unemployment_rate')


# >- As we can see, the short rolling line is very similar to the original data, while the long rolling line is smoother.
# >- We can also observe that the unemployment rate is higher in recent years, but in general it shows high fluctuation.

# In[14]:


# Medium-length simple moving average of VALUE over a 50-row window
df['mid_rolling'] = df['VALUE'].rolling(window=50).mean()


# In[15]:


# Draw the 50-row moving average by itself
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['mid_rolling'])
ax.set_title('USA Unemployment Trends, 1948-2016')
ax.set_ylabel('unemployment_rate')


# In[16]:


# Write the figure above to a PNG file (path is relative to the working directory)
fig.savefig('USA Unemployment Trend.png')