#!/usr/bin/env python
# coding: utf-8

# # This project analyzes USA unemployment rate from 1948 to 2016
# --by Lu Tang

# ## Part 1. Step by step plotting line chart for time series data

# In[1]:

# import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# load data (the code below reads the 'DATE' and 'VALUE' columns)
unrate = pd.read_csv('unrate.csv')

# view data
unrate.head()

# In[2]:

# view last 5 rows of the data
unrate.tail()

# In[3]:

# Check information
unrate.info()

# In[4]:

# Convert Object to datetime
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
unrate.info()

# In[5]:

# Create new columns for month and year using the pandas datetime accessor
unrate['MONTH'] = unrate['DATE'].dt.month
unrate['YEAR'] = unrate['DATE'].dt.year

# view the table
unrate.tail(10)

# In[6]:

fig = plt.figure(figsize=(10, 5))

# Plot one line per year from 2012 to 2015.
# Rows are selected by the YEAR column rather than by position: positional
# slices starting at row 0 pick the earliest years in the file, which would
# then be mislabelled as 2012-2015.
# NOTE: assumes the CSV rows are in chronological order so each line is drawn
# from January to December -- confirm against the data file.
colors = ['red', 'blue', 'green', 'orange']
for year, color in zip(range(2012, 2016), colors):
    subset = unrate[unrate['YEAR'] == year]
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=str(year))

# since 2016 only has data to August, we will plot separately
latest = unrate[unrate['YEAR'] == 2016]
plt.plot(latest['MONTH'], latest['VALUE'], c='purple', label='2016')

plt.xlabel('Month')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 2012-2016')
plt.legend(loc='upper left')
plt.show()

# In[7]:

# save the chart
fig.savefig('Monthly Unemployment Trends, 2012-2016.png')

# In[8]:

# use groupby function to find average unemployment rate for each year;
# VALUE is selected before aggregating so the DATE and MONTH columns are not
# averaged as well
unrate_year = unrate.groupby('YEAR')['VALUE'].mean()
unrate_year.head()

# In[9]:

# plot a line chart using pandas' visualization tool
unrate_year.plot(figsize=(10, 5), title='Annually Average Unemployment Rate, 1948-2016')
plt.ylabel('Unemployment Rate, Percent')

# ## Part 2.
# Analyzing unemployment trend using moving average method

# In[10]:

# switch to seaborn and apply a different plotting style
import seaborn as sns
sns.set(style='darkgrid', context='talk', palette='Dark2')

# read the data file a second time into a fresh frame
df = pd.read_csv('unrate.csv')
df.head()

# In[11]:

# Parse DATE as datetime, then move it from a column to the index
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.set_index('DATE')

values = df['VALUE']

# Short-window simple moving average (20 rows)
df['short_rolling'] = values.rolling(window=20).mean()

# Long-window simple moving average (100 rows)
df['long_rolling'] = values.rolling(window=100).mean()

df.head(20)

# In[12]:

# view last 5 rows
df.tail()

# In[13]:

# Overlay the original series and both moving averages on one chart
fig, ax = plt.subplots(figsize=(10, 5))
for column in ('VALUE', 'short_rolling', 'long_rolling'):
    ax.plot(df[column], label=column)
ax.set_title('USA Unemployment Trends, 1948-2016')
ax.set_ylabel('unemployment_rate')
ax.legend(loc='best')

# >- As we can see, the short rolling line is very similar to the original data, while the long rolling line is smoother.
# >- We can also observe that the unemployment rate is higher in recent years, but in general, it shows high fluctuation

# In[14]:

# Moving average with window=50
df['mid_rolling'] = df['VALUE'].rolling(window=50).mean()

# In[15]:

# Line chart of the window=50 moving average (mid_rolling) on its own
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['mid_rolling'])
ax.set_title('USA Unemployment Trends, 1948-2016')
ax.set_ylabel('unemployment_rate')

# In[16]:

# save the chart
fig.savefig('USA Unemployment Trend.png')