#!/usr/bin/env python
# coding: utf-8

# # Monthly Unemployment Rate in the USA from 1948 to 2016.
# ## Introduction:
# The United States Bureau of Labor Statistics (BLS) surveys and calculates
# the monthly unemployment rate. The unemployment rate is the percentage of
# individuals in the labor force without a job. While the unemployment rate
# isn't perfect, it's a commonly used proxy for the health of the economy.
# You may have heard politicians and reporters state the unemployment rate
# when commenting on the economy.
#
# The BLS releases monthly unemployment data available for download as an
# Excel file, with the .xlsx file extension. While the pandas library can
# read in XLSX files, it relies on an external library for actually parsing
# the format. Let's instead download the same dataset as a CSV file from the
# website of the Federal Reserve Bank of St. Louis. We've downloaded the
# monthly unemployment rate as a CSV from January 1948 to August 2016, saved
# it as unrate.csv, and made it available in this mission.
#
# ## Goal of the project:
# Time series data analysis on the unemployment rate.

# In[1]:

# Import libraries
import numpy as np
import pandas as pd

# In[2]:

# Load the monthly unemployment data from the CSV file in the working directory.
unrate = pd.read_csv("unrate.csv", encoding="Latin-1")
# Bare expressions such as this one are displayed by the notebook; they
# produce no output when the file is run as a plain script.
unrate.head()

# The dataset contains 2 columns: the DATE column shows the starting date of
# each month, and the VALUE column gives the unemployment rate in percent.

# In[3]:

# Check the data types
unrate.dtypes

# In[4]:

# Convert the DATE column from text to datetime.
unrate["DATE"] = pd.to_datetime(unrate["DATE"])

# Display the first 12 rows of the unrate dataset, which reflect the
# unemployment rate from January 1948 to December 1948:
unrate.head(12)

# In[5]:

# Import matplotlib for plotting; the `%matplotlib inline` magic renders the
# plots in the same output cell.
import matplotlib.pyplot as plt

try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython() only exists inside IPython/Jupyter; when this file is run
    # as a plain script the magic is skipped and plt.show() opens the figures.
    pass

# # Generate a line chart that visualizes the unemployment rates from 1948:
# * x-values should be the first 12 values in the DATE column
# * y-values should be the first 12 values in the VALUE column
# * Display the plot.

# In[6]:

# First 12 values of the DATE column go on the x axis (positional slice).
x = unrate["DATE"].iloc[:12]
# First 12 values of the VALUE column go on the y axis.
y = unrate["VALUE"].iloc[:12]
# Draw the line chart from the x and y values.
plt.plot(x, y)
# Show the line plot.
plt.show()

# In the chart above the x axis is not readable; to make it clear we can
# rotate the tick labels with xticks.

# In[7]:

# First 12 values of the DATE column go on the x axis.
x = unrate["DATE"].iloc[:12]
# First 12 values of the VALUE column go on the y axis.
y = unrate["VALUE"].iloc[:12]
# Draw the line chart from the x and y values.
plt.plot(x, y)
# Rotate the x tick labels by 90 degrees so they are readable.
plt.xticks(rotation=90)
# Show the line plot.
plt.show()

# * Set the x-axis label to "Month".
# * Set the y-axis label to "Unemployment Rate".
# * Set the plot title to "Monthly Unemployment Trends, 1948".
# * Display the plot.
# In[8]:

# First 12 values of the DATE column go on the x axis (positional slice).
x = unrate["DATE"].iloc[:12]
# First 12 values of the VALUE column go on the y axis.
y = unrate["VALUE"].iloc[:12]
# Draw the line chart from the x and y values.
plt.plot(x, y)
# Rotate the x tick labels by 90 degrees so they are readable.
plt.xticks(rotation=90)
# Label the x axis.
plt.xlabel("Month")
# Label the y axis.
plt.ylabel("Unemployment Rate")
# Set the title of the line chart.
plt.title("Monthly Unemployment Trends, 1948")
# Show the line plot.
plt.show()

# # Let's create a figure, add subplots and display it.

# In[9]:

import matplotlib.pyplot as plt

# Empty figure with two stacked axes (2 rows, 1 column).
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
plt.show()

# # Create 2 line subplots in a 2 row by 1 column layout:
# In the top subplot, plot the data from 1948.
#
# * For the x-axis, use the first 12 values in the DATE column.
# * For the y-axis, use the first 12 values in the VALUE column.
#
# In the bottom subplot, plot the data from 1949.
#
# * For the x-axis, use the values from index 12 to 23 in the DATE column.
# * For the y-axis, use the values from index 12 to 23 in the VALUE column.
# Use plt.show() to display all the plots.

# In[10]:

fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# NOTE: despite the names, x2 and x4 hold the y-values (VALUE column).
x1 = unrate["DATE"].iloc[:12]      # 1948 dates
x2 = unrate["VALUE"].iloc[:12]     # 1948 rates
x3 = unrate["DATE"].iloc[12:24]    # 1949 dates
x4 = unrate["VALUE"].iloc[12:24]   # 1949 rates
ax1.plot(x1, x2)
ax2.plot(x3, x4)
plt.show()

# In[11]:

# Another method: pass the slices straight to plot() without temporaries.
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
ax1.plot(unrate["DATE"].iloc[:12], unrate["VALUE"].iloc[:12])
ax2.plot(unrate["DATE"].iloc[12:24], unrate["VALUE"].iloc[12:24])
plt.show()

# # One issue with the 2 plots is that the x-axis tick labels are unreadable.
# The other issue is that the plots are squeezed together vertically and hard
# to interpret.
# Even though we have now generated 2 line charts, the total plotting area
# for the figure remained the same:
#
# This is because matplotlib used the default dimensions for the total
# plotting area instead of resizing it to accommodate the plots. If we want
# to expand the plotting area, we have to specify this ourselves when we
# create the figure. To tweak the dimensions of the plotting area, we need to
# use the figsize parameter when we call plt.figure():
#
# This parameter takes in a tuple of floats:

# In[12]:

fig = plt.figure(figsize=(12, 5))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# Rows 0-11 are plotted as 1948 and rows 12-23 as 1949 (positional slices).
ax1.plot(unrate.iloc[:12]["DATE"], unrate.iloc[:12]["VALUE"])
ax1.set_title("Monthly Unemployment rate in 1948")
ax2.plot(unrate.iloc[12:24]["DATE"], unrate.iloc[12:24]["VALUE"])
ax2.set_title("Monthly Unemployment rate in 1949")
plt.show()

# ## Comparing across more years

# In[13]:

# On this screen, we're going to visualize data from a few more years to see
# if we find any evidence for seasonality between those years.
# Set the width of the plotting area to 12 inches and the height to 12 inches.
fig = plt.figure(figsize=(12, 12))
# Generate a grid with 5 rows and 1 column and plot data from the individual
# years. Start with 1948 in the top subplot and end with 1952 in the bottom
# subplot.
for i in range(5):
    ax = fig.add_subplot(5, 1, i + 1)
    # Each year occupies a consecutive block of 12 rows.
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate.iloc[start_index:end_index]
    ax.plot(subset['DATE'], subset['VALUE'])
plt.show()

# In[14]:

unrate.head()

# In[15]:

# Month number taken from the DATE column via the .dt accessor.
unrate['MONTH'] = unrate['DATE'].dt.month

# In[16]:

unrate["MONTH"].unique()

# ## Overlaying line charts

# In[17]:

# Extract the month values from the DATE column and assign them to a new
# column; we can use the pandas.Series.dt accessor:
unrate['MONTH'] = unrate['DATE'].dt.month
# Set the plotting area to a width of 6 and height of 3 inches.
fig = plt.figure(figsize=(6, 3))
# Generate 2 line charts in the base subplot, using the MONTH column for the
# x-axis instead of the DATE column:
# One line chart using data from 1948, with the line color set to "red".
# One line chart using data from 1949, with the line color set to "blue".
plt.plot(unrate.iloc[0:12]['MONTH'], unrate.iloc[0:12]['VALUE'], c="red")
plt.plot(unrate.iloc[12:24]['MONTH'], unrate.iloc[12:24]['VALUE'], c="blue")
plt.show()

# In[18]:

# Extract the month values from the DATE column and assign them to a new
# column; we can use the pandas.Series.dt accessor:
unrate['MONTH'] = unrate['DATE'].dt.month
# Set the plotting area to a width of 10 and height of 6 inches.
fig = plt.figure(figsize=(10, 6))
# Generate 5 line charts in the base subplot, using the MONTH column for the
# x-axis instead of the DATE column:
# One line chart using data from 1948, with the line color set to "red".
# One line chart using data from 1949, with the line color set to "blue".
# 1950: color "green"
# 1951: color "orange"
# 1952: color "black"
colors = ["red", "blue", "green", "orange", "black"]
for i, color in enumerate(colors):
    # Each year occupies a consecutive block of 12 rows.
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate.iloc[start_index:end_index]
    plt.plot(subset["MONTH"], subset["VALUE"], c=color)
plt.show()

# In[19]:

unrate['MONTH'].unique()

# ## Adding axis labels and a title

# In[20]:

fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate.iloc[start_index:end_index]
    # The legend entry is the calendar year: the first 12-row block is
    # labelled 1948, the next 1949, and so on.
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=color, label=label)
# Legend, title and axis labels are set once, after all lines are drawn.
plt.legend(loc='upper left')
plt.title("Monthly Unemployment Trends, 1948-1952")
plt.xlabel("Month, Integer")
plt.ylabel("Unemployment Rate, Percent")
plt.show()

# In[ ]: