#!/usr/bin/env python # coding: utf-8 # Open In Colab # # Finding Where to Thrive - Part III # ## Exploring Consumer Price Index Data # In this notebook, we'll load the five years of CPI data that we collected from the Bureau of Labor Statistics (https://www.bls.gov/cpi) using the public API, clean and prep the data for processing, and complete exploratory data analysis. # # The Consumer Price Index (CPI) is a measure of the average change over time in the prices paid by urban consumers for a market basket of consumer goods and services. The index measures price changes from a designed reference date, and in this case, the reference base is 1982-84 (CPI = 100). **The data we collected is for the four census regions (South, West, Northeast, and Midwest) and five consumer price indexes (All items, All items less food and energy, Energy, Food, and Shelter)**. # # The West region includes Alaska, Arizona, California, Colorado, Hawaii, Idaho, Montana, Nevada, New Mexico, Oregon, Utah, Washington, and Wyoming. # # The Midwest region is comprised of Illinois, Indiana, Iowa, Kansas, Michigan, Minnesota, Missouri, Nebraska, North Dakota, Ohio, South Dakota, and Wisconsin. # # The South region is comprised of Alabama, Arkansas, Delaware, District of Columbia, Florida, Georgia, Kentucky, Louisiana, Maryland, Mississippi, North Carolina, Oklahoma, South Carolina, Tennessee, Texas, Virginia, and West Virginia. # # The Northeast region is comprised of Connecticut, Maine, Massachusetts, New Hampshire, New Jersey, New York, Pennsylvania, Rhode Island, and Vermont. # # We'll look to answer the following questions: # # 1. Which region has had the largest CPI increase for all items? # # 2. Which region has had the largest CPI increases for energy, food, and shelter? # # 3. Which commodity saw the largest CPI increase? # First, we'll start with importing the libraries and tools that we'll need. # In[ ]: import pandas as pd import numpy as np import plotly.express as px import plotly.graph_objects as go from dataprep.eda import plot, create_report # Next, we'll read in the Excel file that we created that lists the Series Report IDs and the region and consumer price index that the are tied to. We'll use this document as a dictionary to map the decriptions to our data since the API only returns the Series Report IDs and values. # In[ ]: # create dataframe from series mapping document cpi_mapping = pd.read_excel('BLS CPI/BLS Series Numbers.xlsx', sheet_name='Sheet1') # lower column names cpi_mapping.columns = cpi_mapping.columns.str.lower() # view mapping document cpi_mapping # Now we'll read in our collected data and map the region and category descriptions to the series_id. # In[ ]: # create cpi data df cpi_data = pd.read_csv('BLS CPI/CPI Data 2017 - 2021.csv') # change column names to lowercase cpi_data.columns = cpi_data.columns.str.lower() # change series id column name to snake_case cpi_data = cpi_data.rename(columns={'series id':'series_id'}) cpi_data.sample(5) # In[ ]: # create dictionaries to map region and description vaules to the cpi data bls_area = dict(zip(cpi_mapping.bls_series, cpi_mapping.region)) bls_desc = dict(zip(cpi_mapping.bls_series, cpi_mapping.description)) # map values from dictionary cpi_data['region'] = cpi_data['series_id'].map(bls_area) cpi_data['description'] = cpi_data['series_id'].map(bls_desc) # rename description column cpi_data.rename(columns={'description':'category'}, inplace=True) # reorder columns and drop series_id column cpi_data = cpi_data[['region', 'category', 'month', 'value']] # In[ ]: # view a sample of the data cpi_data.sample(10) # In[ ]: cpi_data.info() # view number of observations and check for null values # Since our data contains both monthy and yearly averages, we will subset the dataframe into to one dataframe containing yearly values, and one containing monthly values. # In[ ]: # subset dataframe for annual values cpi_ann = cpi_data[cpi_data['month'].str.contains('Annual')].copy() # trim year from month column cpi_ann['year'] = cpi_ann['month'].str[-4:] # change year to datetime data type cpi_ann['year'] = pd.to_datetime(cpi_ann['year']).dt.strftime('%Y') # drop month column cpi_ann.drop(columns={'month'}, inplace=True) # sort by year cpi_ann = cpi_ann.sort_values(by=['region','category','year']) # In[ ]: cpi_ann.head() # In[ ]: # subset dataframe for monthly values cpi_mth = cpi_data[~cpi_data['month'].str.contains('Annual')].copy() # change month column to datetime data type cpi_mth['month'] = pd.to_datetime(cpi_mth['month']) # In[ ]: cpi_mth.head() # In[ ]: # export cleaned dataframes a csvs cpi_mth.to_csv('BLS CPI/FWT_CPI_Month.csv', index=False) cpi_ann.to_csv('BLS CPI/FWT_CPI_Year.csv', index=False) # ### Exploratory Data Analysis # We'll start the EDA by plotting the monthly values for each region. # In[ ]: px.line(cpi_mth.query("region == 'South'"). sort_values(by='month'), x='month', y='value', color='category', labels={'value':'CPI', 'month':'', 'category':'Categories:'}, title="Consumer Price Index for Major Categories: Southern Region", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) # In[ ]: px.line(cpi_mth.query("region == 'West'"). sort_values(by='month'), x='month', y='value', color='category', labels={'value':'CPI', 'month':'', 'category':'Categories:'}, title="Consumer Price Index for Major Categories: Western Region", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) # In[ ]: px.line(cpi_mth.query("region == 'Northeast'"). sort_values(by='month'), x='month', y='value', color='category', labels={'value':'CPI', 'month':'', 'category':'Categories:'}, title="Consumer Price Index for Major Categories: Northeast Region", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) # In[ ]: px.line(cpi_mth.query("region == 'Midwest'"). sort_values(by='month'), x='month', y='value', color='category', labels={'value':'CPI', 'month':'', 'category':'Categories:'}, title="Consumer Price Index for Major Categories: Midwest Region", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) # Now we'll calculate the percentage change of each category from 2017 to 2021 using the annual averages and plot them as a bar graph by region. # In[ ]: cpi_ann['pct_change']= cpi_ann\ .groupby(['region','category'])['value']\ .pct_change(periods=4).mul(100) cpi_ann_pct = cpi_ann[['region', 'category', 'pct_change']]\ .dropna().sort_values(by='pct_change',ascending=False).copy() cpi_ann_pct.query("region == 'South'") # In[ ]: fig = px.bar(cpi_ann_pct.query("region == 'South'"), x='category', y='pct_change', color='category', text_auto='.3s', labels={'pct_change':'', 'category':''}, title="Southern Region Consumer Price Index % Change: 2017 to 2021", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("region == 'Northeast'"), x='category', y='pct_change', color='category', text_auto='.3s', labels={'pct_change':'', 'category':''}, title="Northeast Region Consumer Price Index % Change: 2017 to 2021", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("region == 'West'"), x='category', y='pct_change', color='category', text_auto='.3s', labels={'pct_change':'', 'category':''}, title="Western Region Consumer Price Index % Change: 2017 to 2021", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("region == 'Midwest'"), x='category', y='pct_change', color='category', text_auto='.3s', labels={'pct_change':'', 'category':''}, title="Midwest Region Consumer Price Index % Change: 2017 to 2021", height=600, category_orders={'category':['All items','Food','Energy','Shelter','All items less food and energy']}) fig.update_yaxes(ticksuffix="%") fig.show() # Now lets compare the regional percentage changes for each category. # In[ ]: fig = px.bar(cpi_ann_pct.query("category == 'All items'"), x='region', y='pct_change', color='region', text_auto='.3s', labels={'pct_change':'', 'region':''}, title="Consumer Price Index for All Items % Change: 2017 to 2021", height=600, category_orders={'region':['South','West','Northeast','Midwest']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("category == 'Food'"), x='region', y='pct_change', color='region', text_auto='.3s', labels={'pct_change':'', 'region':''}, title="Consumer Price Index for Food % Change: 2017 to 2021", height=600, category_orders={'region':['South','West','Northeast','Midwest']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("category == 'Energy'"), x='region', y='pct_change', color='region', text_auto='.3s', labels={'pct_change':'', 'region':''}, title="Consumer Price Index for Energy % Change: 2017 to 2021", height=600, category_orders={'region':['South','West','Northeast','Midwest']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("category == 'Shelter'"), x='region', y='pct_change', color='region', text_auto='.3s', labels={'pct_change':'', 'region':''}, title="Consumer Price Index for Shelter % Change: 2017 to 2021", height=600, category_orders={'region':['South','West','Northeast','Midwest']}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(cpi_ann_pct.query("category == 'All items less food and energy'"), x='region', y='pct_change', color='region', text_auto='.3s', labels={'pct_change':'', 'region':''}, title="Consumer Price Index for All Items (less food and energy) % Change: 2017 to 2021", height=600, category_orders={'region':['South','West','Northeast','Midwest']}) fig.update_yaxes(ticksuffix="%") fig.show() # ### Insights # **1. Which region has had the largest CPI increases for all items?** # # The Western region saw the largest CPI increase for all items (+12.9%). # # **2. Which region has had the largest CPI increases for energy, food, and shelter?** # # Energy: West (+28.4%) # # Food: West (+13.7%) # # Shelter: West (+14.8%) # # **3. Which commodity saw the largest CPI increase?** # # In all four regions, the commodity that saw the largest CPI increase was Energy. # ### Next Steps # In the next part of our analysis, we will take a look at State and metro area population data.