#!/usr/bin/env python # coding: utf-8 # ### Labor force nonparticipation due to disability or illness, by CBSA # # ---- # # January 15, 2019 # # Brian Dew, brian.w.dew@gmail.com # # This code creates a map showing the share of each center-based statistical area that is not in the labor force due to disability or illness. # In[1]: get_ipython().run_line_magic('config', 'Completer.use_jedi = False') # Import python packages import pandas as pd import numpy as np import calendar # In[2]: # Generate a list of file names matching the Census microdata files latest_month = '12/1/2017' cps_files = [f'{calendar.month_abbr[m.month].lower()}{m.year % 100}pub.dat' for m in pd.date_range(end=latest_month, periods=24, freq='MS')] ldate = pd.to_datetime(latest_month).strftime('%B %Y') pdate = (pd.to_datetime(latest_month) - pd.DateOffset(months=23)).strftime('%B %Y') # In[3]: # Microdata files are fixed-width # The variable locations are from the data dictionary cps_vars = [('GTCBSA', 95, 100), # CBSA (geo area) ('PRTAGE', 121, 123), # Person's age ('PEMLR', 179, 181), # Person's labor market status ('PWCMPWGT', 845, 855)] # Person's composite weight path = '/home/brian/Documents/CPS/data' # Location of microdata files # In[4]: d = {} # Empty dictionary to store monthly files for file in cps_files: d[file] = (pd.read_fwf(f'{path}/{file}', colspecs=[[v[1], v[2]] for v in cps_vars], names=[v[0] for v in cps_vars]) .query('PEMLR != -1 and PWCMPWGT > 0 and GTCBSA > 0 ' 'and PRTAGE > 15 and PRTAGE < 65')) df = pd.concat(d, ignore_index=True) # Combine into one dataframe # In[5]: len(df.groupby('GTCBSA')['PEMLR'].sum().unique()) # In[6]: # Check sample size to make sure I have at least 500 observations min_obs = df.groupby('GTCBSA')['PEMLR'].agg('count').min() max_obs = df.groupby('GTCBSA')['PEMLR'].agg('count').max() print(f'Between {min_obs} and {max_obs} observations per CBSA') # In[7]: # Calculate disabled NILF share of age group, by CBSA dis = lambda x: np.average(np.where(x['PEMLR'] == 6, 1, 0), weights=x['PWCMPWGT']) dis_list = df.groupby('GTCBSA').apply(dis) dis_list.index = dis_list.index.map(str) # In[8]: # Decide where to split the color codes for the map bins = [0.004, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.2] colors = ['#4d9221', '#a1d76a', '#e6f5d0', '#f7f7f7', '#fde0ef', '#e9a3c9', '#c51b7d'] colormap = pd.cut(dis_list, bins, labels=colors).to_dict() # In[9]: get_ipython().run_line_magic('matplotlib', 'inline') import os os.environ['PROJ_LIB'] = '/home/brian/miniconda3/share/proj' from mpl_toolkits.basemap import Basemap from mpl_toolkits.axes_grid1.inset_locator import inset_axes import matplotlib.pyplot as plt from matplotlib.patches import Polygon # In[10]: # Show contiguous US (and move Hawaii, no data for Alaska): m = Basemap(llcrnrlon=-121, llcrnrlat=20, urcrnrlon=-64, urcrnrlat=49, projection='lcc', lat_1=33, lat_2=45, lon_0=-95) # In[11]: fig, ax = plt.subplots(figsize=(16,8)) m.readshapefile('shapefiles/cb_2019_us_nation_20m', 'nation', color='gray', linewidth=0.3) m.readshapefile('shapefiles/cb_2019_us_state_20m', 'states', drawbounds=False) m.readshapefile('shapefiles/cb_2019_us_cbsa_20m', 'cbsa', drawbounds=False) for info, shape in zip(m.states_info, m.states): seg = shape if info['NAME'] == 'Hawaii' and info['ALAND'] > 100: seg = [[x + 5200000, y-1400000] for x,y in shape] ax.add_patch(Polygon(seg, facecolor='white', edgecolor='gray', linewidth=0.3)) ax.add_patch(Polygon(seg, facecolor='white', edgecolor='gray', linewidth=0.3)) for info, shape in zip(m.cbsa_info, m.cbsa): if info['GEOID'] in colormap.keys(): color = colormap[info['GEOID']] seg = shape if info['NAME'][-2:] == 'HI' and info['RINGNUM'] == 1: seg = [[x + 5200000, y-1400000] for x,y in shape] ax.add_patch(Polygon(seg, facecolor=color, edgecolor='gray', linewidth=0.18)) plt.title('Share of age 16-64 population that is out of work due to disability', fontsize=20, fontweight=800, loc='left', color='dimgrey', fontname='Lato') plt.annotate(f'Source: Current Population Survey microdata, {pdate} to {ldate}', (0,0), (0, 0), fontsize=10, xycoords='axes fraction', textcoords='offset points', va='top') ax.axis('off') ax_inset = inset_axes(ax, width="16%", height="26%", loc=3, borderpad=3.6) bars = list(pd.cut(dis_list, bins).reset_index().groupby(0)['GTCBSA'].nunique().values) labels = [f'{int(x.left*100)} – {int(x.right*100)}%' for x in pd.cut(dis_list, bins).sort_values().unique()[:-1]] labels.append('12%+') ax_inset.barh(list(range(0, len(bars))), bars, align='center', color=colors) ax_inset.axis('off') for i, bar in enumerate(bars): ax_inset.text(bar+2, i, bar, fontsize=9, va='center') for i, label in enumerate(labels): ax_inset.text(-2, i, label, fontsize=9, va='center', ha='right') ax_inset.text(-25, 7, 'Value Count') plt.show(); # In[ ]: # In[ ]: # In[ ]: # In[ ]: