Labor force nonparticipation due to disability or illness, by CBSA¶

January 15, 2019

Brian Dew, brian.w.dew@gmail.com

This code creates a map showing the share of each center-based statistical area that is not in the labor force due to disability or illness.

In [1]:

%config Completer.use_jedi = False
# Import python packages
import pandas as pd
import numpy as np
import calendar

In [2]:

# Generate a list of file names matching the Census microdata files
latest_month = '12/1/2017'
cps_files = [f'{calendar.month_abbr[m.month].lower()}{m.year % 100}pub.dat' 
               for m in pd.date_range(end=latest_month, periods=24, freq='MS')]

ldate = pd.to_datetime(latest_month).strftime('%B %Y')
pdate = (pd.to_datetime(latest_month) - 
         pd.DateOffset(months=23)).strftime('%B %Y')

In [3]:

# Microdata files are fixed-width
# The variable locations are from the data dictionary
cps_vars = [('GTCBSA', 95, 100),     # CBSA (geo area)
            ('PRTAGE', 121, 123),    # Person's age
            ('PEMLR', 179, 181),     # Person's labor market status
            ('PWCMPWGT', 845, 855)]  # Person's composite weight

path = '/home/brian/Documents/CPS/data'  # Location of microdata files

In [4]:

d = {} # Empty dictionary to store monthly files
for file in cps_files:
    d[file] = (pd.read_fwf(f'{path}/{file}', 
                          colspecs=[[v[1], v[2]] for v in cps_vars],
                          names=[v[0] for v in cps_vars])
               .query('PEMLR != -1 and PWCMPWGT > 0 and GTCBSA > 0 '
                      'and PRTAGE > 15 and PRTAGE < 65'))

df = pd.concat(d, ignore_index=True)     # Combine into one dataframe

In [5]:

len(df.groupby('GTCBSA')['PEMLR'].sum().unique())

Out[5]:

In [6]:

# Check sample size to make sure I have at least 500 observations
min_obs = df.groupby('GTCBSA')['PEMLR'].agg('count').min()
max_obs = df.groupby('GTCBSA')['PEMLR'].agg('count').max()

print(f'Between {min_obs} and {max_obs} observations per CBSA')

Between 589 and 83326 observations per CBSA

In [7]:

# Calculate disabled NILF share of age group, by CBSA
dis = lambda x: np.average(np.where(x['PEMLR'] == 6, 1, 0), weights=x['PWCMPWGT'])
dis_list = df.groupby('GTCBSA').apply(dis)
dis_list.index = dis_list.index.map(str)

In [8]:

# Decide where to split the color codes for the map
bins = [0.004, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.2]
colors = ['#4d9221', '#a1d76a', '#e6f5d0', '#f7f7f7', '#fde0ef', '#e9a3c9', '#c51b7d']
colormap = pd.cut(dis_list, bins, labels=colors).to_dict()

In [9]:

%matplotlib inline
import os
os.environ['PROJ_LIB'] = '/home/brian/miniconda3/share/proj'
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

In [10]:

# Show contiguous US (and move Hawaii, no data for Alaska):
m = Basemap(llcrnrlon=-121, llcrnrlat=20, urcrnrlon=-64, urcrnrlat=49,
            projection='lcc', lat_1=33, lat_2=45, lon_0=-95)

In [11]:

fig, ax = plt.subplots(figsize=(16,8))
m.readshapefile('shapefiles/cb_2019_us_nation_20m', 'nation', color='gray', linewidth=0.3)
m.readshapefile('shapefiles/cb_2019_us_state_20m', 'states', drawbounds=False)
m.readshapefile('shapefiles/cb_2019_us_cbsa_20m', 'cbsa', drawbounds=False)

for info, shape in zip(m.states_info, m.states):
    seg = shape
    if info['NAME'] == 'Hawaii' and info['ALAND'] > 100:
        seg = [[x + 5200000, y-1400000] for x,y in shape]
        ax.add_patch(Polygon(seg, facecolor='white', edgecolor='gray', linewidth=0.3))
    ax.add_patch(Polygon(seg, facecolor='white', edgecolor='gray', linewidth=0.3))
    
for info, shape in zip(m.cbsa_info, m.cbsa):
    if info['GEOID'] in colormap.keys(): 
        color = colormap[info['GEOID']]
        seg = shape
        if info['NAME'][-2:] == 'HI' and info['RINGNUM'] == 1:
            seg = [[x + 5200000, y-1400000] for x,y in shape]
        ax.add_patch(Polygon(seg, facecolor=color, edgecolor='gray', linewidth=0.18))
        
plt.title('Share of age 16-64 population that is out of work due to disability', 
          fontsize=20, fontweight=800, loc='left', color='dimgrey', fontname='Lato')
plt.annotate(f'Source: Current Population Survey microdata, {pdate} to {ldate}',
             (0,0), (0, 0), fontsize=10, xycoords='axes fraction',
             textcoords='offset points', va='top')

ax.axis('off')

ax_inset = inset_axes(ax, width="16%", height="26%", loc=3, borderpad=3.6)
bars = list(pd.cut(dis_list, bins).reset_index().groupby(0)['GTCBSA'].nunique().values)
labels = [f'{int(x.left*100)} – {int(x.right*100)}%' 
          for x in pd.cut(dis_list, bins).sort_values().unique()[:-1]]
labels.append('12%+')
ax_inset.barh(list(range(0, len(bars))), bars, align='center', color=colors)
ax_inset.axis('off')
for i, bar in enumerate(bars):
    ax_inset.text(bar+2, i, bar, fontsize=9, va='center')
for i, label in enumerate(labels):
    ax_inset.text(-2, i, label, fontsize=9, va='center', ha='right')
ax_inset.text(-25, 7, 'Value     Count')

plt.show();

In [ ]: