January 14, 2019
Brian Dew, @bd_econ
This example uses CPS microdata that has already been cleaned and saved as feather-format files to calculate the percentage of people age 25 to 54 who were not working one year earlier due to disability but are employed in the current month.
# Import packages and set preliminaries
%matplotlib inline
import pandas as pd
import numpy as np
import os
from pathlib import Path
# Switch the working directory to the CPS project folder.
# NOTE: this is a machine-specific absolute path; adjust it before running elsewhere.
os.chdir('/home/brian/Documents/CPS/')
# Folder containing the yearly feather files (named cps<year>.ft) that
# cps_1mo reads from.
cps_dir = Path('/home/brian/Documents/CPS/data/clean/')
def cps_1mo(cps_dir, cps_dt, cols):
'''
Return 1 month of bd_CPS variables cols ending cps_dt
'''
if 'MONTH' not in cols:
cols = cols + ['MONTH']
if 'YEAR' not in cols:
cols = cols + ['YEAR']
cps_year = cps_dt.year
cps_month = cps_dt.month
df = (pd.read_feather(cps_dir / f'cps{cps_year}.ft', columns=cols)
.query('MONTH == @cps_month'))
return df
# Variables loaded from each monthly extract (cps_1mo also loads MONTH and
# YEAR when they are not listed here).
cols = ['CPSID', 'AGE', 'FEMALE', 'WBHAO', 'NILFREASON',
        'LFS', 'MIS', 'BASICWGT', 'LINENO', 'RETIRED']


def emp(x):
    '''
    Return a 0/1 array marking rows employed in the current month.

    x is the merged year-ago/current data frame; LFS_y is the labor force
    status column coming from the current-month side of the merge.
    '''
    return np.where(x.LFS_y == 'Employed', 1, 0)


# Month-start dates from January 1997 through November 2021
dates = pd.date_range('1997-01-01', '2021-11-01', freq='MS')

d = {}
for dt in dates:
    # Same month one year earlier, limited to people not in the labor force
    # because of disability.
    # NOTE(review): the original filter was the truncated expression 'RE',
    # which is not a column and fails at runtime. The filter below follows
    # the stated purpose of this example (disability); the category label
    # "Disabled/Ill" is assumed -- confirm it against the NILFREASON values
    # in the cleaned data.
    df1 = (cps_1mo(cps_dir, dt - pd.DateOffset(years=1), cols)
           .query('NILFREASON == "Disabled/Ill"'))
    df2 = cps_1mo(cps_dir, dt, cols)

    # Inner merge (pandas default) keeps only people found in both months;
    # non-key columns get _x (year ago) and _y (current month) suffixes.
    data = (pd.merge(df1, df2, on=['CPSID', 'LINENO', 'FEMALE', 'WBHAO'])
            .query('25 <= AGE_y <= 54'))

    # Keep matches whose age did not decrease and rose by at most two years
    data = (data.loc[(data.AGE_x <= data.AGE_y) &
                     (data.AGE_y - 2 <= data.AGE_x)]).assign(EMP=emp)

    # Weighted percent employed in the current month
    d[dt] = np.average(data.EMP, weights=data.BASICWGT_y) * 100

# 12-month moving average of the monthly shares; the leading months that
# lack a full window are dropped
df = pd.Series(d).rolling(12).mean().dropna().rename('Share')
df.plot(color='blue', legend=None, title='Flow, Disability to Work');