Generate national and state-level crosswalks¶

1990 block group parts to 2010 counties¶

NHGIS block crosswalks ¶

James D. Gaboardi, 06/2020

In [1]:

%load_ext watermark
%watermark

2020-07-29T16:31:43-04:00

CPython 3.7.6
IPython 7.15.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit

In [2]:

import nhgisxwalk
import inspect
import numpy
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
pandas     1.0.4
numpy      1.18.5
nhgisxwalk 0.0.6

Source and target years for the crosswalk¶

In [3]:

# Census years the crosswalk maps from (source) and to (target).
source_year = "1990"
target_year = "2010"

# Names of the source and target ID columns in the base block crosswalk.
gj_src = "GJOIN%s" % source_year
gj_trg = "GJOIN%s" % target_year

Base source-to-target (block-level) crosswalk¶

In [4]:

# File name and relative path of the zipped block-to-block base crosswalk.
base_xwalk_name = "nhgis_blk%s_blk%s_gj.zip" % (
    source_year,
    target_year,
)
base_xwalk_file = "../../crosswalks/%s" % base_xwalk_name

# Column dtypes for the two ID columns, as produced by `nhgisxwalk.str_types`.
data_types = nhgisxwalk.str_types([gj_src, gj_trg])

# Read the base crosswalk and preview its first rows.
base_xwalk = pandas.read_csv(filepath_or_buffer=base_xwalk_file, dtype=data_types)
base_xwalk.head(5)

Out[4]:

	GJOIN1990	GJOIN2010	WEIGHT	PAREA_VIA_BLK00
0	G01000100201101A	G01000100201002004	0.000753	0.014284
1	G01000100201101A	G01000100201002005	0.042020	0.109618
2	G01000100201101A	G01000100201002006	0.262146	0.498133
3	G01000100201101A	G01000100201002016	0.237187	0.218109
4	G01000100201101A	G01000100201002023	0.099097	0.012864

Source summary data¶

In [5]:

# Relative path of the source-year block summary table
# ("<year>_block/<year>_block.csv" under the tabular data directory).
base_source_name = "%s_block/%s_block.csv" % ((source_year,) * 2)
base_source_file = "../../tabular_data/%s" % base_source_name

Source supplementary summary data (special case for 1990)¶

In [6]:

# Relative path of the supplementary source-year summary table; the same
# year string fills both the directory name and the file name.
supp_source_name = "%s_blck_grp_598_103/%s_blck_grp_598_103.csv" % (
    (source_year,) * 2
)
supp_source_file = "../../tabular_data/%s" % supp_source_name

Convenience code shorthand/lookup¶

In [7]:

# Display the lookup between geographic unit names and their shorthand codes
# (e.g. 'block group part' <-> 'bgp'), which are used for `source_geo`/`target_geo`.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

Out[7]:

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bg',
 'tract': 'tr',
 'county': 'co'}

Instantiate an `nhgisxwalk.GeoCrossWalk` object¶

See `nhgisxwalk.GeoCrossWalk` for full details¶

In [8]:

# Inspect the 1990 variable lookup: one entry per universe, whose "Total"
# value is the NHGIS column code selected as an input variable below.
nhgisxwalk.desc_code_1990

Out[8]:

{'Persons': {'Persons': 'Universe',
  'NP1': 'Source code',
  'ET1': 'NHGIS code',
  'Total': 'ET1001'},
 'Families': {'Families': 'Universe',
  'NP2': 'Source code',
  'EUD': 'NHGIS code',
  'Total': 'EUD001'},
 'Households': {'Households': 'Universe',
  'NP3': 'Source code',
  'EUO': 'NHGIS code',
  'Total': 'EUO001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH1': 'Source code',
  'ESA': 'NHGIS code',
  'Total': 'ESA001'}}

In [9]:

# Collect the NHGIS code of the "Total" variable for each 1990 universe,
# in the order: persons, families, households, housing units.
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars

Out[9]:

['ET1001', 'EUD001', 'EUO001', 'ESA001']

In [10]:

# Short tags passed as `weight_var` when building the crosswalk below
# (presumably paired positionally with `input_vars` — confirm).
input_var_tags = ["pop", "fam", "hh", "hu"]

In [11]:

# Build the 1990 block group part -> 2010 county crosswalk from the base
# block crosswalk and the 1990 summary tables.
bgp1990_to_co2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    # years
    source_year=source_year,
    target_year=target_year,
    # geographies (shorthand codes)
    source_geo="bgp",
    target_geo="co",
    # source-year summary tables
    base_source_table=base_source_file,
    supp_source_table=supp_source_file,
    # variables to weight on and the tags labelling them
    input_var=input_vars,
    weight_var=input_var_tags,
    # output options
    add_geoid=True,
    keep_base=False,
)

# The base crosswalk is not referenced again in this notebook; drop the name.
# Re-running this cell therefore requires re-running the cell that loads it.
del base_xwalk

bgp1990_to_co2010.xwalk

Out[11]:

	bgp1990gj	co2010gj	co2010ge	wt_pop	wt_fam	wt_hh	wt_hu
0	G010001090171032200211039999999999922	G0100010	01001	1.0	1.0	1.0	1.0
1	G010001090171032200211039999999999923	G0100010	01001	1.0	1.0	1.0	1.0
2	G010001090171999990211039999999999921	G0100010	01001	1.0	1.0	1.0	1.0
3	G010001090171999990211039999999999922	G0100010	01001	1.0	1.0	1.0	1.0
4	G010001090171999990211039999999999923	G0100010	01001	1.0	1.0	1.0	1.0
...	...	...	...	...	...	...	...
375950	G560045093520999999512009999999999923	G5600450	56045	1.0	1.0	1.0	1.0
375951	G560045093520999999512009999999999924	G5600450	56045	1.0	1.0	1.0	1.0
375952	G560045093520999999512009999999999925	G5600450	56045	1.0	1.0	1.0	1.0
375953	G560045093520999999512009999999999926	G5600450	56045	1.0	1.0	1.0	1.0
375954	G560045093520999999512009999999999927	G5600450	56045	1.0	1.0	1.0	1.0

375955 rows × 7 columns

Write crosswalk to a `.csv`¶

In [12]:

# Directory that receives the national-level crosswalk file.
nat_dir = "../../crosswalks/"

# Write the full crosswalk, named after the crosswalk object's `xwalk_name`.
nhgisxwalk.xwalk_df_to_csv(
    path=nat_dir,
    dfkwds=dict(
        df=bgp1990_to_co2010.xwalk,
        xwalk_name=bgp1990_to_co2010.xwalk_name,
    ),
)

Split by (target) state and write out¶

In [13]:

# Unique state FIPS codes found via the crosswalk's target column, sorted.
# NOTE(review): the displayed result ends with a 'nan' entry — confirm that
# the per-state export is expected to handle it.
stfips_codes = sorted(
    nhgisxwalk.extract_unique_stfips(
        df=bgp1990_to_co2010.xwalk,
        endpoint=bgp1990_to_co2010.target,
    )
)
stfips_codes

Out[13]:

['01',
 '02',
 '04',
 '05',
 '06',
 '08',
 '09',
 '10',
 '11',
 '12',
 '13',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '53',
 '54',
 '55',
 '56',
 'nan']

In [14]:

# Write one crosswalk file per state code into a state-level subdirectory;
# rows are selected for each state via the target column.
state_dir = nat_dir + "nhgis_bgp1990_co2010_state/"

# Values that do not change between states are looked up once.
xwalk_name = bgp1990_to_co2010.xwalk_name
# Sort keys: the source ID column first, then the target ID column.
source, target = bgp1990_to_co2010.source, bgp1990_to_co2010.target

for stfips in stfips_codes:
    # Subset the national crosswalk to the current state code.
    _stxwalk = nhgisxwalk.extract_state(
        bgp1990_to_co2010.xwalk,
        stfips,
        xwalk_name,
        target,
        sort_by=[source, target],
    )
    dfkwds = {"df": _stxwalk, "stfips": stfips, "xwalk_name": xwalk_name}
    nhgisxwalk.xwalk_df_to_csv(dfkwds=dfkwds, path=state_dir)