For further background information see:
%load_ext watermark
%watermark
2020-06-21T20:09:36-04:00 CPython 3.7.6 IPython 7.15.0 compiler : Clang 9.0.1 system : Darwin release : 19.5.0 machine : x86_64 processor : i386 CPU cores : 8 interpreter: 64bit
import nhgisxwalk
import inspect
import numpy
import pandas
%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv
watermark 2.0.2 nhgisxwalk 0.0.5 numpy 1.18.5 pandas 1.0.4
# Source and target census years; kept as strings because they are
# interpolated into file names and column names below.
source_year = "1990"
target_year = "2010"

# Directory holding the subset data read by this notebook.
subset_data_dir = "../testing_data_subsets"

# Base block-to-block crosswalk file. The name carries its own leading "/"
# because it is appended directly to the directory string.
base_xwalk_name = "/nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = "".join((subset_data_dir, base_xwalk_name))

# Ask nhgisxwalk for the dtypes of the two GJOIN ID columns so pandas does
# not infer them on read.
data_types = nhgisxwalk.str_types(
    ["GJOIN%s" % year for year in (source_year, target_year)]
)
base_xwalk = pandas.read_csv(
    base_xwalk_file,
    dtype=data_types,
    index_col=0,
)
base_xwalk.head()
 | GJOIN1990 | GJOIN2010 | WEIGHT | PAREA_VIA_BLK00 |
---|---|---|---|---|
0 | NaN | G10000100432021078 | 0.0 | 0.0 |
1 | NaN | G10000100432023014 | 0.0 | 0.0 |
2 | NaN | G10000100432023015 | 0.0 | 0.0 |
3 | NaN | G10000109900000011 | 0.0 | 0.0 |
4 | NaN | G10000109900000012 | 0.0 | 0.0 |
# Supplementary source table for the source year.
# The leading "/" is required: the name is appended directly to
# `subset_data_dir`, which has no trailing separator (same convention as
# `base_xwalk_name`). Without it the path collapses to
# "../testing_data_subsets1990_blck_grp_598_103.csv.zip".
supp_source_name = "/%s_blck_grp_598_103.csv.zip" % source_year
supp_source_file = subset_data_dir + supp_source_name
# Print the source code of the geography-shorthand lookup helper.
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))
def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies."""
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bkg": "block group",
        "trt": "tract",
        "cty": "county",
    }
    if not shorthand_name:
        lookup = {v: k for k, v in lookup.items()}
    return lookup
# Inverted lookup: full geography name -> shorthand code.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)
{'block': 'blk', 'block group part': 'bgp', 'block group': 'bkg', 'tract': 'trt', 'county': 'cty'}
nhgisxwalk.GeoCrossWalk object¶
nhgisxwalk.desc_code_1990
{'Persons': {'Persons': 'Universe', 'NP1': 'Source code', 'ET1': 'NHGIS code', 'Total': 'ET1001'}, 'Families': {'Families': 'Universe', 'NP2': 'Source code', 'EUD': 'NHGIS code', 'Total': 'EUD001'}, 'Households': {'Households': 'Universe', 'NP3': 'Source code', 'EUO': 'NHGIS code', 'Total': 'EUO001'}, 'Housing Units': {'Housing Units': 'Universe', 'NH1': 'Source code', 'ESA': 'NHGIS code', 'Total': 'ESA001'}}
# Collect the code stored under "Total" for each 1990 universe, in the
# order: persons, families, households, housing units.
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars
['ET1001', 'EUD001', 'EUO001', 'ESA001']
# Short tags, one per entry of `input_vars` (same order); they appear as the
# wt_<tag> weight columns of the resulting crosswalk.
input_var_tags = ["pop", "fam", "hh", "hu"]

# State FIPS code passed as `stfips`.
# NOTE(review): presumably restricts the crosswalk to this state -- confirm
# against the GeoCrossWalk documentation.
subset_state = "10"

# Build the crosswalk from source-year block group parts ("bgp") to
# target-year counties ("cty").
bgp1990_to_cty2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="cty",
    # Derive both table names from `source_year` instead of hard-coding
    # "1990", so they cannot drift from the year passed above.
    base_source_table=subset_data_dir + "/%s_block.csv.zip" % source_year,
    supp_source_table=(
        subset_data_dir + "/%s_blck_grp_598_103.csv.zip" % source_year
    ),
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
    keep_base=True,
    add_geoid=True,
)
bgp1990_to_cty2010.xwalk
 | bgp1990gj | cty2010gj | cty2010ge | wt_pop | wt_fam | wt_hh | wt_hu |
---|---|---|---|---|---|---|---|
0 | G100001090444072500423009999999999921 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | G100001090444444300422009999999999926 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | G100001090444612650422009999999219011 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | G100001090444612650422009999999219012 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
4 | G100001090444614800422009999999999924 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
772 | G100005093552999990515009999999999922 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
773 | G100005093552999990515009999999999923 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
774 | G100005093552999990515009999999999924 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
775 | G100005093552999990516009999999999921 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
776 | G340033010610106000204029999999916014 | G1000030 | 10003 | 0.0 | 0.0 | 0.0 | 0.0 |
777 rows × 7 columns
.csv¶
state_dir = "../../crosswalks/nhgis_bgp1990_cty2010_state/"
# Hand the crosswalk object and the output directory to nhgisxwalk's
# .csv writer.
nhgisxwalk.xwalk_df_to_csv(
cls=bgp1990_to_cty2010,
path=state_dir
)
.csv¶
fname = state_dir + bgp1990_to_cty2010.xwalk_name
# Read the crosswalk back from the .csv at `fname` and display it.
# NOTE(review): the variable name says "bkg2010", but `fname` is built from
# the bgp1990 -> cty2010 crosswalk; the name looks like a leftover --
# confirm nothing else uses it before renaming.
bgp1990_to_bkg2010_df = nhgisxwalk.xwalk_df_from_csv(fname)
bgp1990_to_bkg2010_df
 | bgp1990gj | cty2010gj | cty2010ge | wt_pop | wt_fam | wt_hh | wt_hu |
---|---|---|---|---|---|---|---|
0 | G100001090444072500423009999999999921 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | G100001090444444300422009999999999926 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | G100001090444612650422009999999219011 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | G100001090444612650422009999999219012 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
4 | G100001090444614800422009999999999924 | G1000010 | 10001 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
772 | G100005093552999990515009999999999922 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
773 | G100005093552999990515009999999999923 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
774 | G100005093552999990515009999999999924 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
775 | G100005093552999990516009999999999921 | G1000050 | 10005 | 1.0 | 1.0 | 1.0 | 1.0 |
776 | G340033010610106000204029999999916014 | G1000030 | 10003 | 0.0 | 0.0 | 0.0 | 0.0 |
777 rows × 7 columns