# This file is part of the Minnesota Population Center's NHGISXWALK.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
# https://github.com/ipums/nhgisxwalk
For further background information, see the project repository: https://github.com/ipums/nhgisxwalk
%load_ext watermark
%watermark
2020-08-19T18:07:25-04:00 CPython 3.8.5 IPython 7.16.1 compiler : Clang 10.0.1 system : Darwin release : 19.6.0 machine : x86_64 processor : i386 CPU cores : 8 interpreter: 64bit
import nhgisxwalk
import inspect
import pandas
%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv
watermark 2.0.2 pandas 1.1.0 nhgisxwalk 0.0.9
# Source and target census years, kept as strings because they are spliced
# into the column labels and the crosswalk name below.
source_year = "2000"
target_year = "2010"

# Labels built from the years ("GJOIN2000" and "GJOIN2010").
gj_src = f"GJOIN{source_year}"
gj_trg = f"GJOIN{target_year}"

# Directory passed as the `path` when reading the base crosswalk; the trailing
# slash is part of the value.
subset_data_dir = "../testing_data_subsets/"

# Evaluates to "nhgis_blk2000_blk2010_gj" for the years above.
base_xwalk_name = f"nhgis_blk{source_year}_blk{target_year}_gj"
# Dtype specification for the two GJOIN columns, as produced by
# `nhgisxwalk.str_types` (presumably maps each label to a string dtype --
# confirm against the library).
data_types = nhgisxwalk.str_types([gj_src, gj_trg])

# The keyword arguments are kept in two separate dicts and unpacked together
# in the call below.
from_csv_kws = dict(path=subset_data_dir, archived=True, remove_unpacked=True)
read_csv_kws = dict(dtype=data_types)

base_xwalk = nhgisxwalk.xwalk_df_from_csv(
    base_xwalk_name,
    **from_csv_kws,
    **read_csv_kws,
)

# Preview the first rows of the base crosswalk.
base_xwalk.head()
GJOIN2000 | GJOIN2010 | WEIGHT | PAREA | |
---|---|---|---|---|
0 | G10000100401001000 | G10000100401001000 | 1.000000 | 1.000000 |
1 | G10000100401001001 | G10000100401001001 | 0.999981 | 0.999988 |
2 | G10000100401001001 | G10000100401001003 | 0.000019 | 0.000012 |
3 | G10000100401001002 | G10000100401001002 | 1.000000 | 1.000000 |
4 | G10000100401001003 | G10000100401001003 | 1.000000 | 1.000000 |
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))
def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies.

    Parameters
    ----------
    shorthand_name : bool
        When ``True`` (the default) the returned mapping is keyed by the
        shorthand code (e.g. ``"blk"``) with the full geography name as the
        value. When ``False`` the mapping is inverted, so it is keyed by the
        full name (e.g. ``"block"``) with the shorthand code as the value.

    Returns
    -------
    lookup : dict
        The shorthand-to-name mapping, or its inverse.
    """
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bg": "block group",
        "tr": "tract",
        "co": "county",
    }
    if not shorthand_name:
        # Swap keys and values to look up the shorthand from the full name.
        lookup = {v: k for k, v in lookup.items()}
    return lookup
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)
{'block': 'blk', 'block group part': 'bgp', 'block group': 'bg', 'tract': 'tr', 'county': 'co'}
# `nhgisxwalk.GeoCrossWalk` object
nhgisxwalk.desc_code_2000_SF1b
{'Persons': {'Persons': 'Universe', 'NP001A': 'Source code', 'FXS': 'NHGIS code', 'Total': 'FXS001'}, 'Families': {'Families': 'Universe', 'NP031A': 'Source code', 'F2V': 'NHGIS code', 'Total': 'F2V001'}, 'Households': {'Households': 'Universe', 'NP010A': 'Source code', 'FY4': 'NHGIS code', 'Total': 'FY4001'}, 'Housing Units': {'Housing Units': 'Universe', 'NH001A': 'Source code', 'FV5': 'NHGIS code', 'Total': 'FV5001'}}
# Collect the "Total" entry of each universe in `desc_code_2000_SF1b`.
# The order of the universes fixes the order of the resulting list.
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars
['FXS001', 'F2V001', 'FY4001', 'FV5001']
# Short tags, one per entry of `input_vars` and in the same order; they appear
# as the `wt_*` column suffixes of the resulting crosswalk.
input_var_tags = ["pop", "fam", "hh", "hu"]

# Value passed as `stfips` to the crosswalk constructor.
subset_state = "10"

# Build the 2000 block group part ("bgp") to 2010 block group ("bg") crosswalk
# from the base block-level crosswalk.
bgp2000_to_bg2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="bg",
    # `subset_data_dir` already ends with "/", so the file name is appended
    # directly (a leading "/" here would produce a doubled separator).
    base_source_table=subset_data_dir + "2000_block.csv.zip",
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
    keep_base=True,
    add_geoid=True,
)

# Display the generated crosswalk table.
bgp2000_to_bg2010.xwalk
bgp2000gj | bg2010gj | bg2010ge | wt_pop | wt_fam | wt_hh | wt_hu | |
---|---|---|---|---|---|---|---|
0 | G10000109044444430042202U1 | G10000100422021 | 100010422021 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | G10000109044461265042201R1 | G10000100422011 | 100010422011 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | G10000109044461265042201U1 | G10000100422011 | 100010422011 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | G10000109044461265042201U2 | G10000100422012 | 100010422012 | 1.0 | 1.0 | 1.0 | 1.0 |
4 | G10000109044461480042202R2 | G10000100422022 | 100010422022 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
1220 | G10000509355299999051500R4 | G10000500515004 | 100050515004 | 1.0 | 1.0 | 1.0 | 1.0 |
1221 | G10000509355299999051500U1 | G10000500515001 | 100050515001 | 1.0 | 1.0 | 1.0 | 1.0 |
1222 | G10000509355299999051500U3 | G10000500515003 | 100050515003 | 1.0 | 1.0 | 1.0 | 1.0 |
1223 | G10000509355299999051500U4 | G10000500515004 | 100050515004 | 1.0 | 1.0 | 1.0 | 1.0 |
1224 | G34003301061010600020400U2 | G10000309901000 | 100039901000 | 0.0 | 0.0 | 0.0 | 0.0 |
1225 rows × 7 columns
# README.txt
# Pull the crosswalk table and its name off the GeoCrossWalk instance.
xwalk = bgp2000_to_bg2010.xwalk
xwalk_name = bgp2000_to_bg2010.xwalk_name

# Crosswalk name with its final "_"-separated token dropped.
xwalk_name_base = "_".join(xwalk_name.split("_")[:-1])

# Output location: "<out_data_dir><xwalk_name_base>_state/<xwalk_name>".
out_data_dir = "../../crosswalks/"
out_path = f"{out_data_dir}{xwalk_name_base}_state/{xwalk_name}"

nhgisxwalk.prepare_data_product(xwalk, xwalk_name, out_path, remove=True)
# .zip archive
# Directory portion of the output path used when the data product was written:
# "<out_data_dir><xwalk_name_base>_state/".
in_path = f"{out_data_dir}{xwalk_name_base}_state/"

# Every column whose name does not start with "wt" is treated as an identifier
# column and given the dtype returned by `nhgisxwalk.str_types`.
id_cols = [c for c in xwalk.columns if not c.startswith("wt")]
data_types = nhgisxwalk.str_types(id_cols)

from_csv_kws = {"path": in_path, "archived": True, "remove_unpacked": True}
read_csv_kws = {"dtype": data_types}
bgp2000_to_bg2010_df = nhgisxwalk.xwalk_df_from_csv(
    xwalk_name, **from_csv_kws, **read_csv_kws
)

# Display the crosswalk as read back from disk.
bgp2000_to_bg2010_df
bgp2000gj | bg2010gj | bg2010ge | wt_pop | wt_fam | wt_hh | wt_hu | |
---|---|---|---|---|---|---|---|
0 | G10000109044444430042202U1 | G10000100422021 | 100010422021 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | G10000109044461265042201R1 | G10000100422011 | 100010422011 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | G10000109044461265042201U1 | G10000100422011 | 100010422011 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | G10000109044461265042201U2 | G10000100422012 | 100010422012 | 1.0 | 1.0 | 1.0 | 1.0 |
4 | G10000109044461480042202R2 | G10000100422022 | 100010422022 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
1220 | G10000509355299999051500R4 | G10000500515004 | 100050515004 | 1.0 | 1.0 | 1.0 | 1.0 |
1221 | G10000509355299999051500U1 | G10000500515001 | 100050515001 | 1.0 | 1.0 | 1.0 | 1.0 |
1222 | G10000509355299999051500U3 | G10000500515003 | 100050515003 | 1.0 | 1.0 | 1.0 | 1.0 |
1223 | G10000509355299999051500U4 | G10000500515004 | 100050515004 | 1.0 | 1.0 | 1.0 | 1.0 |
1224 | G34003301061010600020400U2 | G10000309901000 | 100039901000 | 0.0 | 0.0 | 0.0 | 0.0 |
1225 rows × 7 columns