James D. Gaboardi, 06/2020
%load_ext watermark
%watermark
2020-07-29T16:31:43-04:00 CPython 3.7.6 IPython 7.15.0 compiler : Clang 9.0.1 system : Darwin release : 19.6.0 machine : x86_64 processor : i386 CPU cores : 8 interpreter: 64bit
import nhgisxwalk
import inspect
import numpy
import pandas
%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv
watermark 2.0.2 pandas 1.0.4 numpy 1.18.5 nhgisxwalk 0.0.6
# Census years bounding the crosswalk (source -> target).
source_year = "1990"
target_year = "2010"

# ID column names used in the base block-level crosswalk.
gj_src = f"GJOIN{source_year}"
gj_trg = f"GJOIN{target_year}"

# Path to the zipped base block-to-block crosswalk.
base_xwalk_name = f"nhgis_blk{source_year}_blk{target_year}_gj.zip"
base_xwalk_file = f"../../crosswalks/{base_xwalk_name}"

# dtype mapping for the two ID columns, as returned by nhgisxwalk.str_types().
data_types = nhgisxwalk.str_types([gj_src, gj_trg])
base_xwalk = pandas.read_csv(base_xwalk_file, dtype=data_types)
base_xwalk.head()
GJOIN1990 | GJOIN2010 | WEIGHT | PAREA_VIA_BLK00 | |
---|---|---|---|---|
0 | G01000100201101A | G01000100201002004 | 0.000753 | 0.014284 |
1 | G01000100201101A | G01000100201002005 | 0.042020 | 0.109618 |
2 | G01000100201101A | G01000100201002006 | 0.262146 | 0.498133 |
3 | G01000100201101A | G01000100201002016 | 0.237187 | 0.218109 |
4 | G01000100201101A | G01000100201002023 | 0.099097 | 0.012864 |
# Base (block-level) tabular data for the source year.
base_source_name = f"{source_year}_block/{source_year}_block.csv"
base_source_file = f"../../tabular_data/{base_source_name}"

# Supplementary tabular data for the source year; the directory and the
# file inside it share the same stem.
supp_source_stem = f"{source_year}_blck_grp_598_103"
supp_source_name = f"{supp_source_stem}/{supp_source_stem}.csv"
supp_source_file = f"../../tabular_data/{supp_source_name}"

# Display the geography names and their shorthand codes.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)
{'block': 'blk', 'block group part': 'bgp', 'block group': 'bg', 'tract': 'tr', 'county': 'co'}
# nhgisxwalk.GeoCrossWalk object
nhgisxwalk.desc_code_1990
{'Persons': {'Persons': 'Universe', 'NP1': 'Source code', 'ET1': 'NHGIS code', 'Total': 'ET1001'}, 'Families': {'Families': 'Universe', 'NP2': 'Source code', 'EUD': 'NHGIS code', 'Total': 'EUD001'}, 'Households': {'Households': 'Universe', 'NP3': 'Source code', 'EUO': 'NHGIS code', 'Total': 'EUO001'}, 'Housing Units': {'Housing Units': 'Universe', 'NH1': 'Source code', 'ESA': 'NHGIS code', 'Total': 'ESA001'}}
# Collect the "Total" NHGIS variable code of each 1990 universe, in the
# order: persons, families, households, housing units.
universes = ("Persons", "Families", "Households", "Housing Units")
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"] for universe in universes
]
input_vars
['ET1001', 'EUD001', 'EUO001', 'ESA001']
# Short tags, one per entry of input_vars; they show up as the wt_pop,
# wt_fam, wt_hh and wt_hu columns of the resulting crosswalk.
input_var_tags = ["pop", "fam", "hh", "hu"]

# Keyword options for the 1990 block group part -> 2010 county crosswalk.
xwalk_options = {
    # years and geographies
    "source_year": source_year,
    "target_year": target_year,
    "source_geo": "bgp",
    "target_geo": "co",
    # tabular inputs
    "base_source_table": base_source_file,
    "supp_source_table": supp_source_file,
    # variables and their weight-column tags
    "input_var": input_vars,
    "weight_var": input_var_tags,
    # output options
    "keep_base": False,
    "add_geoid": True,
}
bgp1990_to_co2010 = nhgisxwalk.GeoCrossWalk(base_xwalk, **xwalk_options)

# The base crosswalk is no longer referenced by this notebook; drop the name.
del base_xwalk

bgp1990_to_co2010.xwalk
bgp1990gj | co2010gj | co2010ge | wt_pop | wt_fam | wt_hh | wt_hu | |
---|---|---|---|---|---|---|---|
0 | G010001090171032200211039999999999922 | G0100010 | 01001 | 1.0 | 1.0 | 1.0 | 1.0 |
1 | G010001090171032200211039999999999923 | G0100010 | 01001 | 1.0 | 1.0 | 1.0 | 1.0 |
2 | G010001090171999990211039999999999921 | G0100010 | 01001 | 1.0 | 1.0 | 1.0 | 1.0 |
3 | G010001090171999990211039999999999922 | G0100010 | 01001 | 1.0 | 1.0 | 1.0 | 1.0 |
4 | G010001090171999990211039999999999923 | G0100010 | 01001 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
375950 | G560045093520999999512009999999999923 | G5600450 | 56045 | 1.0 | 1.0 | 1.0 | 1.0 |
375951 | G560045093520999999512009999999999924 | G5600450 | 56045 | 1.0 | 1.0 | 1.0 | 1.0 |
375952 | G560045093520999999512009999999999925 | G5600450 | 56045 | 1.0 | 1.0 | 1.0 | 1.0 |
375953 | G560045093520999999512009999999999926 | G5600450 | 56045 | 1.0 | 1.0 | 1.0 | 1.0 |
375954 | G560045093520999999512009999999999927 | G5600450 | 56045 | 1.0 | 1.0 | 1.0 | 1.0 |
375955 rows × 7 columns
# Write the crosswalk to .csv
nat_dir = "../../crosswalks/"
# Write the full (national) crosswalk under nat_dir.
national_kwds = {
    "df": bgp1990_to_co2010.xwalk,
    "xwalk_name": bgp1990_to_co2010.xwalk_name,
}
nhgisxwalk.xwalk_df_to_csv(dfkwds=national_kwds, path=nat_dir)
# Unique state FIPS codes found in the crosswalk's target column, sorted.
stfips_codes = sorted(
    nhgisxwalk.extract_unique_stfips(
        df=bgp1990_to_co2010.xwalk, endpoint=bgp1990_to_co2010.target
    )
)
stfips_codes
['01', '02', '04', '05', '06', '08', '09', '10', '11', '12', '13', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '50', '51', '53', '54', '55', '56', 'nan']
# Write one crosswalk subset per state under state_dir.
state_dir = nat_dir + "nhgis_bgp1990_co2010_state/"

# These values are the same for every state, so look them up once.
xwalk_name = bgp1990_to_co2010.xwalk_name
# The source column must come from `.source`; reading `.target` twice made
# `sort_by` sort on the target column only.
source, target = bgp1990_to_co2010.source, bgp1990_to_co2010.target

# NOTE(review): the displayed `stfips_codes` output includes a 'nan' entry;
# confirm whether a 'nan' state file is intended.
for stfips in stfips_codes:
    # Subset of the national crosswalk for this state, selected on the
    # target column and sorted by source then target ID.
    _stxwalk = nhgisxwalk.extract_state(
        bgp1990_to_co2010.xwalk,
        stfips,
        xwalk_name,
        target,
        sort_by=[source, target],
    )
    dfkwds = {"df": _stxwalk, "stfips": stfips, "xwalk_name": xwalk_name}
    nhgisxwalk.xwalk_df_to_csv(dfkwds=dfkwds, path=state_dir)