This is currently only intended for use with block-level data as base units.
James Gaboardi (jgaboardi@gmail.com), 2020-05
%load_ext watermark
%watermark
2020-06-13T12:08:18-04:00 CPython 3.7.6 IPython 7.15.0 compiler : Clang 9.0.1 system : Darwin release : 19.5.0 machine : x86_64 processor : i386 CPU cores : 8 interpreter: 64bit
import inspect
import nhgisxwalk
import numpy
import pandas
%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv
watermark 2.0.2 numpy 1.18.5 nhgisxwalk 0.0.2 pandas 1.0.4
subset_state = "10" # Delaware
#subset_state = "11" # DC
#subset_state = "15" # Hawaii
source_year, target_year = "1990", "2010"
gj_src, gj_trg = "GJOIN%s"%source_year, "GJOIN%s"%target_year
base_xwalk_name = "nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = "../../crosswalks/%s" % base_xwalk_name
base_xwalk_file
'../../crosswalks/nhgis_blk1990_blk2010_gj.csv.zip'
base_source_name = "%s_block/%s_block.csv" % (source_year, source_year)
base_source_file = "../../tabular_data/%s" % base_source_name
base_source_file
'../../tabular_data/1990_block/1990_block.csv'
supp_source_name = "%s_blck_grp_598_103/%s_blck_grp_598_103.csv" % (
source_year, source_year
)
supp_source_file = "../../tabular_data/%s" % supp_source_name
supp_source_file
'../../tabular_data/1990_blck_grp_598_103/1990_blck_grp_598_103.csv'
data_types = nhgisxwalk.str_types([gj_src, gj_trg])
base_xwalk = pandas.read_csv(base_xwalk_file, index_col=0, dtype=data_types)
base_xwalk.head()
/Users/the-gaboardi/miniconda3/envs/nhgis/lib/python3.7/site-packages/numpy/lib/arraysetops.py:569: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison mask |= (ar1 == a)
GJOIN1990 | GJOIN2010 | WEIGHT | PAREA_VIA_BLK00 | |
---|---|---|---|---|
0 | NaN | G01000300107014085 | 0.0 | 0.0 |
1 | NaN | G01000300107014086 | 0.0 | 0.0 |
2 | NaN | G01000300107014089 | 0.0 | 0.0 |
3 | NaN | G01000300107014091 | 0.0 | 0.0 |
4 | NaN | G01000300107014109 | 0.0 | 0.0 |
ss_base = base_xwalk[
base_xwalk["GJOIN2010"].map(lambda x: x[1:3] == subset_state)
].copy()
ss_base.reset_index(drop=True, inplace=True)
not needed for creating a subset perse, but should do regardless
input_vars = [
nhgisxwalk.desc_code_1990["Persons"]["Total"],
nhgisxwalk.desc_code_1990["Families"]["Total"],
nhgisxwalk.desc_code_1990["Households"]["Total"],
nhgisxwalk.desc_code_1990["Housing Units"]["Total"]
]
input_var_tags = ["pop", "fam", "hh", "hu"]
state_bgp1990trt2010 = nhgisxwalk.GeoCrossWalk(
base_xwalk,
source_year=source_year,
target_year=target_year,
source_geo="bgp",
target_geo="trt",
base_source_table=base_source_file,
supp_source_table=supp_source_file,
input_var=input_vars,
weight_var=input_var_tags,
keep_base=True,
add_geoid=True,
stfips=subset_state
)
del base_xwalk
state_bgp1990trt2010.xwalk
bgp1990gj | trt2010gj | trt2010ge | wt_pop | wt_fam | wt_hh | wt_hu | |
---|---|---|---|---|---|---|---|
80076 | G100001090444072500423009999999999921 | G1000010043202 | 10001043202 | 1.0 | 1.0 | 1.0 | 1.0 |
80077 | G100001090444444300422009999999999926 | G1000010042202 | 10001042202 | 1.0 | 1.0 | 1.0 | 1.0 |
80078 | G100001090444612650422009999999219011 | G1000010041200 | 10001041200 | 0.0 | 0.0 | 0.0 | 0.0 |
80079 | G100001090444612650422009999999219011 | G1000010042201 | 10001042201 | 1.0 | 1.0 | 1.0 | 1.0 |
80080 | G100001090444612650422009999999219012 | G1000010042201 | 10001042201 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
81134 | G100005093552999990515009999999999923 | G1000050051500 | 10005051500 | 1.0 | 1.0 | 1.0 | 1.0 |
81135 | G100005093552999990515009999999999924 | G1000050051500 | 10005051500 | 1.0 | 1.0 | 1.0 | 1.0 |
81136 | G100005093552999990516009999999999921 | G1000050051702 | 10005051702 | 1.0 | 1.0 | 1.0 | 1.0 |
284765 | G340033010610106000204029999999916014 | G1000030990100 | 10003990100 | 0.0 | 0.0 | 0.0 | 0.0 |
507805 | NaN | G1000050990000 | 10005990000 | 0.0 | 0.0 | 0.0 | 0.0 |
1063 rows × 7 columns
state_bgp1990trt2010.xwalk.drop_duplicates(subset=["bgp1990gj", "trt2010gj"])
bgp1990gj | trt2010gj | trt2010ge | wt_pop | wt_fam | wt_hh | wt_hu | |
---|---|---|---|---|---|---|---|
80076 | G100001090444072500423009999999999921 | G1000010043202 | 10001043202 | 1.0 | 1.0 | 1.0 | 1.0 |
80077 | G100001090444444300422009999999999926 | G1000010042202 | 10001042202 | 1.0 | 1.0 | 1.0 | 1.0 |
80078 | G100001090444612650422009999999219011 | G1000010041200 | 10001041200 | 0.0 | 0.0 | 0.0 | 0.0 |
80079 | G100001090444612650422009999999219011 | G1000010042201 | 10001042201 | 1.0 | 1.0 | 1.0 | 1.0 |
80080 | G100001090444612650422009999999219012 | G1000010042201 | 10001042201 | 1.0 | 1.0 | 1.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... |
81134 | G100005093552999990515009999999999923 | G1000050051500 | 10005051500 | 1.0 | 1.0 | 1.0 | 1.0 |
81135 | G100005093552999990515009999999999924 | G1000050051500 | 10005051500 | 1.0 | 1.0 | 1.0 | 1.0 |
81136 | G100005093552999990516009999999999921 | G1000050051702 | 10005051702 | 1.0 | 1.0 | 1.0 | 1.0 |
284765 | G340033010610106000204029999999916014 | G1000030990100 | 10003990100 | 0.0 | 0.0 | 0.0 | 0.0 |
507805 | NaN | G1000050990000 | 10005990000 | 0.0 | 0.0 | 0.0 | 0.0 |
1063 rows × 7 columns
state_bgp1990trt2010.xwalk["bgp1990gj"].nunique()
777
state_bgp1990trt2010.xwalk["trt2010gj"].nunique()
218
ss_base.to_csv("../testing_data_subsets/%s" % base_xwalk_name)
ss_base
GJOIN1990 | GJOIN2010 | WEIGHT | PAREA_VIA_BLK00 | |
---|---|---|---|---|
0 | NaN | G10000100432021078 | 0.0 | 0.0 |
1 | NaN | G10000100432023014 | 0.0 | 0.0 |
2 | NaN | G10000100432023015 | 0.0 | 0.0 |
3 | NaN | G10000109900000011 | 0.0 | 0.0 |
4 | NaN | G10000109900000012 | 0.0 | 0.0 |
... | ... | ... | ... | ... |
38292 | G10000500519289 | G10000500519002125 | 1.0 | 1.0 |
38293 | G34003300204401A | G10000309901000007 | 0.0 | 0.0 |
38294 | G34003300204418 | G10000309901000007 | 0.0 | 0.0 |
38295 | G34003300204419 | G10000309901000007 | 0.0 | 0.0 |
38296 | G34003300204420 | G10000309901000007 | 0.0 | 0.0 |
38297 rows × 4 columns
blk1990 = ss_base[~ss_base["GJOIN1990"].isna()]["GJOIN1990"]
blk1990
149 G10000100401101 150 G10000100401102 151 G10000100401102 152 G10000100401102 153 G10000100401103 ... 38292 G10000500519289 38293 G34003300204401A 38294 G34003300204418 38295 G34003300204419 38296 G34003300204420 Name: GJOIN1990, Length: 38148, dtype: object
blk1990.shape
(38148,)
blk1990.nunique()
15538
# read in base source file
base_source_df = pandas.read_csv(base_source_file, dtype=str)
base_source_df
GISJOIN | YEAR | ANRCA | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | TRACTA | ... | STATE | STATEA | URBRURALA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G01000100201101A | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 101A | 1 | 0201 | ... | Alabama | 01 | 1 | 5240 | NaN | Block 101A | 332 | 91 | 107 | 112 |
1 | G01000100201101B | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 101B | 1 | 0201 | ... | Alabama | 01 | 2 | 9999 | NaN | Block 101B | 14 | 3 | 6 | 7 |
2 | G01000100201102A | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 102A | 1 | 0201 | ... | Alabama | 01 | 1 | 5240 | NaN | Block 102A | 248 | 74 | 88 | 89 |
3 | G01000100201103 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 103 | 1 | 0201 | ... | Alabama | 01 | 1 | 5240 | NaN | Block 103 | 49 | 15 | 15 | 16 |
4 | G01000100201104 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 104 | 1 | 0201 | ... | Alabama | 01 | 1 | 5240 | NaN | Block 104 | 12 | 3 | 4 | 4 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4934102 | G56004509513577B | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 577B | 5 | 9513 | ... | Wyoming | 56 | 2 | 9999 | NaN | Block 577B | 5 | 1 | 3 | 3 |
4934103 | G56004509513578 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 578 | 5 | 9513 | ... | Wyoming | 56 | 1 | 9999 | NaN | Block 578 | 59 | 20 | 21 | 22 |
4934104 | G56004509513579 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 579 | 5 | 9513 | ... | Wyoming | 56 | 1 | 9999 | NaN | Block 579 | 30 | 8 | 10 | 11 |
4934105 | G56004509513580 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 580 | 5 | 9513 | ... | Wyoming | 56 | 2 | 9999 | NaN | Block 580 | 35 | 9 | 11 | 11 |
4934106 | G56004509513597 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 597 | 5 | 9513 | ... | Wyoming | 56 | 2 | 9999 | NaN | Block 597 | 39 | 10 | 13 | 13 |
4934107 rows × 30 columns
base_source_df = base_source_df[base_source_df["GISJOIN"].isin(blk1990.unique())]
base_source_df.reset_index(drop=True, inplace=True)
base_source_df
GISJOIN | YEAR | ANRCA | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | TRACTA | ... | STATE | STATEA | URBRURALA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G10000100401101 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 101 | 1 | 0401 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 101 | 24 | 6 | 8 | 9 |
1 | G10000100401102 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 102 | 1 | 0401 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 102 | 145 | 39 | 50 | 58 |
2 | G10000100401103 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 103 | 1 | 0401 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 103 | 75 | 23 | 27 | 29 |
3 | G10000100401104 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 104 | 1 | 0401 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 104 | 69 | 19 | 21 | 22 |
4 | G10000100401105 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 105 | 1 | 0401 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 105 | 2 | 0 | 2 | 3 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
11935 | G10000500519289 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 289 | 2 | 0519 | ... | Delaware | 10 | 2 | 9999 | NaN | Block 289 | 8 | 1 | 6 | 6 |
11936 | G34003300204401A | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 401A | 4 | 0204 | ... | New Jersey | 34 | 1 | 9160 | NaN | Block 401A | 122 | 31 | 51 | 52 |
11937 | G34003300204418 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 418 | 4 | 0204 | ... | New Jersey | 34 | 1 | 9160 | NaN | Block 418 | 86 | 25 | 35 | 36 |
11938 | G34003300204419 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 419 | 4 | 0204 | ... | New Jersey | 34 | 1 | 9160 | NaN | Block 419 | 207 | 50 | 114 | 123 |
11939 | G34003300204420 | 1990 | 99 | 9999 | 9999 | 9999 | 9 | 420 | 4 | 0204 | ... | New Jersey | 34 | 1 | 9160 | NaN | Block 420 | 101 | 19 | 53 | 56 |
11940 rows × 30 columns
base_source_df.to_csv("../testing_data_subsets/%s_block.csv.zip" % source_year)
bgp1990 = state_bgp1990trt2010.xwalk[
~state_bgp1990trt2010.xwalk["bgp1990gj"].isna()
]["bgp1990gj"]
bgp1990
80076 G100001090444072500423009999999999921 80077 G100001090444444300422009999999999926 80078 G100001090444612650422009999999219011 80079 G100001090444612650422009999999219011 80080 G100001090444612650422009999999219012 ... 81133 G100005093552999990515009999999999922 81134 G100005093552999990515009999999999923 81135 G100005093552999990515009999999999924 81136 G100005093552999990516009999999999921 284765 G340033010610106000204029999999916014 Name: bgp1990gj, Length: 1062, dtype: object
# read in supplement source file
supp_source_df = pandas.read_csv(supp_source_file, dtype=str)
supp_source_df
GISJOIN | YEAR | ANRCA | AIANHH | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | ... | STATEA | URBRURALA | URB_AREA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G0100010901710322002119999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 01 | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 402 | 105 | 144 | 167 |
1 | G0100010901710322002119999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 01 | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 279 | 63 | 88 | 96 |
2 | G0100010901719999902119999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 01 | 2 | NaN | 9999 | NaN | BG 1 | 611 | 150 | 179 | 213 |
3 | G0100010901719999902119999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 01 | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 1244 | 318 | 410 | 444 |
4 | G0100010901719999902119999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 01 | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 447 | 126 | 166 | 197 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
367248 | G5600450935209999995129999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 56 | 2 | NaN | 9999 | NaN | BG 3 | 44 | 12 | 13 | 21 |
367249 | G5600450935209999995129999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 56 | 2 | NaN | 9999 | NaN | BG 4 | 42 | 12 | 18 | 22 |
367250 | G5600450935209999995129999999999925 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 5 | ... | 56 | 2 | NaN | 9999 | NaN | BG 5 | 29 | 6 | 9 | 15 |
367251 | G5600450935209999995129999999999926 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 6 | ... | 56 | 2 | NaN | 9999 | NaN | BG 6 (pt.) | 30 | 8 | 10 | 15 |
367252 | G5600450935209999995129999999999927 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 7 | ... | 56 | 2 | NaN | 9999 | NaN | BG 7 (pt.) | 30 | 8 | 9 | 11 |
367253 rows × 34 columns
# GISJOIN ID correction
bgpidcols = nhgisxwalk.id_codes.code_cols("bgp", "1990")
supp_source_df = nhgisxwalk.id_codes.bgp_gj(supp_source_df, bgpidcols, cname="__GISJOIN")
supp_source_df
GISJOIN | YEAR | ANRCA | AIANHH | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | ... | URBRURALA | URB_AREA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | __GISJOIN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G0100010901710322002119999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 402 | 105 | 144 | 167 | G010001090171032200211039999999999922 |
1 | G0100010901710322002119999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 279 | 63 | 88 | 96 | G010001090171032200211039999999999923 |
2 | G0100010901719999902119999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 2 | NaN | 9999 | NaN | BG 1 | 611 | 150 | 179 | 213 | G010001090171999990211039999999999921 |
3 | G0100010901719999902119999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 1244 | 318 | 410 | 444 | G010001090171999990211039999999999922 |
4 | G0100010901719999902119999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 447 | 126 | 166 | 197 | G010001090171999990211039999999999923 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
367248 | G5600450935209999995129999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 2 | NaN | 9999 | NaN | BG 3 | 44 | 12 | 13 | 21 | G560045093520999999512009999999999923 |
367249 | G5600450935209999995129999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 2 | NaN | 9999 | NaN | BG 4 | 42 | 12 | 18 | 22 | G560045093520999999512009999999999924 |
367250 | G5600450935209999995129999999999925 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 5 | ... | 2 | NaN | 9999 | NaN | BG 5 | 29 | 6 | 9 | 15 | G560045093520999999512009999999999925 |
367251 | G5600450935209999995129999999999926 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 6 | ... | 2 | NaN | 9999 | NaN | BG 6 (pt.) | 30 | 8 | 10 | 15 | G560045093520999999512009999999999926 |
367252 | G5600450935209999995129999999999927 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 7 | ... | 2 | NaN | 9999 | NaN | BG 7 (pt.) | 30 | 8 | 9 | 11 | G560045093520999999512009999999999927 |
367253 rows × 35 columns
supp_source_df = supp_source_df[supp_source_df["__GISJOIN"].isin(bgp1990.unique())].copy()
supp_source_df
GISJOIN | YEAR | ANRCA | AIANHH | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | ... | URBRURALA | URB_AREA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | __GISJOIN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
57044 | G1000010904440725004239999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 2 | NaN | 9999 | NaN | BG 1 (pt.) | 179 | 48 | 83 | 168 | G100001090444072500423009999999999921 |
57045 | G1000010904444443004229999999999926 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 6 | ... | 2 | NaN | 9999 | NaN | BG 6 (pt.) | 211 | 62 | 81 | 86 | G100001090444444300422009999999999926 |
57046 | G1000010904446126504229999999219011 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 1 | Dover, DE | 2190 | NaN | BG 1 (pt.) | 1848 | 490 | 615 | 694 | G100001090444612650422009999999219011 |
57047 | G1000010904446126504229999999219012 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 1 | Dover, DE | 2190 | NaN | BG 2 (pt.) | 67 | 17 | 21 | 22 | G100001090444612650422009999999219012 |
57048 | G1000010904446148004229999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 2 | NaN | 9999 | NaN | BG 4 (pt.) | 924 | 261 | 307 | 315 | G100001090444614800422009999999999924 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
57816 | G1000050935529999905159999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 1127 | 316 | 399 | 446 | G100005093552999990515009999999999922 |
57817 | G1000050935529999905159999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 228 | 74 | 99 | 108 | G100005093552999990515009999999999923 |
57818 | G1000050935529999905159999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 2 | NaN | 9999 | NaN | BG 4 | 808 | 214 | 269 | 310 | G100005093552999990515009999999999924 |
57819 | G1000050935529999905169999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 2 | NaN | 9999 | NaN | BG 1 | 1100 | 323 | 401 | 445 | G100005093552999990516009999999999921 |
213548 | G3400330106101060002049999999916014 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 1 | Wilmington, DE--NJ--MD--PA | 9160 | NaN | BG 4 (pt.) | 1049 | 215 | 380 | 400 | G340033010610106000204029999999916014 |
777 rows × 35 columns
supp_source_df.drop(columns=["__GISJOIN"], inplace=True)
supp_source_df.reset_index(drop=True, inplace=True)
supp_source_df
GISJOIN | YEAR | ANRCA | AIANHH | AIANHHA | RES_ONLYA | TRUSTA | RES_TRSTA | BLOCKA | BLCK_GRPA | ... | STATEA | URBRURALA | URB_AREA | URB_AREAA | CD103A | ANPSADPI | ET1001 | EUD001 | EUO001 | ESA001 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | G1000010904440725004239999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 10 | 2 | NaN | 9999 | NaN | BG 1 (pt.) | 179 | 48 | 83 | 168 |
1 | G1000010904444443004229999999999926 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 6 | ... | 10 | 2 | NaN | 9999 | NaN | BG 6 (pt.) | 211 | 62 | 81 | 86 |
2 | G1000010904446126504229999999219011 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 10 | 1 | Dover, DE | 2190 | NaN | BG 1 (pt.) | 1848 | 490 | 615 | 694 |
3 | G1000010904446126504229999999219012 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 10 | 1 | Dover, DE | 2190 | NaN | BG 2 (pt.) | 67 | 17 | 21 | 22 |
4 | G1000010904446148004229999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 10 | 2 | NaN | 9999 | NaN | BG 4 (pt.) | 924 | 261 | 307 | 315 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
772 | G1000050935529999905159999999999922 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 2 | ... | 10 | 2 | NaN | 9999 | NaN | BG 2 (pt.) | 1127 | 316 | 399 | 446 |
773 | G1000050935529999905159999999999923 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 3 | ... | 10 | 2 | NaN | 9999 | NaN | BG 3 (pt.) | 228 | 74 | 99 | 108 |
774 | G1000050935529999905159999999999924 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 10 | 2 | NaN | 9999 | NaN | BG 4 | 808 | 214 | 269 | 310 |
775 | G1000050935529999905169999999999921 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 1 | ... | 10 | 2 | NaN | 9999 | NaN | BG 1 | 1100 | 323 | 401 | 445 |
776 | G3400330106101060002049999999916014 | 1990 | 99 | NaN | 9999 | 9999 | 9999 | 9 | NaN | 4 | ... | 34 | 1 | Wilmington, DE--NJ--MD--PA | 9160 | NaN | BG 4 (pt.) | 1049 | 215 | 380 | 400 |
777 rows × 34 columns
supp_source_df.to_csv(
"../testing_data_subsets/%s_blck_grp_598_103.csv.zip" % source_year
)