#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# This file is part of the Minnesota Population Center's NHGISXWALK.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
#   https://github.com/ipums/nhgisxwalk


# # Sample workflow: 2000 block group parts to 2010 block groups
#
# ## Starting from a subset of 2010 Delaware blocks
#
# For further background information see:
#
# * **Schroeder, J. P**. 2007. *Target-density weighting interpolation and
#   uncertainty evaluation for temporal analysis of census data*.
#   Geographical Analysis 39 (3):311–335.
#
# #### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

# NOTE: this script is a notebook export. The `# In[n]:` markers delimit the
# original cells, and statements must run top to bottom because later cells
# reuse (and sometimes rebind) names set by earlier ones. `get_ipython` is
# neither defined nor imported in this file, so it has to be supplied by the
# runtime (IPython/Jupyter). Bare expressions such as `base_xwalk.head()` are
# notebook display lines and have no visible effect in a plain interpreter.

# In[1]:


get_ipython().run_line_magic("load_ext", "watermark")
get_ipython().run_line_magic("watermark", "")


# In[2]:


import nhgisxwalk
import inspect
import pandas  # not referenced by name below; presumably kept for `%watermark -iv`

get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
get_ipython().run_line_magic("watermark", "-w")
get_ipython().run_line_magic("watermark", "-iv")


# ### Source and target years for the crosswalk

# In[3]:


source_year, target_year = "2000", "2010"
# Column names of the form "GJOIN<year>" for the source and target years.
gj_src, gj_trg = "GJOIN%s" % source_year, "GJOIN%s" % target_year


# ### Source-target building base

# In[4]:


# NOTE: the trailing "/" matters; file names are appended to this directly.
subset_data_dir = "../testing_data_subsets/"
base_xwalk_name = "nhgis_blk%s_blk%s_gj" % (source_year, target_year)
# The result is handed to the reader as `dtype`; presumably it maps the two
# GJOIN columns to `str` -- confirm against `nhgisxwalk.str_types`.
data_types = nhgisxwalk.str_types([gj_src, gj_trg])
from_csv_kws = {"path": subset_data_dir, "archived": True, "remove_unpacked": True}
read_csv_kws = {"dtype": data_types}
base_xwalk = nhgisxwalk.xwalk_df_from_csv(
    base_xwalk_name, **from_csv_kws, **read_csv_kws
)
base_xwalk.head()


# ### Convenience code shorthand/lookup

# In[5]:


print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))


# In[6]:


nhgisxwalk.valid_geo_shorthand(shorthand_name=False)


# ### Instantiate an `nhgisxwalk.GeoCrossWalk` object
# ##### see [nhgisxwalk.GeoCrossWalk](https://github.com/ipums/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

# In[7]:


nhgisxwalk.desc_code_2000_SF1b


# In[8]:


# Look up the "Total" entry for each of the four universes in the 2000 SF1b
# description/code table.
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b["Persons"]["Total"],
    nhgisxwalk.desc_code_2000_SF1b["Families"]["Total"],
    nhgisxwalk.desc_code_2000_SF1b["Households"]["Total"],
    nhgisxwalk.desc_code_2000_SF1b["Housing Units"]["Total"],
]
input_vars


# In[9]:


# One short tag per entry of `input_vars`, in the same order; passed below as
# `weight_var`.
input_var_tags = ["pop", "fam", "hh", "hu"]


# In[10]:


# Passed as `stfips`; the heading above names Delaware as the subset.
subset_state = "10"
bgp2000_to_bg2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="bg",
    # `subset_data_dir` already ends with "/", so no extra separator is added
    # (the previous form produced ".../testing_data_subsets//2000_block.csv.zip").
    base_source_table=subset_data_dir + "2000_block.csv.zip",
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
    keep_base=True,
    add_geoid=True,
)
bgp2000_to_bg2010.xwalk


# ### Prepare a single data product with a `README.txt`

# In[11]:


xwalk, xwalk_name = bgp2000_to_bg2010.xwalk, bgp2000_to_bg2010.xwalk_name
# Drop the last "_"-separated token of the crosswalk name.
xwalk_name_base = "_".join(xwalk_name.split("_")[:-1])


# In[12]:


out_data_dir = "../../crosswalks/"
# Directory "<out_data_dir><xwalk_name_base>_state/": built once here, then
# used both for writing the product and for reading it back.
xwalk_state_dir = "%s%s_state/" % (out_data_dir, xwalk_name_base)
out_path = xwalk_state_dir + xwalk_name
nhgisxwalk.prepare_data_product(xwalk, xwalk_name, out_path, remove=True)


# ### Read crosswalk from a `.zip` archive

# In[13]:


in_path = xwalk_state_dir
# Every column whose name does not start with "wt" is passed to `str_types`
# (presumably the "wt*" columns are the numeric weights -- confirm).
id_cols = [c for c in xwalk.columns if not c.startswith("wt")]
# `data_types`, `from_csv_kws` and `read_csv_kws` are deliberately rebound
# here for the second read.
data_types = nhgisxwalk.str_types(id_cols)
from_csv_kws = {"path": in_path, "archived": True, "remove_unpacked": True}
read_csv_kws = {"dtype": data_types}
bgp2000_to_bg2010_df = nhgisxwalk.xwalk_df_from_csv(
    xwalk_name, **from_csv_kws, **read_csv_kws
)
bgp2000_to_bg2010_df


# -----------------------------------------------