#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# This file is part of the Minnesota Population Center's NHGISXWALK.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
#   https://github.com/ipums/nhgisxwalk


# # Sample workflow: 2000 block group parts to 2010 block groups
# 
# ## Starting from a subset of 2010 Delaware blocks
# 
# For further background information see:
# 
# * **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3): 311–335.
# 
# #### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

# In[1]:


# Script form of the `%load_ext watermark` and `%watermark` line magics:
# register the extension, then invoke it with no arguments.
get_ipython().run_line_magic("load_ext", "watermark")
get_ipython().run_line_magic("watermark", "")


# In[2]:


import inspect
import os

import pandas

import nhgisxwalk

# Script form of `%load_ext autoreload` / `%autoreload 2`, so edits to
# imported modules are picked up without restarting the kernel.
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")

# `%watermark -w` reports the watermark version itself; `%watermark -iv`
# reports the versions of the modules imported above.
get_ipython().run_line_magic("watermark", "-w")
get_ipython().run_line_magic("watermark", "-iv")


# ### Source and target years for the crosswalk

# In[3]:


# Crosswalk endpoints, kept as strings because they are interpolated into
# column names and file names further down.
source_year = "2000"
target_year = "2010"

# Per-year join-column names ("GJOIN" + year), in source/target order.
gj_src, gj_trg = ("GJOIN%s" % year for year in (source_year, target_year))


# ### Load the base source-to-target (block-to-block) crosswalk

# In[4]:


# Location of the archived test subsets and the file stem of the base
# block-to-block crosswalk for the chosen years.
subset_data_dir = "../testing_data_subsets/"
base_xwalk_name = "nhgis_blk%s_blk%s_gj" % (source_year, target_year)

# dtype specification for the two join columns, built by
# `nhgisxwalk.str_types` (presumably column -> str; confirm in the package).
data_types = nhgisxwalk.str_types([gj_src, gj_trg])
read_csv_kws = dict(dtype=data_types)

# Options for locating/unpacking the archive; both keyword sets are forwarded
# to `xwalk_df_from_csv` in a single call.
from_csv_kws = dict(path=subset_data_dir, archived=True, remove_unpacked=True)

base_xwalk = nhgisxwalk.xwalk_df_from_csv(
    base_xwalk_name, **from_csv_kws, **read_csv_kws
)
base_xwalk.head()


# ### Convenience code shorthand/lookup

# In[5]:


# Print the source code of `nhgisxwalk.valid_geo_shorthand` for reference.
# NOTE(review): presumably this helper defines the geography shorthand codes
# (such as "bgp" and "bg") used when building the crosswalk below — confirm.
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))


# In[6]:


# Call the lookup with `shorthand_name=False`; the returned value is displayed
# as the cell output. NOTE(review): presumably the flag selects which side of
# the name/shorthand mapping is used as the key — confirm against the helper's
# source.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)


# ### Instantiate an `nhgisxwalk.GeoCrossWalk` object
# ##### see [nhgisxwalk.GeoCrossWalk](https://github.com/ipums/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

# In[7]:


# Display the `desc_code_2000_SF1b` lookup shipped with nhgisxwalk. The next
# cell indexes it as `desc_code_2000_SF1b[<universe>]["Total"]` to select the
# input variables.
nhgisxwalk.desc_code_2000_SF1b


# In[8]:


# Collect the "Total" variable code for each universe of interest, in the
# order the weight tags are declared in the following cell.
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars


# In[9]:


# Short tags passed to `GeoCrossWalk` as `weight_var`; listed in the same
# order as the entries of `input_vars`.
input_var_tags = [
    "pop",  # Persons
    "fam",  # Families
    "hh",  # Households
    "hu",  # Housing Units
]


# In[10]:


# State FIPS code passed to `GeoCrossWalk` as `stfips` (the notebook title
# identifies the subset as Delaware).
subset_state = "10"

# Build the 2000 block group part ("bgp") -> 2010 block group ("bg") crosswalk
# from the base block crosswalk, using the selected input variables and their
# weight tags.
bgp2000_to_bg2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="bg",
    # `subset_data_dir` already ends with "/"; joining (rather than
    # concatenating "/2000_block.csv.zip") avoids a doubled path separator.
    base_source_table=os.path.join(subset_data_dir, "2000_block.csv.zip"),
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
    keep_base=True,
    add_geoid=True,
)
bgp2000_to_bg2010.xwalk


# ### Prepare a single data product with a `README.txt`

# In[11]:


# Pull the finished crosswalk table and its name off the GeoCrossWalk object.
xwalk = bgp2000_to_bg2010.xwalk
xwalk_name = bgp2000_to_bg2010.xwalk_name

# Drop the final "_"-delimited token of the name (everything before the last
# underscore; empty string when the name has no underscore).
xwalk_name_base = xwalk_name.rpartition("_")[0]


# In[12]:


# The product is written to "<out_data_dir><xwalk_name_base>_state/<xwalk_name>".
out_data_dir = "../../crosswalks/"
out_path = "%s%s%s/%s" % (
    out_data_dir,
    xwalk_name_base,
    "_state",
    xwalk_name,
)

# Package the crosswalk as a single data product at `out_path`.
nhgisxwalk.prepare_data_product(
    xwalk,
    xwalk_name,
    out_path,
    remove=True,
)


# ### Read crosswalk from a `.zip` archive

# In[13]:


# Directory the data product was written to in the previous step.
in_path = "%s%s%s" % (out_data_dir, xwalk_name_base, "_state/")

# Every column whose name does not start with "wt" is treated as an ID column
# and given the dtype produced by `nhgisxwalk.str_types`.
id_cols = list(filter(lambda col: not col.startswith("wt"), xwalk.columns))
data_types = nhgisxwalk.str_types(id_cols)
read_csv_kws = dict(dtype=data_types)

# Same archive-handling options as for the base crosswalk, pointed at `in_path`.
from_csv_kws = dict(path=in_path, archived=True, remove_unpacked=True)

bgp2000_to_bg2010_df = nhgisxwalk.xwalk_df_from_csv(
    xwalk_name, **from_csv_kws, **read_csv_kws
)
bgp2000_to_bg2010_df


# -----------------------------------------------