#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd

import synthpop.zone_synthesizer as zs


# #### Specify sample data CSV paths. See the files listed below for the expected structure. Marginal tables require multi-indexed columns, with the category name in level 0 and the category value in level 1 of the column index. Category columns in the sample files should be labeled with the corresponding category names, and the values in those columns should match the category value headers in the marginal table.

# In[3]:


# Relative paths to the marginal tables (households, then persons).
hh_marginal_file, person_marginal_file = (
    'input_data/hh_marginals.csv',
    'input_data/person_marginals.csv',
)
# Relative paths to the sample tables (households, then persons).
hh_sample_file, person_sample_file = (
    'input_data/household_sample.csv',
    'input_data/person_sample.csv',
)


# #### Load and process input marginals and samples and geography crosswalk

# In[4]:


# Hand the four CSV paths to the zone synthesizer's loader. The result is
# unpacked into the household/person marginals, the household/person samples,
# and the geography crosswalk used by the synthesis steps further down.
hh_marg, p_marg, hh_sample, p_sample, xwalk = zs.load_data(
    hh_marginal_file,
    person_marginal_file,
    hh_sample_file,
    person_sample_file,
)


# In[6]:


# Preview of the household marginals (only rendered when run as a notebook
# cell; as a plain script the bare expression's value is discarded).
hh_marg.head()


# In[7]:


# Preview of the person marginals.
p_marg.head()


# In[8]:


# Preview of the person sample.
p_sample.head()


# #### Iterate over all marginals in the geography crosswalk and synthesize in-line

# In[9]:


# Single-process run over the crosswalk. The result is unpacked
# households-first, then persons, then the per-run statistics.
all_households, all_persons, all_stats = zs.synthesize_all_zones(
    hh_marg,
    p_marg,
    hh_sample,
    p_sample,
    xwalk,
)


# In[10]:


# Preview of the synthesized households.
all_households.head()


# #### all_persons.household_id maps person records to all_households.index

# In[11]:


# Preview of the synthesized persons.
all_persons.head()


# #### Synthesize all marginal geographies in the crosswalk using a specified or default number of cores via multiprocessing

# In[12]:


# Only start the multiprocessing run from the process that was launched as the
# main program. Where multiprocessing starts workers with the "spawn" method,
# each worker re-imports this script; without the guard every worker would
# reach this call again and try to start its own set of workers.
# In a notebook kernel or `python script.py` run, __name__ is '__main__', so
# the call executes exactly as before.
if __name__ == '__main__':
    # NOTE(review): this call unpacks persons-first, whereas the
    # synthesize_all_zones call earlier in this script unpacks
    # households-first -- confirm the return order of
    # zs.multiprocess_synthesize before relying on these names.
    all_persons, all_households, all_stats = zs.multiprocess_synthesize(
        hh_marg, p_marg, hh_sample, p_sample, xwalk
    )


# In[13]:


# Preview of the synthesized persons.
all_persons.head()


# In[14]:


# Preview of the synthesized households.
all_households.head()


# In[15]:


# Statistics returned alongside the synthesized tables.
all_stats


# In[ ]:


# In[ ]: