import pandas as pd
import synthpop.zone_synthesizer as zs
# Input CSV locations, grouped by unit: household files first, then person
# files. Each unit has a marginals table and a sample table.
hh_marginal_file = 'input_data/hh_marginals.csv'
hh_sample_file = 'input_data/household_sample.csv'
person_marginal_file = 'input_data/person_marginals.csv'
person_sample_file = 'input_data/person_sample.csv'

# A single load_data call returns the four tables plus the crosswalk (xwalk)
# that is passed to the synthesis calls further down.
(hh_marg, p_marg, hh_sample, p_sample, xwalk) = zs.load_data(
    hh_marginal_file,
    person_marginal_file,
    hh_sample_file,
    person_sample_file,
)

# Preview the first rows of the household marginals.
hh_marg.head()
cat_name | cars | | | children | | income | | | workers | | |
---|---|---|---|---|---|---|---|---|---|---|---|
cat_values | none | one | two or more | no | yes | gt100 | gt35-lt100 | lt35 | none | one | two or more |
1 | 7 | 49 | 197 | 41 | 215 | 57 | 125 | 74 | 72 | 77 | 105 |
2 | 9 | 59 | 237 | 68 | 239 | 83 | 126 | 98 | 87 | 93 | 125 |
3 | 10 | 69 | 275 | 79 | 279 | 74 | 170 | 114 | 102 | 108 | 146 |
4 | 11 | 76 | 302 | 167 | 224 | 42 | 105 | 244 | 111 | 118 | 160 |
5 | 18 | 117 | 466 | 86 | 517 | 50 | 261 | 292 | 171 | 182 | 247 |
# Preview the first rows of the person marginals returned by load_data.
p_marg.head()
cat_name | age | | | | race | | | | sex | |
---|---|---|---|---|---|---|---|---|---|---|
cat_values | 19 and under | 20 to 35 | 35 to 60 | above 60 | asian | black | other | white | female | male |
1 | 312 | 108 | 223 | 177 | 64 | 0 | 0 | 756 | 440 | 380 |
2 | 235 | 143 | 296 | 181 | 0 | 0 | 0 | 855 | 452 | 403 |
3 | 303 | 229 | 445 | 174 | 0 | 0 | 24 | 1127 | 565 | 586 |
4 | 215 | 77 | 356 | 189 | 0 | 0 | 29 | 808 | 389 | 448 |
5 | 506 | 539 | 619 | 262 | 0 | 0 | 0 | 1926 | 981 | 945 |
# Preview the first rows of the person sample returned by load_data.
p_sample.head()
| | serialno | sample_geog | age | race | sex | AGEP | SEX | RAC1P |
---|---|---|---|---|---|---|---|---|
0 | 2012000005576 | 1 | 20 to 35 | white | male | 27 | 1 | 1 |
1 | 2012000017760 | 1 | 20 to 35 | white | male | 34 | 1 | 1 |
2 | 2012000021787 | 1 | 20 to 35 | white | male | 32 | 1 | 1 |
3 | 2012000021815 | 1 | 20 to 35 | white | male | 23 | 1 | 1 |
4 | 2012000065237 | 1 | 20 to 35 | white | male | 24 | 1 | 1 |
# Run the synthesis through zs.synthesize_all_zones, passing the marginals,
# samples and crosswalk loaded earlier. The results are unpacked as
# (households, persons, fit statistics).
# NOTE(review): this run prints repeated "divide by zero" RuntimeWarnings
# from ipu.py, where the adjustment is constraint / (column * weights).sum().
# Presumably the weighted sample total is zero for some constraint; confirm
# whether the inputs should be cleaned or the warning is expected.
all_households, all_persons, all_stats = zs.synthesize_all_zones(hh_marg, p_marg, hh_sample, p_sample, xwalk)
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 254 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 306 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 356 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 390 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 601 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 324 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 556 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 342 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 273 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 228 households Drawing 857 households Drawing 748 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 744 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 953 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 719 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 185 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 183 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 286 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 317 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 711 households
c:\users\juan\documents\github\synthpop\synthpop\ipu\ipu.py:190: RuntimeWarning: divide by zero encountered in double_scalars adj = constraint / (column * weights).sum()
Drawing 345 households
# Preview the first rows of the synthesized households.
all_households.head()
household_id | serialno | sample_geog | cars | workers | children | income | VEH | FINCP | NOC | WIF | cat_id | geog |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2010001229265 | 1 | none | none | no | lt35 | 0 | NaN | 0 | NaN | 6 | 1 |
2 | 2010000262947 | 1 | none | none | yes | lt35 | 0 | 0.0 | 1 | 0.0 | 15 | 1 |
3 | 2010000141368 | 1 | none | none | yes | lt35 | 0 | 3500.0 | 1 | 0.0 | 15 | 1 |
4 | 2012000162541 | 1 | none | none | yes | lt35 | 0 | 18300.0 | 2 | 0.0 | 15 | 1 |
5 | 2012001476110 | 1 | none | none | yes | lt35 | 0 | 3100.0 | 3 | 0.0 | 15 | 1 |
# Preview the first rows of the synthesized persons.
all_persons.head()
| | serialno | sample_geog | age | race | sex | AGEP | SEX | RAC1P | cat_id | geog | household_id |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2012000136005 | 1 | 20 to 35 | white | male | 31 | 1 | 1 | 69 | 1 | 85 |
1 | 2012000136005 | 1 | 35 to 60 | white | female | 36 | 2 | 1 | 76 | 1 | 85 |
2 | 2012000136005 | 1 | 19 and under | white | male | 10 | 1 | 1 | 61 | 1 | 85 |
3 | 2012000136005 | 1 | 19 and under | white | male | 8 | 1 | 1 | 61 | 1 | 85 |
4 | 2012000136005 | 1 | 19 and under | white | male | 5 | 1 | 1 | 61 | 1 | 85 |
# Repeat the synthesis through the multiprocessing entry point.
# NOTE: the results are unpacked persons-first here, the opposite of the
# synthesize_all_zones call earlier in this notebook. The previews that follow
# show person columns in all_persons and household columns in all_households,
# which is consistent with this order.
# These assignments overwrite the results of the earlier run.
all_persons, all_households, all_stats = zs.multiprocess_synthesize(hh_marg, p_marg, hh_sample, p_sample, xwalk)
# Preview the first rows of the persons from the multiprocessing run.
all_persons.head()
| | serialno | sample_geog | age | race | sex | AGEP | SEX | RAC1P | cat_id | geog | household_id |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2012000136005 | 1 | 20 to 35 | white | male | 31 | 1 | 1 | 69 | 1 | 81 |
1 | 2012000136005 | 1 | 35 to 60 | white | female | 36 | 2 | 1 | 76 | 1 | 81 |
2 | 2012000136005 | 1 | 19 and under | white | male | 10 | 1 | 1 | 61 | 1 | 81 |
3 | 2012000136005 | 1 | 19 and under | white | male | 8 | 1 | 1 | 61 | 1 | 81 |
4 | 2012000136005 | 1 | 19 and under | white | male | 5 | 1 | 1 | 61 | 1 | 81 |
# Preview the first rows of the households from the multiprocessing run.
all_households.head()
household_id | serialno | sample_geog | cars | workers | children | income | VEH | FINCP | NOC | WIF | cat_id | geog |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2009000465330 | 1 | none | none | no | lt35 | 0 | NaN | 0 | NaN | 6 | 1 |
2 | 2009000455972 | 1 | none | none | yes | lt35 | 0 | 3500.0 | 2 | 0.0 | 15 | 1 |
3 | 2012001476110 | 1 | none | none | yes | lt35 | 0 | 3100.0 | 3 | 0.0 | 15 | 1 |
4 | 2012001476110 | 1 | none | none | yes | lt35 | 0 | 3100.0 | 3 | 0.0 | 15 | 1 |
5 | 2010000141368 | 1 | none | none | yes | lt35 | 0 | 3500.0 | 1 | 0.0 | 15 | 1 |
# Display the fit statistics table from the multiprocessing run: one row per
# geog with its chi-square and p-score.
all_stats
| | chi-square | geog | p-score |
---|---|---|---|
0 | 2.070855 | 1 | 9.998970e-01 |
1 | 154.420913 | 2 | 4.782383e-30 |
2 | 182.585670 | 3 | 5.033580e-32 |
3 | 96.436446 | 4 | 2.276937e-14 |
4 | 468.714250 | 5 | 4.243758e-97 |
5 | 5.642968 | 6 | 9.581878e-01 |
6 | 516.866875 | 7 | 2.458698e-101 |
7 | 79.638305 | 8 | 1.632443e-14 |
8 | 117.238663 | 9 | 1.836683e-20 |
9 | 2000.143804 | 10 | 0.000000e+00 |
10 | 642.117025 | 11 | 2.372825e-117 |
11 | 62.442878 | 12 | 4.130503e-07 |
12 | 1174.759712 | 13 | 8.130438e-234 |
13 | 670.414502 | 14 | 2.542384e-132 |
14 | 1081.225334 | 15 | 4.449635e-220 |
15 | 12.940226 | 16 | 7.357824e-02 |
16 | 1.161109 | 17 | 9.917950e-01 |
17 | 159.879713 | 18 | 2.084872e-27 |
18 | 0.507767 | 19 | 9.994175e-01 |
19 | 116.945628 | 20 | 7.393055e-18 |
20 | 14.386659 | 21 | 4.471640e-02 |