from shapely.geometry import Polygon
import numpy as np
%matplotlib inline
import geopandas as gpd
from tobler import area_interpolate, area_tables
polys1 = gpd.GeoSeries([Polygon([(0,0), (10,0), (10,5), (0,5)]),
Polygon([(0,5), (0,10), (10,10), (10,5)])])
polys2 = gpd.GeoSeries([Polygon([(0,0), (5,0), (5,7), (0,7)]),
Polygon([(5,0), (5,10), (10,10), (10,0)]),
Polygon([(0,7), (0,10), (5,10), (5,7) ])
])
df1 = gpd.GeoDataFrame({'geometry': polys1})
df2 = gpd.GeoDataFrame({'geometry': polys2})
df1['population'] = [ 500, 200]
df1['pci'] = [75, 100]
df1['income'] = df1['population'] * df1['pci']
df2['population'] = [ 500, 100, 200]
df2['pci'] = [75, 80, 100]
df2['income'] = df2['population'] * df2['pci']
ax = df1.plot(color='red', edgecolor='k')
ax = df2.plot(color='green', alpha=0.5, edgecolor='k')
res_union = gpd.overlay(df1, df2, how='union')
ax = res_union.plot(alpha=0.5, cmap='tab10')
df1.plot(ax=ax, facecolor='none', edgecolor='k');
df2.plot(ax=ax, facecolor='none', edgecolor='k');
area_tables(df1, df2)
(array([[25., 0., 25., 0., 0.], [ 0., 10., 0., 25., 15.]]), array([[1., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 1., 0.], [0., 0., 1.]]))
area_tables(df2, df1)
(array([[25., 0., 10., 0., 0.], [ 0., 25., 0., 25., 0.], [ 0., 0., 0., 0., 15.]]), array([[1., 0.], [1., 0.], [0., 1.], [0., 1.], [0., 1.]]))
extensive_vars = ['population', 'income']
intensive_vars = ['pci']
estimates = area_interpolate(df1, df2, extensive_variables = extensive_vars,
intensive_variables = intensive_vars)
estimates
(array([[ 290., 350., 60.], [22750., 28750., 6000.]]), array([[ 82.14285714, 87.5 , 100. ]]))
extensive_vars = ['population', 'income']
intensive_vars = ['pci']
estimates = area_interpolate(df2, df1, extensive_variables = extensive_vars,
intensive_variables = intensive_vars)
estimates
(array([[ 407.14285714, 392.85714286], [30785.71428571, 34714.28571429]]), array([[77.5, 85. ]]))
Here the first set of polygons have an envelope that does not coincide with that of the second dataframe.
polys1 = gpd.GeoSeries([Polygon([(0,0), (12,0), (12,5), (0,5)]),
Polygon([(0,5), (0,10), (10,10), (10,5)])])
polys2 = gpd.GeoSeries([Polygon([(0,0), (5,0), (5,7), (0,7)]),
Polygon([(5,0), (5,10), (10,10), (10,0)]),
Polygon([(0,7), (0,10), (5,10), (5,7) ])
])
df1 = gpd.GeoDataFrame({'geometry': polys1})
df2 = gpd.GeoDataFrame({'geometry': polys2})
df1['population'] = [ 500, 200]
df1['pci'] = [75, 100]
df1['income'] = df1['population'] * df1['pci']
df2['population'] = [ 500, 100, 200]
df2['pci'] = [75, 80, 100]
df2['income'] = df2['population'] * df2['pci']
ax = df1.plot(color='red', edgecolor='k')
df2.plot(ax=ax, color='green',edgecolor='k')
<matplotlib.axes._subplots.AxesSubplot at 0x7f02d8f1ada0>
extensive_vars = ['population']
intensive_vars = ['pci']
estimates = area_interpolate(df1, df2, extensive_variables = extensive_vars,
intensive_variables = intensive_vars)
estimates
(array([[290., 350., 60.]]), array([[ 82.14285714, 87.5 , 100. ]]))
estimates[0].sum()
700.0
extensive_vars = ['population']
intensive_vars = ['pci']
estimates = area_interpolate(df1, df2, extensive_variables = extensive_vars,
intensive_variables = intensive_vars,
allocate_total=False)
estimates
(array([[248.33333333, 308.33333333, 60. ]]), array([[ 82.14285714, 87.5 , 100. ]]))
estimates[0].sum()
616.6666666666667
When setting allocate_total=False
the total population of a source zone is not completely allocated, but rather the proportion of total population is set to the area of intersection over the area of the source zone.
This will have no effect when the source df is df2 and the target df is df 1:
extensive_vars = ['population']
estimates = area_interpolate(df2, df1, extensive_variables = extensive_vars)
estimates
(array([[407.14285714, 392.85714286]]), array([], dtype=float64))
extensive_vars = ['population']
estimates = area_interpolate(df2, df1, extensive_variables = extensive_vars, allocate_total=False)
estimates
(array([[407.14285714, 392.85714286]]), array([], dtype=float64))