#!/usr/bin/env python
# coding: utf-8

# # Generating h3 Hexgrids from GeoDataFrames

# In[1]:

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas
import libpysal
from tobler.util import h3fy
from tobler.area_weighted import area_interpolate

# The watermark magics only exist inside an IPython/Jupyter session.
# `get_ipython` is not defined under a plain `python` interpreter (which is
# what the shebang requests), so look it up defensively and skip the magics
# there instead of crashing with NameError.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None

if _ipython is not None:
    _ipython.run_line_magic('load_ext', 'watermark')
    _ipython.run_line_magic('watermark', '-v -a "author: eli knaap" -d -u -p tobler,cenpy,geopandas')

# **Note**: This notebook relies on functionality from the
# [`contextily`](https://contextily.readthedocs.io/en/latest/) package that
# provides convenient basemaps for geospatial plots, and the
# [`cenpy`](https://cenpy-devs.github.io/cenpy/) package that provides a
# convenient interface to the U.S. Census API. These can be installed with
#
# `pip install contextily cenpy`
# or
# `conda install contextily cenpy -c conda-forge`

# In[2]:

import contextily as ctx
from cenpy import products

# ## Getting data from CenPy

# To begin with, we will fetch some data from the 2017 ACS

# In[3]:

acs = products.ACS(2017)

# We're looking for median home value, so first we will filter the ACS tables
# by those containing "value" in the description so we can find the correct
# variable code

# In[4]:

# In a notebook this expression displays the matching tables; when run as a
# script the result is simply discarded.
acs.filter_tables('VALUE', by='description')

# The variable we're looking for is `B25077_001E`, the median home value of
# each tract. Let's collect that data for the Washington DC metropolitan
# region. The next cell can take a minute or two to run, depending on the
# speed of your connection.
# In[5]:

# Fetch the `B25077_001E` variable for the Washington-Arlington MSA.
dc = acs.from_msa('Washington-Arlington', variables=['B25077_001E'])

# In[6]:

dc.head()

# ## Creating Hexgrids with the `h3fy` function

# Using the `h3fy` function from the `tobler.util` module, we can easily
# generate a hexgrid covering the face of the DC Metropolitan region

# In[7]:

dc_hex = h3fy(dc)

# In[8]:

# NOTE(review): Stamen-hosted tiles moved to Stadia Maps in 2023; newer
# contextily/xyzservices releases may no longer expose `providers.Stamen`.
# Confirm against the installed version before relying on this basemap.

# `plt.subplots(1, 2)` already returns a 1-D array of axes, so no flattening
# is needed before indexing or iterating.
fig, axs = plt.subplots(1, 2, figsize=(18, 10))
dc.plot(ax=axs[0], alpha=0.4, linewidth=1.6, edgecolor='white')
dc_hex.plot(ax=axs[1], alpha=0.4, linewidth=1.6, edgecolor='white')
axs[0].set_title('Original Tract Data')
axs[1].set_title('Hex Grid')
for ax in axs:
    ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)
    ax.axis('off')

# By altering the `resolution` parameter, we can generate grids using hexes
# of various sizes

# In[9]:

dc_hex_large = h3fy(dc, resolution=5)
dc_hex_small = h3fy(dc, resolution=7)

# In[10]:

fig, axs = plt.subplots(1, 2, figsize=(18, 10))
dc_hex_large.plot(ax=axs[0], alpha=0.4, linewidth=1.6, edgecolor='white')
dc_hex_small.plot(ax=axs[1], alpha=0.4, linewidth=1.6, edgecolor='white')
for ax in axs:
    ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)
    ax.axis('off')

# and by using the `clip` parameter, we can ensure that the hexgrid does not
# extend beyond the borders of the input geodataframe

# In[11]:

dc_hex_clipped = h3fy(dc, resolution=5, clip=True)

# In[12]:

fig, ax = plt.subplots(figsize=(10, 10))
dc_hex_clipped.plot(ax=ax, alpha=0.4, linewidth=1.6, edgecolor='white')
ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)
ax.axis('off')

# ## Interpolating to a hexgrid

# Thus, in just a few lines of code, we can estimate the value of census
# variables represented by a regular hexgrid

# Here, we will estimate the median home value of each hex in the DC region
# using simple areal interpolation

# In[13]:

# The home-value column is passed as an intensive variable; `dc` is the
# source geometry set and `dc_hex` the target.
dc_hex_interpolated = area_interpolate(
    source_df=dc,
    target_df=dc_hex,
    intensive_variables=['B25077_001E'],
)

# In[14]:

fig, axs = plt.subplots(1, 2, figsize=(20, 10))
# Side-by-side choropleths of the original values (left axis) and the
# hexgrid estimates (right axis), both classified with the 'quantiles' scheme.
dc.plot('B25077_001E', scheme='quantiles', alpha=0.5, ax=axs[0])
dc_hex_interpolated.plot('B25077_001E', scheme='quantiles', alpha=0.5, ax=axs[1])
axs[0].set_title('Original Data')
axs[1].set_title('Interpolated Data')
for ax in axs:
    ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)
    ax.axis('off')

# Raw string: `\m` is not a valid Python escape sequence, so the non-raw form
# triggers a DeprecationWarning (SyntaxWarning on Python 3.12+). The text
# handed to matplotlib's mathtext renderer is unchanged.
plt.suptitle(r'Spatial Interpolation with the PySAL $\mathtt{tobler}$ package', fontsize=16)