#!/usr/bin/env python # coding: utf-8 # Romain Goldenberg / Flowminder foundation # # # Practical 1: Assessing Health Facilities Coverage # --- # --- # This exercise provides an example of how we can use geographical data within an application. # # **The problem:** We want to assess health facility coverage for maternal healthcare in Kaduna state. We are interested in finding out which areas are over-stretched with a high number of women of child-bearing age (WOCBA) per health-facility. By identifying a target number of people per health facility, we can begin to highlight locations that may need further invention. # ## Getting started # --- # ### Exercise Overview # --- # Using geospatial analysis in Python, this demo will show an assessment of health facility coverage for maternal health using the GRID3 population data for Kaduna State. # # In this exercise we are going to: # # - Load some spatial data # - Subset the data to focus on points of interest # - Aggregate dataset using some basic geospatial techniques # - Try and identify connections between datasets to set targets # ### Loading Packages # --- # In[1]: # General purpose libraries. import pandas as pd import os import numpy as np # Geospatial libraries. import geopandas as gpd import rasterio from rasterio.plot import show import folium import shapely # Voronoi and Zonal statistics libraries from rasterstats import zonal_stats # Visualization libraries. from IPython.display import display from matplotlib import pyplot as plt import matplotlib.colors as mcolors # ### Loading Datasets # --- # In[2]: # Path for the data folder data_path = os.path.join(os.getcwd(), "Data/Practical_1") # Path to population (we will open it later with rasterio) population = data_path + "/NGA_population_v1_2_agesex_f15_49_simple.tif" # Below, we directly open the different shapefiles datasets with geopandas: # 1- Health facilities health_care = gpd.read_file( data_path + "/health-care-facilities-primary-secondary-and-tertiary.geojson" ) # 2- State boundaries state_boundary = gpd.read_file(data_path + "/state-administrative-boundaries.geojson") # 3- Wards boundaries wards = gpd.read_file(data_path + "/operational-ward-boundaries.geojson") # We will be using four datasets in this example. They can be obtained from the GRID3 portal available at https://data.grid3.org/. # # >1. Health facility locations # >2. Gridded population *(We provide an already prepared dataset with women aged 15-49 for Kaduna State)* # >3. State boundary # >4. Ward boundaries # # In particular, the Nigeria Data portal is [available here](https://data.grid3.org/search?tags=NGA) # # # # ## Filtering Public health facilities # --- # We will view our health facilty data below. This includes data such as their location, the type of health centre (*primary, secondary, tertiary*), and whether it is private or publicly owned. An example showing some attributes (i.e. columns) of the data is shown below: # In[3]: # Manually creating the list of column indices indices = list(range(1, 3)) + [9, 11, 15, 19, 24] display(health_care.iloc[:, indices]) # For our example, we are only interested in public health centers. We will therefore filter the dataset below: # In[4]: # We select places where "ownership" = "Public", in other words public health centers. public_health_care = ( health_care[health_care["ownership"] == "Public"].copy().reset_index(drop=True) ) # We display the results. display(public_health_care.iloc[:, indices]) # ## Visualizing our datasets # --- # ### Vector data # --- # All of our data is geospatial, meaning it has a relationship to spatial location. Our `health care` dataset is point(s) data, `wards` and `state_boundary` polygon(s) data. They are examples of [shapefile data](https://gisgeography.com/spatial-data-types-vector-raster/). # So let's visualize our data on an interactive map. # # Below is the first layer of our map, the `wards` of Kaduna state: # In[5]: # explore() is a geopandas method to create interactive maps. # we assign it to the variable 'kaduna_map', to add more map layers after. kaduna_map = wards.explore( color="black", # Use black color for borders. # Styling instructions. We fill the wards with lightgrey color (when hovering over them), # and change the opacity of different elements. style_kwds=dict( fillColor="lightgrey", fill=True, opacity=1.0, fillOpacity=0, interactive=True ), tiles="OpenStreetMap", # Use Open Street Map background tiles. tooltip=False, # Do not show tooltip when hovering on wards. popup="ward_name", # Show the name of the ward on click. # Do not show the column label "ward_name" in the popup. popup_kwds=dict(labels=False), smooth_factor=0, # Prevent smoothing of the polygons edges. name="wards", # Name of the layer in the map. ) # Then we continue with our second layer, the `public_health_care` dataset: # In[6]: public_health_care.explore( m=kaduna_map, # Pass the previous map object 'kaduna_map'. column="category", # Make choropleth based on "category" column. tooltip="name", # Show "name" value in tooltip (on hover) # Do not show column label in the tooltip. tooltip_kwds=dict(labels=False), # Show the selected values in popup (on click). popup=["category", "name", "ownership"], cmap="gnuplot2", # Use "gnuplot2" matplotlib color scheme. marker_kwds=dict(radius=5), # Size of the points. # Styling instructions. We draw small black circles around our points, # and change the opacity of different elements. style_kwds=dict(color="black", weight=1, fill=True, opacity=0.5, fillOpacity=0.8), name="health_care", # Name of the layer in the map. ) # Use the folium library (which Geopandas is based on for interactive mapping) to add layer control folium.LayerControl().add_to(kaduna_map) # We can now display our map: # In[7]: kaduna_map # ### Raster data # --- # Our dataset is instead an example of data. Now, let's visualize our `population` [raster data](https://gisgeography.com/spatial-data-types-vector-raster/). We will use a different strategy for the sake of this exercise and produce here a simple static map. # We will use here the `rasterio` library to manage and open our raster data: # In[8]: # Use rasterio to import the raster data with rasterio.open(population) as pop: data_pop = pop.dataset_mask() # Returns the valid data mask. img_pop = pop.read() # Returns the full image. nodata_pop = pop.nodata # Returns the value of "no data" cells # Then, we can map our `population` dataset using `imshow()` from the `matplotlib` library. # In[9]: # We first define a normalization for our colormap (range of colors) norm = mcolors.TwoSlopeNorm(vmin=0, vmax=300.0, vcenter=30) # Then, we create a new figure fig, ax = plt.subplots(figsize=(10, 10)) # We first plot the population data. We use 'afmhot_r' as color scheme, normalize the color scheme, # and provide the geographic extent of the data in 'extent'. plot_pop = ax.imshow( data_pop, cmap="afmhot_r", norm=norm, extent=rasterio.plot.plotting_extent(pop) ) # Then, we plot our wards on top of the population data wards.plot(edgecolor="black", facecolor="none", linewidth=0.3, alpha=1, ax=ax) # Finally, we add a colorbar to the map fig.colorbar(plot_pop, fraction=0.043, pad=0.04, label="Population count", ax=ax) plt.title("Women aged 15-49 in Kaduna state") # Title for our plot plt.show() # Show the figure plt.close() # ## Computing Voronoi Polygons # --- # We are interested in finding out the health facility coverage across space. # # - Idea: optimize the partitioning of the area into polygons such that each polygon contains one health facility. # - Method: [Voronoi](https://en.wikipedia.org/wiki/Voronoi_diagram) Polygon # #
# #
# Voronoi. #
# Emergence of a Voronoi tessellation from points (from # Wikipedia) #
#
# ### Check on the CRS (Coordinate Reference System) # --- # # Before running this algorithm, we need to make sure our datasets have the same CRS. # In[10]: # Using .crs (from geopandas), we easily obtain information on the dataset CRS. public_health_care.crs # Currently, our data is in WGS 84 ([epsg:4326](https://epsg.io/4326)), a latitude/longitude coordinate system based on the Earth's center of mass (very common, used by the Global Positioning System among others). For our purpose, it is better to use a local projected coordinate system, rather than a global geographic system (as we currently have) to avoid distortions in results due to the projection. # We will now switch our data to the local CRS [epsg:26392](https://epsg.io/26392), a projected coordinate system (position measurements in meters) corresponding to Minna / Nigeria Mid Belt. # In[11]: # With .to_crs (from geopandas), we can easily reproject our data. public_health_care = public_health_care.to_crs("epsg:26392") state_boundary = state_boundary.to_crs("epsg:26392") wards = wards.to_crs("epsg:26392") # Let's have a look at the new CRS: # In[12]: public_health_care.crs # And a quick look at the data.\ # You should now see a difference in the coordinates of our `geometry` column, compared to the first time we opened the data: # In[13]: # We display the results. display(public_health_care.iloc[:, indices]) # ### Running the Voronoi algorithm # --- # We can now create our Voronoi polygons.\ # First, we select the geometry of our datasets: # In[14]: # We select the 'geometry' columns from both our datasets # This is our health care centers (points) points_health_care = public_health_care.geometry.copy() # Create a MultiPoint object from the GeoSeries points_health_care = shapely.geometry.MultiPoint(points_health_care.tolist()) # And this is the boundaries of Kaduna state polygon_state = state_boundary.geometry.iloc[0] # In[15]: polygon_state # In[16]: points_health_care # Then we can run the Voronoi creation: # In[17]: voronoi = shapely.voronoi_polygons(points_health_care, extend_to=polygon_state) # In[18]: voronoi # We can now extract our voronoi results and store them in a new geopandas dataset: # In[19]: # Extract geometries from the GeometryCollection geometries_list = [geom for geom in voronoi.geoms] # Convert list of geometries to GeoDataFrame, and make sure to indicate the CRS of the data # (epsg:26392, the same CRS we used to run the voronoi algorithm) voronoi_polygons = gpd.GeoDataFrame({"geometry": geometries_list}, crs="epsg:26392") # Intersect (i.e. cut) Voronoi polygons with Kaduna state voronoi_polygons = voronoi_polygons.geometry.intersection(polygon_state) # Create a new GeoDataFrame with clipped geometries and set the CRS voronoi_polygons = gpd.GeoDataFrame({"geometry": voronoi_polygons}, crs="epsg:26392") # Finally we reproject our datasets back to the original WGS 84 CRS (epsg:4326): # In[20]: voronoi_polygons = voronoi_polygons.to_crs("epsg:4326") public_health_care = public_health_care.to_crs("epsg:4326") state_boundary = state_boundary.to_crs("epsg:4326") # Let's now have a look at our results! # In[21]: voronoi_map = voronoi_polygons.explore( color="black", # Use black color for borders. # Styling instructions. We fill the wards with lightgrey color (when hovering over them), # and change the opacity of different elements. style_kwds=dict( fillColor="lightgrey", fill=True, opacity=1.0, fillOpacity=0, interactive=True ), tiles="openstreetmap", # Use "Open Street Map" background tiles. tooltip=False, # Do not show tooltip when hovering on wards. smooth_factor=0, # Prevent smoothing of the polygons edges. name="voronoi", # Name of the layer in the map. ) public_health_care.explore( m=voronoi_map, # Pass the previous map object 'voronoi_map'. column="category", # Make choropleth based on "category" column. tooltip="name", # Show "name" value in tooltip (on hover). # Do not show column label in the tooltip. tooltip_kwds=dict(labels=False), popup=True, # Show the selected values in popup (on click). cmap="gnuplot2", # Use "gnuplot2" matplotlib color scheme. marker_kwds=dict(radius=5), # Size of the points. # Styling instructions. We draw small black circles around our points, # and change the opacity of different elements. style_kwds=dict(color="black", weight=1, fill=True, opacity=0.5, fillOpacity=0.8), name="health_care", # Name of the layer in the map. ) folium.LayerControl().add_to(voronoi_map) # use folium to add layer control # We can now display our map: # In[22]: voronoi_map # ## Identifying Population Coverage Target with Zonal Statistics # --- # We are interested in finding out the population bound to specific health facilities. # # - Idea: find outliers in population per health facility, with a set population target. # - Method: using zonal statistics, we can obtain summmary statistics of raster values at polygon level. # # We can, using zonal statistics, count the number of people living in each of our voronoi polygons: # In[23]: # We use the function zonal_stats from the library 'rasterstats', # to calculate a mean and sum of population per polygon. # We store the results of the function in 'stats'. stats = zonal_stats( vectors=voronoi_polygons["geometry"], raster=population, stats=["mean", "sum"] ) # ### Data join # --- # Then, we can join these results back to our `voronoi_polygons` dataset: # In[24]: # We make a dataframe out of our 'stats' results, and join it back to the voronoi polygons. voronoi_polygons = voronoi_polygons.join( pd.DataFrame(stats), how="left" ) # Note the .join (simple join) method. # We can rename the new columns, and have a look back at `voronoi_polygons`: # In[25]: # Rename the columns to 'Population_mean' and 'Population_sum' voronoi_polygons = voronoi_polygons.rename( columns={"mean": "Population_mean", "sum": "Population_sum"} ) voronoi_polygons # ### Spatial join # --- # We will now finally join the population data back to our `public_health_care`. # To make sure we join the population data from our voronoi polygons to the correct health center data points, we will peform here instead a ***spatial join***.\ # The spatial join (denoted `.sjoin` below) is different from the simple join (denoted `.join` above) we just used. The previous join simply add the population information we calculated, by matching the data in order to each rows of the table of `voronoi_polygons`. The spatial join instead check the spatial location of each health care center point, and if this point is inside a particular voronoi polygon, it will add the information from the polygon to the point.\ # This is done for each health care center point: # In[26]: # We use a spatial join to join population results back to health care centers. public_health_care = public_health_care.sjoin( voronoi_polygons, how="left" ) # Note the .sjoin (spatial join) method. # We can make sure the data has been correctly added: # In[27]: # Display the results by descending order public_health_care.sort_values(by=["Population_sum"], ascending=False) display(public_health_care.iloc[:, indices + [26, 27]]) # And perform a final check to make sure all of our health care centers have a population value attached: # In[28]: # We check for null values (missing values) in each rows, and sum it back for each columns. public_health_care[["Population_mean", "Population_sum"]].isnull().sum() # ### Setting a target # --- # Here, we set a target at 4,000 women of child-bearing age. # In[29]: target = 4000 # Let's quickly plot an histogram, to have a look at the repartition of population for the different health care centers. # In[30]: # We create a new figure fig, ax = plt.subplots(figsize=(14, 5)) # We plot an histogram based on the 'population_sum' column ax.hist(public_health_care["Population_sum"], bins=50, alpha=0.5, edgecolor="black") # Draw a vertical line at population=4000 ax.axvline(x=target, color="k", linestyle="--") ax.set_xlabel("Population") ax.set_ylabel("Frequency") plt.show() # ## Identifying areas above target # --- # To finish, we just need to select voronoi areas where we have more than 4000 women of child-bearing age. # In[31]: outliers = voronoi_polygons[voronoi_polygons["Population_sum"] > target] # And using the different element we have already created, add our outliers to the interactive map: # In[32]: map_final = voronoi_polygons.explore( color="black", # Use black color for borders. # Styling instructions. We fill the wards with lightgrey color (when hovering over them), # and change the opacity of different elements. style_kwds=dict( fillColor="lightgrey", fill=True, opacity=1.0, fillOpacity=0, interactive=True ), tiles="openstreetmap", # Use "Open Street Map" background tiles. tooltip=False, # Do not show tooltip when hovering on wards. popup="Population_sum", # show population values in popup (on click). popup_kwds=dict(labels=True), # Show column label in the popup. smooth_factor=0, # Prevent smoothing of the polygons edges. name="voronoi", # Name of the layer in the map. ) outliers.explore( m=map_final, # Pass the previous map object 'map_final'. color="black", # Use black color for borders. # Styling instructions. We fill the outlier wards with red color, # and change the opacity of different elements. style_kwds=dict( fillColor="red", fill=True, opacity=0.0, fillOpacity=0.5, interactive=True ), tooltip=False, # Do not show tooltip when hovering on wards. popup="Population_sum", # Show population values in popup (on click). popup_kwds=dict(labels=True), # Show column label in the popup. smooth_factor=0, # Prevent smoothing of the polygons edges. name="wards", # Name of the layer in the map. ) public_health_care.explore( m=map_final, # Pass the previous map object 'map_final'. column="category", # Make choropleth based on "category" column. tooltip="name", # Show "name" value in tooltip (on hover). # Do not show column label in the tooltip. tooltip_kwds=dict(labels=False), popup=True, # Show the selected values in popup (on click). cmap="gnuplot2", # Use "gnuplot2" matplotlib color scheme. marker_kwds=dict(radius=5), # Size of the points. # Styling instructions. We draw small black circles around our points, # and change the opacity of different elements. style_kwds=dict(color="black", weight=1, fill=True, opacity=0.5, fillOpacity=0.8), name="health_care", # Name of the layer in the map. ) folium.LayerControl().add_to(map_final) # use folium to add layer control # In[33]: map_final