This notebook illustrates two blog posts (in French):
It requires the same packages as in the "Spatial_Join" notebook, plus:
- conda install -c conda-forge dash
- conda install -c conda-forge -c plotly jupyter-dash
- conda install descartes
- conda install openpyxl
- conda install -c conda-forge flexpolyline
- conda install -c conda-forge python-kaleido
%matplotlib inline
import urllib
import time
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon
import contextily as ctx
import shapely
from shapely.geometry import shape
from pyproj import CRS
import os
import requests
import matplotlib.pyplot as plt
from tqdm.auto import tqdm, trange
import json
tqdm.pandas()
import shapely
import plotly.express as px
import dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import flexpolyline as fp
from credentials import here_api_key, here_app_code, here_app_id
crs = 'epsg:3857'
osm_crs= 'epsg:4326'
try : os.mkdir("output")
except FileExistsError: pass
from_files = False
# If False, Here API is called for geocoding and isolines, and result is stored in a file
# If True, API is not called, data is retreived from the file and plots are produced
add_options={}
option=None
dataset= "smur"
# dataset = "maternity"
# dataset = "vaccin"
# Uncomment to two following lines to add traffic
# add_options = {"departureTime" : "2021-05-28T14:00:00+01:00"} # with heavy traffic
# option="traffic"
# # Could also be {"transportMode":"pedestrian"}
def get_geocode(addr):
"""
Geocode an address with Here API
Parameters
----------
addr: str
address to geocode
filename: str
local file to save
Returns
-------
(lat, lon): (float, float)
geographical coordinates (in epsg:4326)
"""
host_name = "geocoder.api.here.com"
params = urllib.parse.urlencode({"app_id": here_app_id,
"app_code": here_app_code,
"searchtext":addr,
})
url = f"https://{host_name}/6.2/geocode.json?{params}"
# print(url)
with urllib.request.urlopen(url) as response:
res = response.read()
res = json.loads(res)
r = res["Response"]["View"][0]["Result"][0]["Location"]["DisplayPosition"]
return r["Latitude"], r["Longitude"]
def geocode_points(starts):
"""
Geocode all addresses in a geopandas GeoDataFrame. Add columns "lat", "lon" (in epsg:4326),
as well as "geometry" (in epsg:3857)
Parameters
----------
starts: geopandas.GeoDataFrame
a dataframe which contains a column "address"
Returns
-------
geopandas.GeoDataFrame, with 3 additionnal columns : "lat", "lon", and "geometry"
"""
starts[["lat", "lon"]] = starts.address.progress_apply(get_geocode).apply(pd.Series)
# Convert lat/long in xy points
starts= starts.set_geometry(gpd.points_from_xy(x = starts["lon"], y=starts["lat"]))
# Set CRS
starts.crs = CRS(osm_crs)
starts = starts.to_crs(CRS(crs))
return starts
def get_isoline(lat, lon, range_list, add_options={}):
"""
Get isolines 'lat, lon' as origin points, for all delays given in 'range_list' in minutes.
Call Here isoline API, with some default parameters that can be overwritten or enriched with 'add_options'
If 'Too Many Requests' error is received from Here, wait for 15 seconds and retries
Parameters
----------
lat: float
Latitude (in epsg:4326)
lon: float
Longitude (in epsg:4326)
range_list: list
List of range values (in minutes)
add_options : dict
Example: {"departureTime" : "2021-05-28T14:00:00+01:00", "transportMode":"pedestrian"}
Returns
-------
A dict, with keys from range_list, and data is a list of coordinates tuple.
Example :
{10.0: [(50.818634, 4.257374),
(50.818291, 4.258232),
(50.817604, 4.258919),
(...)],
15.0: (50.821381, 4.266815),
(50.821037, 4.267845),
(50.820351, 4.268532),
(...)]}
"""
host_name = "isoline.router.hereapi.com"
params = {"apiKey": here_api_key,
"transportMode":f"car",
"departureTime":"any", # No traffic dependent
"origin" : f"{lat},{lon}",
"range[values]" :",".join([str(rng*60) for rng in range_list]),
"range[type]":"time"# "distance"
}
for k in add_options:
params[k] = add_options[k]
params = urllib.parse.urlencode(params)
url = f"https://{host_name}/v8/isolines?{params}"
# print(url)
try:
with urllib.request.urlopen(url) as response:
res = response.read()
res = json.loads(res)
# print(res)
# return res
for it in res["isolines"]:
if len(it["polygons"]) != 1:
print("More than 1 polygon!!")
return {it["range"]["value"]/60: fp.decode(it["polygons"][0]["outer"]) for it in res["isolines"]} #["isoline"][0]["component"][0]["shape"]
except urllib.error.HTTPError as ex:
if ex.getcode() == 429: #Too Many Requests
print("Too many requests, wait for 15 seconds...")
time.sleep(15)
return get_isoline(lat, lon, range_list)
else:
print(ex)
print(type(ex))
print(url)
return None
# get_isoline(50.83458, 4.33639, [10])
def get_isolines(starts, range_list, add_options={}):
"""
From the output of geocode_points(starts), call get_isoline for each record, with range_list and add_options as parameter.
Note: does NOT support multi-component isolines (https://developer.here.com/documentation/isoline-routing-api/8.4.0/dev_guide/topics/multi-component-isoline.html)
Parameters
----------
start: geopandas.GeoDataFrame
output of geocode_points(starts)
range_list: list
list of range values (in minutes)
add_options : dict
Example: {"departureTime" : "2021-05-28T14:00:00+01:00", "transportMode":"pedestrian"}
Returns
-------
geopandas.GeoDataFrame
A geopandas.GeoDataFrame, with starts.shape[0] * len(range_list) record. For each starting point and each range,
"geometry" columns is a polygon with the corresponding isoline
"""
isolines= starts.copy()
isolines["isolines"] = isolines.progress_apply(lambda row: get_isoline(row["lat"], row["lon"], range_list, add_options), axis=1)
for rng in range_list:
isolines[rng] = isolines["isolines"].apply(lambda x: x[rng])
isolines = isolines.drop(["geometry", "isolines"], axis=1)
cols= isolines.drop(range_list, axis=1).columns
isolines = isolines.set_index(list(cols)).stack().rename("isoline").reset_index().rename(columns={"level_7": "range",
"level_8": "range",
"level_9": "range",
})
# Convert isolines to polygons
isolines["geometry"] = isolines["isoline"].apply(lambda row:Polygon(map(lambda tpl: (tpl[1], tpl[0]), row)))
isolines = isolines.set_geometry("geometry")
isolines.crs = CRS(osm_crs)
isolines = isolines.to_crs(CRS(crs))
return isolines.drop("isoline", axis=1)
def get_isolines_grouped(isolines, subsets):
"""
From the output of get_isolines(starts), group all isolines for the same range, making a (multi)polygon from all the
(single starting points) polygons
Parameters
----------
isolines: geopandas.GeoDataFrame
output of get_isolines
subsets: list
list of scenarii (corrsponding to boolean columns in isolines dataframe)
Returns
-------
geopandas.GeoDataFrame
A geopandas.GeoDataFrame, with len(subsets) * len(range_list) record. For each scenario point and each range,
"geometry" columns is a (mutli)polygon with the corresponding reachable region
"""
isolines_grouped = []
for subset in subsets:
gr = isolines[isolines[subset]].groupby("range").apply(lambda bloc: bloc.unary_union).rename("geometry").reset_index()
gr["subset"]=subset
isolines_grouped.append(gr)
isolines_grouped = gpd.GeoDataFrame(pd.concat(isolines_grouped))
isolines_grouped.crs = CRS(crs)
isolines_grouped["label"] = isolines_grouped.apply(lambda x: f"{x['subset']} ({x['range']})", axis=1)
return isolines_grouped
# get_isolines_grouped(isolines, subsets)
def add_basemap(ax, alpha=None):
"""
Add a basemap on a plot. Tries first default (Stamen) basemap. If errors, tries OpenStreetMap.Mapnik
Parameters
----------
ax: matplotlib axes
Returns
-------
None
"""
try:
ctx.add_basemap(ax, alpha=alpha)
except requests.HTTPError:
print("Default basemap doesn't work...")
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, alpha=alpha)
def disjoin_labels(geo_df):
"""
From the output of get_isolines_grouped, filtred on a unique range (we should then have len(subsets) records,
extract the overlapping regions.
Example :
geo_df= [["subs1", geom1],
["subs2", geom2]]
if geom2 in full included in geom1, output will be:
[["subs1", geom1-geom2],
["all scenarii", geom2]]
if there is an ovelapping region "common":
[["subs1", geom1-common],
["subs2", geom2-common],
["all scenarii", common]]
If more than 2 subsets, extracts also overlappings two by two
Parameters
----------
geo_df: geopandas.GeoDataFrame
Returns
-------
None
"""
## Find global common zone (common to all zones)
common_zone = geo_df.iloc[[0]]
for i in range(1, geo_df.shape[0]):
common_zone = gpd.overlay(common_zone,
geo_df.iloc[[i]],
how="intersection",
keep_geom_type=False)
common_zone = common_zone.geometry
if common_zone.shape[0]>0:
common_zone_s = gpd.GeoSeries([common_zone.iloc[0] for it in geo_df.iterrows()])
common_zone_s.crs= geo_df.crs
geo_df = geo_df.reset_index(drop=True).copy()
geo_df["common_zone"] = common_zone_s
try:
geo_df["geometry"]= geo_df.geometry.difference(geo_df["common_zone"])#.plot()
except shapely.errors.TopologicalError as e:
print(e)
print("Trying with buffer(0)...")
geo_df["geometry"]= geo_df.geometry.buffer(0).difference(geo_df["common_zone"])
#return geo_df#.drop("common_zone", axis=1)
m_range = geo_df.range.iloc[0]
geo_df = geo_df.append({"subset": "all scenarii",
"label" : f"all scenarii ({m_range})",
"geometry":common_zone.iloc[0],
"range": m_range,
"reach_ratio":0 # not correct!
}, ignore_index=True).drop("common_zone", axis=1).copy()
## Find one2one (remaining) overlaps (only if len(subsets)>2)
sh = geo_df.shape[0]
for i in range(sh):
for j in range(i+1, sh):
common_zone = geo_df.iloc[i].geometry.intersection(geo_df.iloc[j].geometry)
if geo_df.iloc[i].geometry.area > 0 and common_zone.area/geo_df.area.max() > 0.01:
geo_df["geometry"].iloc[i] = geo_df.iloc[i].geometry.difference(common_zone)
geo_df["geometry"].iloc[j] = geo_df.iloc[j].geometry.difference(common_zone)
geo_df = geo_df.append({"subset": f"{geo_df.iloc[i].subset} + {geo_df.iloc[j].subset}",
"label" : f"{geo_df.iloc[i].subset} + {geo_df.iloc[j].subset} ({m_range})",
"geometry":common_zone,
"range": m_range,
"reach_ratio":0 # not correct!
}, ignore_index=True).copy()
#common_zone.plot()
geo_df = geo_df[~geo_df.geometry.is_empty & (geo_df.area/geo_df.area.max() > 0.01)]
return geo_df
# from matplotlib import cm
from matplotlib.colors import ListedColormap
def plot_isolines(isolines_grouped, starts, m_range, boundary):
geo_df = isolines_grouped[(isolines_grouped.range == m_range)]
geo_df = disjoin_labels(geo_df).sort_values("label")
# display(geo_df)
cmap = ListedColormap( [f"C{i}" for i in range(geo_df.label.nunique())])
cmap_st = ListedColormap( [f"C{i}" for i in range(starts.label.nunique())])
ax= geo_df.plot("label", alpha=0.2, figsize=(10, 10), cmap=cmap)
ax = geo_df.plot("label", ax=ax, facecolor="none", edgecolor="k", cmap=cmap, legend=True)
starts.sort_values("label").plot("label", ax=ax, cmap=cmap_st)
boundaries.boundary.plot(ax=ax, color="black")
# boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
minx, miny, maxx, maxy = boundaries.total_bounds
width = maxx-minx
height = maxy-miny
ax.set_xlim(minx - width/10, maxx + width/10)
ax.set_ylim(miny - height/10, maxy + height/10)
ax.set_title(f"Reached zone ({dataset}{', '+option if option else ''}) - range: {m_range} minutes")
add_basemap(ax)#, alpha=0.5)
return ax
# ax= isolines[isolines.range == minute_range[5]].plot("name", alpha=0.8, figsize=(10, 10), legend=True)
# boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
# add_basemap(ax)
# ax = plot_isolines(isolines_grouped, starts, minute_range[10], boundaries)
# plt.axis("off")
# ax = plot_isolines(isolines_grouped, starts, minute_range[1], boundaries)
def plot_isolines_px(isolines_grouped, starts, m_range, boundary):
geo_df = isolines_grouped[(isolines_grouped.range == m_range)]
geo_df.crs = CRS(crs)
geo_df = geo_df.to_crs(osm_crs)
bnd = boundaries.copy()
bnd.crs = CRS(crs)
bnd = bnd.to_crs(osm_crs)
# display(geo_df)
geo_df = disjoin_labels(geo_df)
#display(geo_df)
fig = px.choropleth_mapbox(geo_df,
geojson=geo_df.geometry,
locations=geo_df.index,
hover_name="label",
hover_data=["label"],
color=geo_df['label'],
labels={"label": "Scenario"},
center={"lat": bnd.bounds[["miny","maxy"]].mean(axis=1).iloc[0],
"lon": bnd.bounds[["minx","maxx"]].mean(axis=1).iloc[0]},
# mapbox_style="carto-positron",
mapbox_style="open-street-map",
zoom=zoom,
width=950, height=800,
opacity=0.5,
title= f"Reached zone ({dataset}{', '+option if option else ''}) - range: {m_range} minutes"
)
#fig.update_geos(fitbounds="locations")
starts_df = starts.to_crs(osm_crs)
fig.add_trace(go.Scattermapbox(
lat=starts_df.lat,
lon=starts_df.lon,
text=starts_df.name,
name="Starts")
)
if isinstance(bnd.geometry.iloc[0], shapely.geometry.MultiPolygon):
# Get the main polygon
bnd_coords = pd.DataFrame(max(bnd.geometry.iloc[0], key=lambda a: a.area).exterior.coords)
# print(bnd)
else:
bnd_coords = pd.DataFrame(bnd.geometry.iloc[0].exterior.coords)
#[(x, y) for (x, y) in bnd.geometry.iloc[0].exterior.coords])
fig.add_trace(go.Scattermapbox(lat=bnd_coords[1],
lon=bnd_coords[0],
mode='lines',
name="Boundary"))
fig.update_layout(legend=dict(
yanchor="top",
y=0.99,
xanchor="right",
x=0.99
))
return fig
import os
def download_if_nexist(url, filename):
"""
If the (local) file <filename> does not exists, download it from <url>
Parameters
----------
url: str
url to fetch
filename: str
local file to save
Returns
-------
None
"""
if not os.path.isfile(filename):
with urllib.request.urlopen(url) as response:
with open(filename, "wb") as f:
f.write(response.read())
dataset_label=f"{dataset}{'_'+option if option else ''}"
output_path=f"output/{dataset_label}"
try : os.mkdir(output_path)
except FileExistsError: pass
boundaries_filename = "data/boundaries.geojson"
if not os.path.isfile(boundaries_filename):
zipcode_boundaries_filename = "data/zipcode_boundaries_shapefile_3812.zip"
# https://www.geo.be/catalog/details/9738c7c0-5255-11ea-8895-34e12d0f0423?l=fr
download_if_nexist("https://bgu.bpost.be/assets/9738c7c0-5255-11ea-8895-34e12d0f0423_x-shapefile_3812.zip",
zipcode_boundaries_filename)
zipcodes_boundaries = gpd.read_file(f"zip://{zipcode_boundaries_filename}/3812")
bru = Polygon(zipcodes_boundaries[zipcodes_boundaries.nouveau_PO.between("1000", "1299")].unary_union.exterior)
bel = Polygon(max(zipcodes_boundaries.unary_union, key=lambda a: a.area).exterior)
all_boundaries= gpd.GeoDataFrame([{"name":"BRU", "geometry": bru},
{"name":"BEL", "geometry": bel}
])
all_boundaries = all_boundaries.set_geometry("geometry")
all_boundaries.crs = zipcodes_boundaries.crs
all_boundaries = all_boundaries.to_crs(CRS(crs))
all_boundaries.to_file(boundaries_filename, driver="GeoJSON")
else:
all_boundaries = gpd.read_file(boundaries_filename)
all_boundaries.plot("name", alpha=0.5)
if dataset == "smur":
starts = gpd.GeoDataFrame(columns =
["name", "address", "wk_xl", "wk_dlt", "wk_stjean", "wk_none"], data=[
["Paul Brien", "Av. Britsiers 5, 1030 Schaerbeek", True, True, True, True ],
["Saint-Pierre", "Rue Haute 290, 1000 Bruxelles", True, True, True, True ],
["Sainte-Elisabeth", "Avenue De Fré 206, 1180 Uccle", True, True, True, True ],
["Saint-Luc", "Avenue Hippocrate, 1200 Woluwe-Saint-Lambert", True, True, True, True ],
["Érasme (ULB)", "Rue Meylemeersch 56, 1070 Anderlecht", True, True, True, True ],
["HM", "Rue Bruyn 1, 1120 Bruxelles", True, True, True, True ],
["AZVUB", "Avenue du Laerbeek 101, 1090 Jette", True, True, True, True ],
["Ixelles", "Rue Jean Paquot 63, 1050 Ixelles", True, False, False, False],
["Delta", "Boulevard du Triomphe 201, 1160 Auderghem", False, True, False, False],
["Saint-Jean", "Rue du Marais 104, 1000 Bruxelles", False, False, True, False],
])
subsets = ["wk_xl", "wk_dlt", "wk_stjean"] #, "wk_none"
minute_range = list(range(16)) # list(range(21)) #[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 18]
if dataset == "smur":
boundaries = all_boundaries[all_boundaries.name == "BRU"]
zoom = 10.5
if dataset == "maternity":
starts = pd.read_csv("data/maternities.csv", usecols=["name", "address"])
starts = gpd.GeoDataFrame(starts)
starts["current"] = True
# https://kce.fgov.be/fr/une-r%C3%A9duction-du-nombre-de-maternit%C3%A9s-est-possible-dans-notre-pays
# From https://www.google.com/maps/d/u/0/viewer?mid=1BuSrXrmRLzqMNVNLR-hSq0PL53o9GaB9&ll=50.83934807412515%2C4.471731899999895&z=9
# https://plus.lesoir.be/273299/article/2020-01-16/voici-les-17-maternites-belges-qui-pourraient-fermer-leurs-portes-carte
# AZ Sint-Jan (Ostende)
# AZ Zeno (Knokke-Heist)
# AZ Delta (Turnhout)
# Sint-Andries (Tielt)
# St. Vincentius (Deinze)
# A.Z. (Audenarde)
# AZ Delta (Menin)
# AZ Glorieux (Renaix)
# Site St-Joseph, du CHR de Mons
# CH Haute Senne, site le Tilleriau (Soignies)
# CHU André Vésale - site de Lobbes (Montigny-le-Tilleul)
# C.H. Jolimont (Lobbes)
# CHR Sambre et Meuse (Auvelais)
# Centre Hospitalier Régional de Huy
# CHC - Clinique Sainte-Elisabeth (Verviers)
# St. Nikolaus Hospital (Eupen)
# ASZ (Grammont) - déjà fermé depuis l'été
if dataset == "maternity":
should_go_names = ["ST. JAN BRUGGE", "Zeno", "Torhout", "Tielt", "Deinze", "Oudenaarde", "Menen", "Glorieux",
"CHR Mons", "Tilleriau", "Vesale", "Lobbes", "Sambre", "Huy", "VERVIERS, CENTRE HOSPITALIER CHRETIEN",
"Eupen"]
starts["future"] = starts.name.str.upper().apply(lambda x: sum([sgn.lower() in x.lower() for sgn in should_go_names]) == 0)
assert starts[~starts.future].shape[0] == len(should_go_names)
subsets = ["current", "future"]
minute_range = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]
if dataset == "maternity":
boundaries = all_boundaries[all_boundaries.name == "BEL"]
zoom = 7
if dataset == "vaccin":
starts = gpd.GeoDataFrame(columns =
["name", "address", "current", "with_WB"], data=[
["Pachéco", "Boulevard Pachéco, 42, 1000 Bruxelles", True, True],
["Heysel ", "Avenue Impératrice Charlotte, 6, 1020 Laeken", True, True],
["WSP", "Drève des Shetlands, 15, 1150 Woluwé-Saint-Pierre", True, True],
["Schaerbeek", "Avenue du Suffrage Universel, 1030 Schaerbeek", True, True],
["Molenbeek", "Chaussée de Gand, 696, 1080 Molenbeek", True, True],
["Forest", "Avenue Jupiter, 201, 1190 Forest", True, True],
["Anderlecht", "Avenue Théo Verbeeck, 2, 1070 Anderlecht", True, True],
["WSL", "Avenue des Vaillants, 2, 1200 Woluwé-Saint-Lambert", True, True],
["Uccle", "Rue Égide Van Ophem, 110, 1180 Uccle", True, True],
["HM", "Rue Bruyn, 200 à 1120 Neder-over-Heembeek", True, True],
["Watermael", "Boulevard du Souverain 25, 1170 Watermael-Boitsfort", False, True]
])
subsets = ["current", "with_WB"]
minute_range = list(range(21)) #[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
if dataset == "vaccin":
boundaries = all_boundaries[all_boundaries.name == "BRU"]
zoom = 10.5
starts["label"] = starts.apply(lambda row: ";".join(sb for sb in subsets if row[sb]), axis=1)
starts["label"] = starts["label"].str.replace(";".join(subsets), "all scenarii")
starts
starts.label.value_counts()
if not from_files:
starts = geocode_points(starts)
starts.to_pickle(f"data/starts_{dataset_label}.pkl.gz")
else:
starts = pd.read_pickle(f"data/starts_{dataset_label}.pkl.gz")
# fixing a bug that makes geodataframe being loaded from pickle to be not plottable
# after loading from pickle: starts.name.dtype is np.dtype("O") == False
n = starts.name.str.replace("willnotappear", "").copy()
starts = starts.drop("name", axis=1).assign(name=n)
# after those lines : # starts.name.dtype is np.dtype("O") == True
starts
fig = px.scatter_mapbox(starts,
lat="lat",
lon="lon",
hover_name="name",
color='label',
size_max=120,
zoom=10,
mapbox_style="open-street-map",
#opacity=0.2
)
bd = boundaries.to_crs(osm_crs)
fig_bd = px.choropleth_mapbox(bd,
geojson=bd.geometry,
locations=bd.index,
#labels="boundary",
#hover_name="name",
#fill_color="none",
#zoom=10,
opacity=0.2
)
fig.add_trace(fig_bd.data[0])
fig.show()