#!/usr/bin/env python
# coding: utf-8
# # Spatial relationships and operations
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import geopandas
pd.options.display.max_rows = 10
# In[2]:
countries = geopandas.read_file("zip://./data/ne_110m_admin_0_countries.zip")
cities = geopandas.read_file("zip://./data/ne_110m_populated_places.zip")
rivers = geopandas.read_file("zip://./data/ne_50m_rivers_lake_centerlines.zip")
# ## Spatial relationships
#
# An important aspect of geospatial data is that we can look at *spatial relationships*: how two spatial objects relate to each other (whether they overlap, intersect, contain, .. one another).
#
# The topological, set-theoretic relationships in GIS are typically based on the DE-9IM model. See https://en.wikipedia.org/wiki/Spatial_relation for more information.
#
# ![](img/TopologicSpatialRelarions2.png)
# (Image by [Krauss, CC BY-SA 3.0](https://en.wikipedia.org/wiki/Spatial_relation#/media/File:TopologicSpatialRelarions2.png))
# ### Relationships between individual objects
# Let's first create some small toy spatial objects:
#
# A polygon (note: we use `.squeeze()` here to to extract the scalar geometry object from the GeoSeries of length 1):
# In[3]:
belgium = countries.loc[countries['name'] == 'Belgium', 'geometry'].squeeze()
# Two points:
# In[4]:
paris = cities.loc[cities['name'] == 'Paris', 'geometry'].squeeze()
brussels = cities.loc[cities['name'] == 'Brussels', 'geometry'].squeeze()
# And a linestring:
# In[5]:
from shapely.geometry import LineString
line = LineString([paris, brussels])
# Let's visualize those 4 geometry objects together (I only put them in a GeoSeries to easily display them together with the geopandas `.plot()` method):
# In[6]:
geopandas.GeoSeries([belgium, paris, brussels, line]).plot(cmap='tab10')
# You can recognize the abstract shape of Belgium.
#
# Brussels, the capital of Belgium, is thus located within Belgium. This is a spatial relationship, and we can test this using the individual shapely geometry objects as follow:
# In[7]:
brussels.within(belgium)
# And using the reverse, Belgium contains Brussels:
# In[8]:
belgium.contains(brussels)
# On the other hand, Paris is not located in Belgium:
# In[9]:
belgium.contains(paris)
# In[10]:
paris.within(belgium)
# The straight line we draw from Paris to Brussels is not fully located within Belgium, but it does intersect with it:
# In[11]:
belgium.contains(line)
# In[12]:
line.intersects(belgium)
# ### Spatial relationships with GeoDataFrames
#
# The same methods that are available on individual `shapely` geometries as we have seen above, are also available as methods on `GeoSeries` / `GeoDataFrame` objects.
#
# For example, if we call the `contains` method on the world dataset with the `paris` point, it will do this spatial check for each country in the `world` dataframe:
# In[13]:
countries.contains(paris)
# Because the above gives us a boolean result, we can use that to filter the dataframe:
# In[14]:
countries[countries.contains(paris)]
# And indeed, France is the only country in the world in which Paris is located.
# Another example, extracting the linestring of the Amazon river in South America, we can query through which countries the river flows:
# In[15]:
amazon = rivers[rivers['name'] == 'Amazonas'].geometry.squeeze()
# In[16]:
countries[countries.crosses(amazon)] # or .intersects
#
# REFERENCE:
#
# Overview of the different functions to check spatial relationships (*spatial predicate functions*):
#
# * `equals`
# * `contains`
# * `crosses`
# * `disjoint`
# * `intersects`
# * `overlaps`
# * `touches`
# * `within`
# * `covers`
#
#
# See https://shapely.readthedocs.io/en/stable/manual.html#predicates-and-relationships for an overview of those methods.
#
# See https://en.wikipedia.org/wiki/DE-9IM for all details on the semantics of those operations.
#
#
# ## Let's practice!
#
# We will again use the Paris datasets to do some exercises. Let's start importing them again:
# In[17]:
districts = geopandas.read_file("data/paris_districts_utm.geojson")
stations = geopandas.read_file("data/paris_sharing_bike_stations_utm.geojson")
#
# EXERCISE:
#
#
# * Create a shapely `Point` object for the Notre Dame cathedral (which has x/y coordinates of (452321.4581477511, 5411311.330882619))
# * Calculate the distance of each bike station to the Notre Dame.
# * Check in which district the Notre Dame is located.
#
#
# In[18]:
from shapely.geometry import Point
# In[19]:
notre_dame = Point(452321.4581477511, 5411311.330882619)
# In[20]:
stations.distance(notre_dame)
# In[21]:
districts.contains(notre_dame)
# In[22]:
districts[districts.contains(notre_dame)]
# ## Spatial operations
#
# Next to the spatial predicates that return boolean values, Shapely and GeoPandas also provide operations that return new geometric objects.
#
# **Binary operations:**
#
#
#
# **Buffer:**
#
#
#
#
# See https://shapely.readthedocs.io/en/stable/manual.html#spatial-analysis-methods for more details.
# For example, using the toy data from above, let's construct a buffer around Brussels (which returns a Polygon):
# In[23]:
geopandas.GeoSeries([belgium, brussels.buffer(1)]).plot(alpha=0.5, cmap='tab10')
# and now take the intersection, union or difference of those two polygons:
# In[24]:
brussels.buffer(1).intersection(belgium)
# In[25]:
brussels.buffer(1).union(belgium)
# In[26]:
brussels.buffer(1).difference(belgium)
# Another useful method is the `unary_union` attribute, which converts the set of geometry objects in a GeoDataFrame into a single geometry object by taking the union of all those geometries.
#
# For example, we can construct a single object for the Africa continent:
# In[27]:
africa_countries = countries[countries['continent'] == 'Africa']
# In[28]:
africa = africa_countries.unary_union
# In[29]:
africa
# In[30]:
print(str(africa)[:1000])
#
# REMEMBER:
#
# GeoPandas (and Shapely for the individual objects) provides a whole lot of basic methods to analyse the geospatial data (distance, length, centroid, boundary, convex_hull, simplify, transform, ....), much more than the few that we can touch in this tutorial.
#
#
# * An overview of all methods provided by GeoPandas can be found here: http://geopandas.readthedocs.io/en/latest/reference.html
#
#
#
#
#
# ## Let's practice!
#
#
EXERCISE: What are the districts close to the Seine?
#
#
# Below, the coordinates for the Seine river in the neighbourhood of Paris are provided as a GeoJSON-like feature dictionary (created at http://geojson.io).
#
#
#
# Based on this `seine` object, we want to know which districts are located close (maximum 150 m) to the Seine.
#
#
#
#
#
# - Create a buffer of 150 m around the Seine.
# - Check which districts intersect with this buffered object.
# - Make a visualization of the districts indicating which districts are located close to the Seine.
#
#
#
#
# In[31]:
# created a line with http://geojson.io
s_seine = geopandas.GeoDataFrame.from_features({"type":"FeatureCollection","features":[{"type":"Feature","properties":{},"geometry":{"type":"LineString","coordinates":[[2.408924102783203,48.805619828930226],[2.4092674255371094,48.81703747481909],[2.3927879333496094,48.82325391133874],[2.360687255859375,48.84912860497674],[2.338714599609375,48.85827758964043],[2.318115234375,48.8641501307046],[2.298717498779297,48.863246707697],[2.2913360595703125,48.859519915404825],[2.2594070434570312,48.8311646245967],[2.2436141967773438,48.82325391133874],[2.236919403076172,48.82347994904826],[2.227306365966797,48.828339513221444],[2.2224998474121094,48.83862215329593],[2.2254180908203125,48.84856379804802],[2.2240447998046875,48.85409863123821],[2.230224609375,48.867989496547864],[2.260265350341797,48.89192242750887],[2.300262451171875,48.910203080780285]]}}]},
crs={'init': 'epsg:4326'})
# In[32]:
# convert to local UTM zone
s_seine_utm = s_seine.to_crs(epsg=32631)
# In[33]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(20, 10))
districts.plot(ax=ax, color='grey', alpha=0.4, edgecolor='k')
s_seine_utm.plot(ax=ax)
# In[34]:
# access the single geometry object
seine = s_seine_utm.geometry.squeeze()
# In[35]:
seine_buffer = seine.buffer(150)
# In[36]:
seine_buffer
# In[37]:
districts_seine = districts[districts.intersects(seine_buffer)]
# In[38]:
fig, ax = plt.subplots(figsize=(20, 10))
districts.plot(ax=ax, color='grey', alpha=0.4, edgecolor='k')
districts_seine.plot(ax=ax, color='blue', alpha=0.4, edgecolor='k')
s_seine_utm.plot(ax=ax)
# In[ ]: