This tutorial uses AIS data published by the Danish Maritime Authority. The AIS record sample extracted for this tutorial covers vessel traffic on the 5th July 2017 near Gothenburg.
This tutorial covers:
%matplotlib inline
import urllib
import os
import pandas as pd
import movingpandas as mp
import contextily as ctx
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore")
ZOOM_LEVEL = 12
w, s, e, n = 11.76, 57.61, 12, 57.73
# check number of tiles that will be downloaded
ctx.howmany(w, s, e, n, ZOOM_LEVEL, ll=True)
# download tiles
%time img, ext = ctx.bounds2img(w, s, e, n, ZOOM_LEVEL-1, ll=True)
# plot map
plt.rcParams['figure.figsize'] = (10,10)
plt.imshow(img, extent=ext, aspect='auto')
Using zoom level 12, this will download 16 tiles Wall time: 1.78 s
<matplotlib.image.AxesImage at 0x2971bcfcf28>
# Loading geodata ...
t_start = datetime.now()
df = read_file('demo/demodata_ais.gpkg')
wgs84 = df.crs
print("Finished reading {} rows in {}".format(len(df),datetime.now() - t_start))
Finished reading 84702 rows in 0:00:04.912000
Let's see what the data looks like:
df.head()
Timestamp | MMSI | NavStatus | SOG | COG | Name | ShipType | geometry | |
---|---|---|---|---|---|---|---|---|
0 | 05/07/2017 00:00:03 | 219632000 | Under way using engine | 0.0 | 270.4 | None | Undefined | POINT (11.859583 57.688167) |
1 | 05/07/2017 00:00:05 | 265650970 | Under way using engine | 0.0 | 0.5 | None | Undefined | POINT (11.841753 57.661502) |
2 | 05/07/2017 00:00:06 | 265503900 | Under way using engine | 0.0 | 0.0 | None | Undefined | POINT (11.906505 57.690767) |
3 | 05/07/2017 00:00:14 | 219632000 | Under way using engine | 0.0 | 188.4 | None | Undefined | POINT (11.859583 57.688167) |
4 | 05/07/2017 00:00:19 | 265519650 | Under way using engine | 0.0 | 357.2 | None | Undefined | POINT (11.87192 57.682332) |
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x2971bef35f8>
To convert the DataFrame to Trajectories we need to create a temporal index:
df['t'] = pd.to_datetime(df['Timestamp'], format='%d/%m/%Y %H:%M:%S')
df = df.set_index('t')
If we look at the data distributions, we can see that there are a lot of records with speed over ground (SOG) values of zero in this dataframe:
df['SOG'].hist(bins=100, figsize=(15,3))
<matplotlib.axes._subplots.AxesSubplot at 0x2971bf316d8>
Let's get rid of these rows with zero SOG:
print("Original size: {} rows".format(len(df)))
df = df[df.SOG>0]
print("Reduced to {} rows after removing 0 speed records".format(len(df)))
df['SOG'].hist(bins=100, figsize=(15,3))
Original size: 84702 rows Reduced to 33593 rows after removing 0 speed records
<matplotlib.axes._subplots.AxesSubplot at 0x2971bf5f080>
Let's see what kind of ships we have in our dataset:
df['ShipType'].value_counts().plot(kind='bar', figsize=(15,3))
<matplotlib.axes._subplots.AxesSubplot at 0x2971c0a09e8>
Finally, let's create trajectories:
MIN_LENGTH = 100 # meters
t_start = datetime.now()
trajectories = []
generalized_trajs = []
for key, values in df.groupby(['MMSI']):
if len(values) < 2:
continue
trajectory = mp.Trajectory(key, values)
if trajectory.get_length() < MIN_LENGTH:
continue
#print(trajectory)
trajectories.append(trajectory)
generalized_trajs.append(trajectory.generalize(mode='douglas-peucker', tolerance=0.001)) #(mode='min-time-delta', tolerance=timedelta(minutes=5))
print("Finished creating {} trajectories in {}".format(len(trajectories),datetime.now() - t_start))
Finished creating 77 trajectories in 0:00:37.867509
Let's give the most common ship types distinct colors. The remaining ones will be just grey:
def plot_vessel_trajectories(trajs):
shiptype_to_color = {
'Passenger': 'blue',
'HSC': 'green',
'Tanker': 'red',
'Cargo': 'orange'}
default_color = 'lightgrey'
ax = None
for traj in trajs:
ship_type = traj.df['ShipType'].iloc[0]
try:
ship_color = shiptype_to_color[ship_type]
except KeyError:
ship_color = default_color
if ax is None:
ax = traj.plot(linewidth=1, capstyle='round', column='Name', legend=True, figsize=(12,9), color=ship_color)
else:
traj.plot(ax=ax, linewidth=1, capstyle='round', column='Name', legend=True, figsize=(12,9), color=ship_color)
%%time
plot_vessel_trajectories(trajectories)
Wall time: 15.7 s
%%time
plot_vessel_trajectories(generalized_trajs)
Wall time: 4.21 s
We can also plot individual trajectories to better visualize their properties, such as the changes in NavStatus:
my_traj = trajectories[0]
my_traj.df.head()
Timestamp | MMSI | NavStatus | SOG | COG | Name | ShipType | geometry | |
---|---|---|---|---|---|---|---|---|
t | ||||||||
2017-07-05 17:32:18 | 05/07/2017 17:32:18 | 210035000 | Under way using engine | 9.8 | 52.8 | NORDIC HAMBURG | Cargo | POINT (11.80462 57.676125) |
2017-07-05 17:32:28 | 05/07/2017 17:32:28 | 210035000 | Under way using engine | 9.8 | 51.0 | NORDIC HAMBURG | Cargo | POINT (11.805283 57.676405) |
2017-07-05 17:32:36 | 05/07/2017 17:32:36 | 210035000 | Under way using engine | 9.8 | 51.0 | NORDIC HAMBURG | Cargo | POINT (11.805283 57.676405) |
2017-07-05 17:32:38 | 05/07/2017 17:32:38 | 210035000 | Under way using engine | 9.8 | 51.0 | NORDIC HAMBURG | Cargo | POINT (11.805283 57.676405) |
2017-07-05 17:32:48 | 05/07/2017 17:32:48 | 210035000 | Under way using engine | 9.7 | 52.0 | NORDIC HAMBURG | Cargo | POINT (11.806675 57.676992) |
my_traj.df.tail()
Timestamp | MMSI | NavStatus | SOG | COG | Name | ShipType | geometry | |
---|---|---|---|---|---|---|---|---|
t | ||||||||
2017-07-05 23:08:44 | 05/07/2017 23:08:44 | 210035000 | Moored | 0.1 | 96.0 | NORDIC HAMBURG | Cargo | POINT (11.845708 57.689578) |
2017-07-05 23:09:54 | 05/07/2017 23:09:54 | 210035000 | Moored | 0.1 | 96.0 | NORDIC HAMBURG | Cargo | POINT (11.845708 57.689578) |
2017-07-05 23:09:57 | 05/07/2017 23:09:57 | 210035000 | Moored | 0.1 | 96.0 | NORDIC HAMBURG | Cargo | POINT (11.845708 57.689578) |
2017-07-05 23:11:45 | 05/07/2017 23:11:45 | 210035000 | Moored | 0.1 | 96.0 | NORDIC HAMBURG | Cargo | POINT (11.845705 57.689582) |
2017-07-05 23:35:58 | 05/07/2017 23:35:58 | 210035000 | Moored | 0.1 | 276.0 | NORDIC HAMBURG | Cargo | POINT (11.845713 57.689583) |
ZOOM_LEVEL = 14 # don't increase to ZOOM_LEVEL >= 15 because this causes a tile not found 404 error
my_traj.plot(with_basemap=True, linewidth=5.0, capstyle='round', column='NavStatus', legend=True,
figsize=(9,9), url=ctx.sources.ST_TERRAIN, zoom=ZOOM_LEVEL)
Note: When plotting with basemap, you may run into missing map tiles. For Stamen, you can check map tile availability for a region and zoom level at http://maps.stamen.com/#terrain/15/57.6714/11.8120
Available tile sources are listed in https://github.com/darribas/contextily/blob/master/contextily/tile_providers.py, e.g.
ST_TONER = 'http://tile.stamen.com/toner/{z}/{x}/{y}.png'
ST_TONER_HYBRID = 'http://tile.stamen.com/toner-hybrid/{z}/{x}/{y}.png'
ST_TONER_LABELS = 'http://tile.stamen.com/toner-labels/{z}/{x}/{y}.png'
ST_TONER_LINES = 'http://tile.stamen.com/toner-lines/{z}/{x}/{y}.png'
ST_TONER_BACKGROUND = 'http://tile.stamen.com/toner-background/{z}/{x}/{y}.png'
ST_TONER_LITE = 'http://tile.stamen.com/toner-lite/{z}/{x}/{y}.png'
ST_TERRAIN = 'http://tile.stamen.com/terrain/{z}/{x}/{y}.png'
ST_TERRAIN_LABELS = 'http://tile.stamen.com/terrain-labels/{z}/{x}/{y}.png'
ST_TERRAIN_LINES = 'http://tile.stamen.com/terrain-lines/{z}/{x}/{y}.png'
ST_TERRAIN_BACKGROUND = 'http://tile.stamen.com/terrain-background/{z}/{x}/{y}.png'
ST_WATERCOLOR = 'http://tile.stamen.com/watercolor/{z}/{x}/{y}.png'
More on plotting GeoPandas GeoDataframes: http://geopandas.org/gallery/plotting_basemap_background.html
We can find ships passing under the bridge based on trajectory intersections with the bridge area.
area_of_interest = Polygon([(11.89935, 57.69270), (11.90161, 57.68902), (11.90334, 57.68967), (11.90104, 57.69354), (11.89935, 57.69270)])
intersections = []
for traj in trajectories:
if traj.to_linestring().intersects(area_of_interest):
intersections.append(traj)
print("Found {} intersections".format(len(intersections)))
Found 20 intersections
bridge_traj = intersections[0]
bridge_traj.plot(with_basemap=True, linewidth=5, capstyle='round', column='NavStatus', legend=True,
figsize=(9,9), url=ctx.sources.ST_TERRAIN, zoom=ZOOM_LEVEL)
bridge_traj.df.head()
Timestamp | MMSI | NavStatus | SOG | COG | Name | ShipType | geometry | |
---|---|---|---|---|---|---|---|---|
t | ||||||||
2017-07-05 01:21:57 | 05/07/2017 01:21:57 | 219011922 | Under way using engine | 8.8 | 227.7 | SCANDINAVIA | Tanker | POINT (11.9123 57.696335) |
2017-07-05 01:22:47 | 05/07/2017 01:22:47 | 219011922 | Under way using engine | 8.7 | 227.1 | SCANDINAVIA | Tanker | POINT (11.909473 57.694925) |
2017-07-05 01:23:06 | 05/07/2017 01:23:06 | 219011922 | Under way using engine | 8.7 | 227.5 | SCANDINAVIA | Tanker | POINT (11.908422 57.694403) |
2017-07-05 01:23:18 | 05/07/2017 01:23:18 | 219011922 | Under way using engine | 8.7 | 227.4 | SCANDINAVIA | Tanker | POINT (11.907755 57.694078) |
2017-07-05 01:23:47 | 05/07/2017 01:23:47 | 219011922 | Under way using engine | 8.7 | 226.8 | SCANDINAVIA | Tanker | POINT (11.906183 57.693272) |
Since AIS records with a speed over ground (SOG) value of zero have been removed from the dataset, we can use the split_by_observation_gap()
function to split the continuous observations into individual trips:
trips = []
for traj in trajectories:
for x in traj.split_by_observation_gap(timedelta(minutes=5)):
if x.get_length() > MIN_LENGTH:
trips.append(x)
print("Extracted {} individual trips from {} continuous vessel tracks".format(len(trips), len(trajectories)))
Extracted 302 individual trips from 77 continuous vessel tracks
Note: Splitting continous observations by observation gap is a straightforward way to extract individual trips. More sophisticated approaches require stop detection methods that do not require extended periods of time where the speed is at zero. MovingPandas so far does not implement such stop detection functions.
Let's plot the resulting trips!
%%time
plot_vessel_trajectories(trips)
Wall time: 43.9 s
Compared to plotting the original continuous observations, this visualization is much cleaner because there are no artifacts at the border of the area of interest.
Next, let's get the trip origins:
origins = []
for trip in trips:
origins.append({'geometry': trip.get_start_location(), 'id': trip.id,
'SOG': trip.df.head(1)['SOG'][0], 'ShipType': trip.df.head(1)['ShipType'][0]})
origins = GeoDataFrame(pd.DataFrame(origins), crs=wgs84)
ax = origins.to_crs(epsg=3857).plot(column='ShipType', legend=True, figsize=(6,6))
ctx.add_basemap(ax, url=ctx.sources.ST_TERRAIN, zoom=ZOOM_LEVEL)
In our data sample, trip origins can be:
ax = origins.to_crs(epsg=3857).plot(column='SOG', legend=True, figsize=(9,6))
ctx.add_basemap(ax, url=ctx.sources.ST_TERRAIN, zoom=ZOOM_LEVEL)
area_of_interest = Polygon([(11.86815, 57.68273), (11.86992, 57.68047), (11.87419, 57.68140), (11.87288, 57.68348), (11.86815, 57.68273)])
We can identify vessels that start their trip within a given area of interest by intersecting trip starting locations with our area of interest:
departures = []
for traj in trips:
if traj.get_start_location().intersects(area_of_interest):
departures.append(traj)
print("Found {} departures".format(len(departures)))
Found 12 departures
departures[1].plot(with_basemap=True, linewidth=5, capstyle='round', column='Name', legend=True,
figsize=(9,9), url=ctx.sources.ST_TERRAIN, zoom=ZOOM_LEVEL)
Let's see what kind of ships depart from here:
for traj in departures:
print("{} vessel '{}' departed at {}".format(traj.df['ShipType'].iloc[0], traj.df['Name'].iloc[0], traj.get_start_time()))
Law enforcement vessel 'KBV 010' departed at 2017-07-05 10:36:03 Law enforcement vessel 'KBV 010' departed at 2017-07-05 14:33:02 Law enforcement vessel 'KBV 048' departed at 2017-07-05 10:20:44 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 01:21:07 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 04:15:04 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 06:58:56 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 08:45:08 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 12:02:18 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 13:34:42 Pilot vessel 'PILOT 794 SE' departed at 2017-07-05 22:32:47 Pilot vessel 'PILOT 218 SE' departed at 2017-07-05 09:27:24 Pilot vessel 'PILOT 218 SE' departed at 2017-07-05 16:10:29
Of course, the same works for arrivals:
arrivals = []
for traj in trips:
if traj.get_end_location().intersects(area_of_interest):
arrivals.append(traj)
print("Found {} arrivals".format(len(arrivals)))
for traj in arrivals:
print("{} vessel '{}' arrived at {}".format(traj.df['ShipType'].iloc[0], traj.df['Name'].iloc[0], traj.get_end_time()))
Found 12 arrivals Law enforcement vessel 'KBV 010' arrived at 2017-07-05 10:51:03 Law enforcement vessel 'KBV 048' arrived at 2017-07-05 10:26:44 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 01:37:37 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 04:51:27 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 08:16:46 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 08:55:18 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 13:06:55 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 16:44:06 Pilot vessel 'PILOT 794 SE' arrived at 2017-07-05 23:58:49 Pilot vessel 'PILOT 218 SE' arrived at 2017-07-05 10:08:03 Pilot vessel 'PILOT 218 SE' arrived at 2017-07-05 17:46:12 Tanker vessel 'DANA' arrived at 2017-07-05 08:35:48