This tutorial uses data published on Movebank, specifically: Navigation experiments in lesser black-backed gulls (data from Wikelski et al. 2015)-gps.csv
This tutorial covers:
%matplotlib inline
import urllib
import os
import pandas as pd
import movingpandas as mp
import contextily as ctx
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore")
# Loading geodata ...
t_start = datetime.now()
df = read_file('demo/demodata_gulls.gpkg')
wgs84 = df.crs
df['t'] = pd.to_datetime(df['timestamp'])
df = df.set_index('t')
print("Finished reading {} rows in {}".format(len(df),datetime.now() - t_start))
Finished reading 89867 rows in 0:00:06.010003
This is what the data looks like:
df.head()
event-id | visible | timestamp | location-long | location-lat | sensor-type | individual-taxon-canonical-name | tag-local-identifier | individual-local-identifier | study-name | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|
t | |||||||||||
2009-05-27 14:00:00 | 1082620685 | true | 2009-05-27 14:00:00.000 | 24.58617 | 61.24783 | gps | Larus fuscus | 91732 | 91732A | Navigation experiments in lesser black-backed ... | POINT (24.58617 61.24783) |
2009-05-27 20:00:00 | 1082620686 | true | 2009-05-27 20:00:00.000 | 24.58217 | 61.23267 | gps | Larus fuscus | 91732 | 91732A | Navigation experiments in lesser black-backed ... | POINT (24.58217 61.23267) |
2009-05-28 05:00:00 | 1082620687 | true | 2009-05-28 05:00:00.000 | 24.53133 | 61.18833 | gps | Larus fuscus | 91732 | 91732A | Navigation experiments in lesser black-backed ... | POINT (24.53133 61.18833) |
2009-05-28 08:00:00 | 1082620688 | true | 2009-05-28 08:00:00.000 | 24.58200 | 61.23283 | gps | Larus fuscus | 91732 | 91732A | Navigation experiments in lesser black-backed ... | POINT (24.582 61.23283) |
2009-05-28 14:00:00 | 1082620689 | true | 2009-05-28 14:00:00.000 | 24.58250 | 61.23267 | gps | Larus fuscus | 91732 | 91732A | Navigation experiments in lesser black-backed ... | POINT (24.5825 61.23267) |
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x2daf058d4a8>
Let's see how many individuals we have in the dataset:
df['individual-local-identifier'].unique()
array(['91732A', '91733A', '91734A', '91735A', '91737A', '91738A', '91739A', '91740A', '91741A', '91742A', '91743A', '91744A', '91745A', '91746A', '91747A', '91748A', '91749A', '91750A', '91751A', '91752A', '91754A', '91755A', '91756A', '91758A', '91759A', '91761A', '91762A', '91763A', '91764A', '91765A', '91766A', '91767A', '91769A', '91771A', '91774A', '91775A', '91776A', '91777A', '91778A', '91779A', '91780A', '91781A', '91782A', '91783A', '91785A', '91786A', '91787A', '91788A', '91789A', '91794A', '91795A', '91797A', '91798A', '91799A', '91800A', '91802A', '91803A', '91807A', '91809A', '91810A', '91811A', '91812A', '91813A', '91814A', '91815A', '91816A', '91819A', '91821A', '91823A', '91824A', '91825A', '91826A', '91827A', '91828A', '91829A', '91830A', '91831A', '91832A', '91835A', '91836A', '91837A', '91838A', '91839A', '91843A', '91845A', '91846A', '91848A', '91849A', '91852A', '91854A', '91858A', '91861A', '91862A', '91864A', '91865A', '91866A', '91870A', '91871A', '91872A', '91875A', '91876A', '91877A', '91878A', '91880A', '91881A', '91884A', '91885A', '91893A', '91894A', '91897A', '91900A', '91901A', '91903A', '91907A', '91908A', '91910A', '91911A', '91913A', '91916A', '91918A', '91919A', '91920A', '91921A', '91924A', '91929A', '91930A'], dtype=object)
The records per individual are not evenly distributed:
df['individual-local-identifier'].value_counts().plot(kind='bar', figsize=(17,3))
<matplotlib.axes._subplots.AxesSubplot at 0x2daf06b9320>
Finally, let's create trajectories:
MIN_LENGTH = 100 # meters
MIN_TIME_DELTA = timedelta(hours=12)
t_start = datetime.now()
all_trajectories = []
for key, values in df.groupby(['individual-local-identifier']):
if len(values) < 2:
continue
trajectory = mp.Trajectory(key, values)
if trajectory.get_length() < MIN_LENGTH:
continue
all_trajectories.append(trajectory.generalize('min-time-delta', MIN_TIME_DELTA))
print("Finished creating {} trajectories in {}".format(len(all_trajectories),datetime.now() - t_start))
Finished creating 125 trajectories in 0:00:42.264999
Let's pick out a specific individual. For example, '91916A' is the individual with most records in our dataset:
def get_individual(trajs, id):
for traj in trajs:
if traj.id == id:
return traj
my_traj = get_individual(all_trajectories, '91916A')
my_traj.df.head()
event-id | visible | timestamp | location-long | location-lat | sensor-type | individual-taxon-canonical-name | tag-local-identifier | individual-local-identifier | study-name | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|
t | |||||||||||
2009-08-15 15:00:00 | 1082625177 | true | 2009-08-15 15:00:00.000 | 7.91500 | 54.18533 | gps | Larus fuscus | 91916 | 91916A | Navigation experiments in lesser black-backed ... | POINT (7.915 54.18533) |
2009-08-16 06:00:00 | 1082625179 | true | 2009-08-16 06:00:00.000 | 8.06467 | 54.29600 | gps | Larus fuscus | 91916 | 91916A | Navigation experiments in lesser black-backed ... | POINT (8.06467 54.296) |
2009-08-16 21:00:00 | 1082625182 | true | 2009-08-16 21:00:00.000 | 9.50967 | 54.86567 | gps | Larus fuscus | 91916 | 91916A | Navigation experiments in lesser black-backed ... | POINT (9.50967 54.86567) |
2009-08-17 09:00:00 | 1082625184 | true | 2009-08-17 09:00:00.000 | 9.44750 | 54.85183 | gps | Larus fuscus | 91916 | 91916A | Navigation experiments in lesser black-backed ... | POINT (9.4475 54.85183) |
2009-08-17 21:00:00 | 1082625186 | true | 2009-08-17 21:00:00.000 | 9.48750 | 54.84850 | gps | Larus fuscus | 91916 | 91916A | Navigation experiments in lesser black-backed ... | POINT (9.487500000000001 54.8485) |
Conveniently, the plot function can automatically compute tavel speed and visualize it:
my_traj.plot(with_basemap=True, linewidth=5.0, capstyle='round', column='speed', figsize=(9,9), url=ctx.sources.ST_TERRAIN)
This individual has been travelling back and forth for quite a few years!
One way to take a closer look at this individual's travels is to split the overall track into yearly trips:
trips_by_year = my_traj.split_by_date(mode='year')
for trip in trips_by_year:
print(trip.id)
91916A_2009 91916A_2010 91916A_2011 91916A_2012 91916A_2013 91916A_2014 91916A_2015
Now we can explore individual years:
one_year = get_individual(trips_by_year, '91916A_2010')
print(one_year)
Trajectory 91916A_2010 (2010-01-01 07:00:00 to 2010-12-31 19:00:00) | Size: 700 | Length: 17266472.3m Bounds: (17.88533, 21.047, 39.234, 61.54733) LINESTRING (39.18983 21.1125, 39.18417 21.17583, 39.16433 21.22483, 39.18833 21.17083, 39.16867 21.1
one_year.plot(with_basemap=True, linewidth=5.0, capstyle='round', column='speed', figsize=(9,9), url=ctx.sources.ST_TERRAIN)
Let's see where this individual was on a specific day:
def plot_location_at_timestamp(traj, t):
pos = traj.get_position_at(t)
loc = GeoDataFrame(pd.DataFrame([{'geometry': pos, 'id': traj.id}]), crs=traj.crs)
ax = loc.to_crs(epsg=3857).plot(markersize=150, figsize=(9,9))
ax = traj.plot(ax=ax, with_basemap=True, linewidth=2.0, capstyle='round', color='black', figsize=(9,9), url=ctx.sources.ST_TONER_LITE)
return ax
plot_location_at_timestamp(one_year, datetime(2010,9,1))
plot_location_at_timestamp(one_year, datetime(2010,10,1))
Of course, it might also be of interest to see the different locations on a certain day each year:
def plot_location_at_day_of_year(traj, month, day, ax=None):
ts = [datetime(year, month, day) for year in traj.df.index.year.unique()]
return plot_locations_at_timestamps(traj, ts, ax=ax)
def plot_locations_at_timestamps(traj, ts, ax=None):
loc = get_locations_at_timestamps(traj, ts)
ax = loc.to_crs(epsg=3857).plot(ax=ax, column='id', legend=True, markersize=150, figsize=(9,9))
ax = traj.plot(ax=ax, with_basemap=True, linewidth=1.0, capstyle='round', color='black', figsize=(9,9), url=ctx.sources.ST_TONER_LITE)
return ax
def get_locations_at_timestamps(traj, ts):
pos = []
for t in ts:
if t > traj.get_start_time() and t < traj.get_end_time():
pos.append({'geometry': traj.get_position_at(t), 'id': str(t.date())})
return GeoDataFrame(pd.DataFrame(pos), crs=traj.crs)
plot_location_at_day_of_year(my_traj, month=10, day=1)
It's pretty clear that this individual does not follow the same schedule and route every year. However, it seems to always be heading to the same area Red Sea coast to spend the winter there.
Let's find its arrival times in this area:
area_of_interest = Polygon([(30, 25), (50, 25), (50, 15), (30, 15), (30, 25)])
arrivals = []
for traj in my_traj.clip(area_of_interest):
arrivals.append(traj)
print("Found {} arrivals".format(len(arrivals)))
for traj in arrivals:
print("Individual '{}' arrived at {}".format(traj.df['individual-local-identifier'].iloc[0], traj.get_start_time()))
Found 6 arrivals Individual '91916A' arrived at 2009-12-22 20:51:50.115938 Individual '91916A' arrived at 2010-10-30 20:20:14.841286 Individual '91916A' arrived at 2011-11-09 21:25:50.393351 Individual '91916A' arrived at 2012-10-14 07:33:38.658892 Individual '91916A' arrived at 2013-10-07 22:26:10.717077 Individual '91916A' arrived at 2014-10-28 19:05:32.575227
def plot_wgs84_polygon(polygon, ax=None, color=None):
gdf = GeoDataFrame(pd.DataFrame([{'geometry': polygon, 'id': 1}]), crs=wgs84)
ax = gdf.to_crs(epsg=3857).plot(ax=ax, color=color, figsize=(9,9), alpha=0.5)
return ax
ax = plot_wgs84_polygon(area_of_interest, color='yellow')
plot_locations_at_timestamps(my_traj, [traj.get_start_time() for traj in arrivals], ax)
Multiple individuals travel to this area every year. Let's have a closer look:
def get_trajectories_by_year(trajs, year):
result = []
for traj in trajs:
if traj.get_start_time().year <= year_of_interest and traj.get_end_time().year >= year_of_interest:
result.append(traj)
return result
year_of_interest = 2010
arrivals = []
for individual in all_trajectories:
for segment in individual.clip(area_of_interest):
arrivals.append(segment)
relevant = get_trajectories_by_year(arrivals, year_of_interest)
print("Found {} arrivals".format(len(relevant)))
Found 16 arrivals
for traj in relevant:
print("Individual '{}' arrived at {} (duration: {})".format(
traj.df['individual-local-identifier'].iloc[0], traj.get_start_time().date(),
traj.get_end_time()-traj.get_start_time()))
Individual '91732A' arrived at 2010-04-10 (duration: 5 days, 20:56:19.733829) Individual '91737A' arrived at 2009-12-08 (duration: 140 days, 13:57:21.364241) Individual '91761A' arrived at 2010-04-12 (duration: 12 days, 6:34:23.213944) Individual '91761A' arrived at 2010-10-04 (duration: 6 days, 15:47:55.253705) Individual '91762A' arrived at 2010-04-19 (duration: 41 days, 21:56:40.627787) Individual '91771A' arrived at 2009-12-23 (duration: 66 days, 13:52:23.990312) Individual '91789A' arrived at 2009-11-11 (duration: 550 days, 16:58:01.515402) Individual '91824A' arrived at 2010-05-05 (duration: 20:51:38.270263) Individual '91832A' arrived at 2010-04-21 (duration: 3 days, 2:25:01.514108) Individual '91832A' arrived at 2010-09-23 (duration: 1 day, 0:24:38.647092) Individual '91837A' arrived at 2010-05-03 (duration: 1 day, 20:23:47.598011) Individual '91846A' arrived at 2010-05-15 (duration: 10 days, 7:58:51.638518) Individual '91862A' arrived at 2010-01-06 (duration: 248 days, 7:44:46.789462) Individual '91910A' arrived at 2010-09-28 (duration: 2 days, 16:30:11.773474) Individual '91916A' arrived at 2009-12-22 (duration: 115 days, 18:08:52.414115) Individual '91916A' arrived at 2010-10-30 (duration: 154 days, 14:48:50.855549)
Based on the duration of the individuals' trajectory segments within our area of interest, it looks like some individuals spend the winter here while others only pass through.
For example, Individual '91761A' passed through twice? What has it been up to?
def get_individual_traj_for_year(trajs, id, year):
individual = get_individual(all_trajectories, id)
return individual.get_segment_between(datetime(year,1,1), datetime(year,12,31))
ax = plot_wgs84_polygon(area_of_interest, color='yellow')
get_individual_traj_for_year(all_trajectories, '91761A', year_of_interest).plot(ax=ax, with_basemap=True)
Turns out that this individual does not stay at the Red Sea but continues its journey into Africa.
Let's have a closer look at the trajectory segments within our area of interest:
def place_to_point(place):
pt = place.geocode
return Point(pt.longitude, pt.latitude)
def plot_place(place, buffer_size=1, color='green', ax=ax):
pt = place_to_point(place)
if place.search in place_to_color.keys():
color = place_to_color[place.search]
return plot_wgs84_polygon(pt.buffer(buffer_size,2), ax=ax, color=color)
ax = plt.figure(figsize=(9,9)).add_subplot(1,1,1)
for traj in relevant:
ax = traj.plot(ax=ax, for_basemap=True)
places = [ctx.Place(name) for name in ['Jeddah', 'Port Sudan']]
place_to_color = {'Jeddah': 'purple', 'Port Sudan': 'orange', 'Other': 'gray'}
for place in places:
plot_place(place, ax=ax)
ctx.add_basemap(ax)
for traj in relevant:
traj.context = None
for place in places:
if traj.intersects(place_to_point(place).buffer(1,2)):
traj.context = place
ax = plt.figure(figsize=(9,9)).add_subplot(1,1,1)
for traj in relevant:
place = traj.context
if place:
clipped = traj.clip(place_to_point(place).buffer(1,2))
for clip in clipped:
print("Individual '{}' was in {} from {} to {}".format(clip.df['individual-local-identifier'].iloc[0],
place.search, clip.get_start_time().date(), clip.get_end_time().date()))
ax = traj.plot(ax=ax, for_basemap=True, color=place_to_color[place.search])
else:
ax = traj.plot(ax=ax, for_basemap=True, color=place_to_color['Other'])
for place in places:
plot_place(place, ax=ax)
ctx.add_basemap(ax)
Individual '91737A' was in Jeddah from 2009-12-09 to 2009-12-20 Individual '91761A' was in Port Sudan from 2010-04-15 to 2010-04-21 Individual '91789A' was in Port Sudan from 2009-11-12 to 2009-11-12 Individual '91789A' was in Port Sudan from 2011-05-11 to 2011-05-12 Individual '91862A' was in Port Sudan from 2010-01-06 to 2010-01-07 Individual '91916A' was in Jeddah from 2009-12-24 to 2010-04-16 Individual '91916A' was in Jeddah from 2010-10-31 to 2011-03-31