In this notebook we want to show mobility dynamics in different countries in 2020, and how much it correlates with COVID-19.
According to Apple, data show the relative volume of direction requests per country compared to the baseline volume on January 13, 2020. "Day" is defined as midnight to midnight, Pacific time. In many countries, relative volume has increased since January 13, consistent with normal, seasonal usage of Apple Maps. In addition, we need to consider the-day-of-the-week effects.
In most dataframes I normalize mobility so that the volume on January 13 is 1. So, for example, mobility equals 2 if it has increased twice since the start time.
Finally, here you can find the mobility data, and here is the disease data.
from itertools import product
import numpy as np
import pandas as pd
import geopandas as gpd
from lets_plot import *
LetsPlot.setup_html()
def get_naturalearth_data(data_type="admin_0_countries", columns=["NAME", "geometry"]):
import shapefile
from shapely.geometry import shape
naturalearth_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/" + \
"data/naturalearth/{0}/data.shp?raw=true".format(data_type)
sf = shapefile.Reader(naturalearth_url)
gdf = gpd.GeoDataFrame(
[
dict(zip([field[0] for field in sf.fields[1:]], record))
for record in sf.records()
],
geometry=[shape(s) for s in sf.shapes()]
)[columns]
gdf.columns = [col.lower() for col in gdf.columns]
return gdf
def fix_country_name(name):
UNIFORM_NAMES = {
'Czechia': 'Czech Republic',
'United States': 'United States of America',
'US': 'United States of America',
'Republic of Korea': 'South Korea',
}
return UNIFORM_NAMES[name] if name in UNIFORM_NAMES.keys() else name
def get_prepared_mobility_data(*, target_cols, cc_dict):
mobility_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/"
"master/data/covid19/applemobilitytrends.csv")
mobility_df = mobility_df[mobility_df.geo_type == 'country/region']
mobility_df = mobility_df.drop(columns=['geo_type', 'alternative_name', 'sub-region', 'country'])
mobility_df = mobility_df.set_index(['region', 'transportation_type'])
mobility_df = mobility_df.T
mobility_df = mobility_df.stack(level=0)
mobility_df.index = mobility_df.index.rename(names=['date', 'country'])
mobility_df = mobility_df.reset_index()
mobility_df.columns = [column_name.lower() for column_name in mobility_df.columns]
mobility_df.country = mobility_df.country.apply(fix_country_name)
mobility_df = mobility_df.fillna(0)
mobility_df.date = pd.to_datetime(mobility_df.date)
mobility_df = pd.melt(mobility_df, id_vars=['date', 'country'], value_vars=target_cols, \
var_name='mobility', value_name='mobility_value')
mobility_df.mobility_value /= 100
mobility_df['day'] = mobility_df.date.apply(lambda dt: dt.dayofyear)
mobility_df['continent'] = mobility_df.country.apply(lambda c: cc_dict[c] if c in cc_dict.keys() else np.nan)
mobility_df = mobility_df[~mobility_df.continent.isna()]
return mobility_df
def get_covid19_data():
covid19_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/"
"master/data/covid19/countries_aggregated.csv")
covid19_df.columns = [column.lower() for column in covid19_df.columns]
covid19_df.country = covid19_df.country.apply(fix_country_name)
covid19_df.date = pd.to_datetime(covid19_df.date)
covid19_df['recovery_index'] = covid19_df.recovered / covid19_df.confirmed
covid19_df.recovery_index = covid19_df.recovery_index.fillna(0)
return covid19_df
TARGET_COLS = ['walking', 'driving']
world_gdf = get_naturalearth_data(columns=["NAME", "ISO_A3", "CONTINENT", "POP_EST", "GDP_MD", "geometry"])
world_gdf['name'] = world_gdf['name'].apply(fix_country_name)
world_gdf.head(2)
name | iso_a3 | continent | pop_est | gdp_md | geometry | |
---|---|---|---|---|---|---|
0 | Fiji | FJI | Oceania | 889953.0 | 5496 | MULTIPOLYGON (((180.00000 -16.06713, 180.00000... |
1 | Tanzania | TZA | Africa | 58005463.0 | 63177 | POLYGON ((33.90371 -0.95000, 34.07262 -1.05982... |
mobility_df = get_prepared_mobility_data(target_cols=TARGET_COLS, \
cc_dict={r[0]: r[1] for i, r \
in world_gdf[['name', 'continent']].iterrows()})
mobility_df.head(2)
date | country | mobility | mobility_value | day | continent | |
---|---|---|---|---|---|---|
0 | 2020-01-13 | Albania | walking | 1.0 | 13 | Europe |
1 | 2020-01-13 | Argentina | walking | 1.0 | 13 | South America |
grouped_mobility_value = mobility_df.groupby(['country', 'mobility']).mobility_value
combined_mobility_df = mobility_df.groupby(['country', 'mobility'])\
.continent.agg(lambda c: c.value_counts().index[0]).to_frame()
combined_mobility_df = combined_mobility_df.join(grouped_mobility_value.min().to_frame(name='mobility_min'))\
.join(grouped_mobility_value.max().to_frame(name='mobility_max'))\
.join(grouped_mobility_value.mean().to_frame(name='mobility_mean'))\
.reset_index()
combined_mobility_df = pd.melt(combined_mobility_df, id_vars=['country', 'mobility', 'continent'], \
value_vars=['mobility_mean', 'mobility_min', 'mobility_max'], \
var_name='mobility_agg', value_name='mobility_agg_value')
combined_mobility_df.head(2)
country | mobility | continent | mobility_agg | mobility_agg_value | |
---|---|---|---|---|---|
0 | Albania | driving | Europe | mobility_mean | 0.961949 |
1 | Albania | walking | Europe | mobility_mean | 0.819974 |
c19_mobility_df = get_covid19_data()
c19_mobility_df = c19_mobility_df.merge(mobility_df[mobility_df.day % 7 == 4], \
on=['date', 'country'], how='inner')
c19_mobility_df = c19_mobility_df[['date', 'day', 'country', 'continent', 'mobility', \
'mobility_value', 'recovery_index']]
c19_mobility_df.head(2)
date | day | country | continent | mobility | mobility_value | recovery_index | |
---|---|---|---|---|---|---|---|
0 | 2020-01-25 | 25 | Albania | Europe | walking | 0.9706 | 0.0 |
1 | 2020-01-25 | 25 | Albania | Europe | driving | 1.0238 | 0.0 |
for continent in combined_mobility_df.continent.unique():
local_df = combined_mobility_df[combined_mobility_df.continent == continent]
local_df = local_df[local_df.mobility_agg == 'mobility_mean']
local_df = local_df.sort_values(['mobility', 'mobility_agg_value'])
display(
ggplot() + \
geom_bar(aes(x='country', y='mobility_agg_value', fill='mobility'), \
data=local_df, stat='identity', color='white', \
tooltips=layer_tooltips().line('@country')\
.format('@mobility_agg_value', '.2f')\
.line('mean value|@mobility_agg_value')) + \
scale_fill_discrete(name='') + \
facet_grid(x='mobility') + \
ggtitle('Mean Mobility in %s' % continent) + \
ggsize(600, 150) + \
theme_classic() + theme(axis='blank')
)
First of all, there is a minor correlation between "driving" and "walking" mobilities.
Also, the biggest gap between min and max mobilities is seen in Europe and Asia.
Finally, the most mobile citizens are Croatians, and the least mobile are Philippinos.
multiindex_cols = ['mobility', 'mobility_agg', 'country']
df1 = combined_mobility_df.set_index(multiindex_cols).mobility_agg_value.to_frame()
df2 = pd.DataFrame(list(product(
TARGET_COLS,
combined_mobility_df.mobility_agg.unique(),
set(world_gdf['name'].unique()) - set(combined_mobility_df.country.unique()),
[np.nan]
)), columns=['mobility', 'mobility_agg', 'country', 'mobility_agg_value']).set_index(multiindex_cols)
df = pd.concat([df1, df2]).sort_index().reset_index()
gdf = gpd.GeoDataFrame(
df.merge(world_gdf[['name', 'geometry']], left_on='country', right_on='name', how='right'),
geometry='geometry'
)
gdf = gdf[gdf.mobility_agg.isin(['mobility_min', 'mobility_max', 'mobility_mean'])]
ggplot() + \
geom_map(aes(fill='mobility_agg_value'), data=gdf, color='white', \
tooltips=layer_tooltips().line('@country')\
.format('@mobility_agg_value', '.2f')\
.line('aggregated value|@mobility_agg_value')) + \
scale_fill_gradient(name='aggregated mobility', low='#1a9641', high='#d7191c', trans='log10') + \
facet_grid(x='mobility', y='mobility_agg') + \
coord_cartesian(xlim=[-10, 40],ylim=[30, 70]) + \
ggtitle('Mobility by Country in Europe') + \
ggsize(600, 600) + \
theme_classic() + theme(axis='blank', legend_position='bottom')