#!/usr/bin/env python # coding: utf-8 # # Population mobility and COVID-19 # # In this notebook we want to show mobility dynamics in different countries in 2020, and how much it correlates with COVID-19. # # According to Apple, data show the relative volume of direction requests per country compared to the baseline volume on January 13, 2020. "Day" is defined as midnight to midnight, Pacific time. In many countries, relative volume has increased since January 13, consistent with normal, seasonal usage of Apple Maps. In addition, we need to consider the-day-of-the-week effects. # # In most dataframes I normalize mobility so that the volume on January 13 is 1. So, for example, mobility equals 2 if it has increased twice since the start time. # # Finally, [here](https://www.apple.com/covid19/mobility) you can find the mobility data, and [here](https://github.com/datasets/covid-19) is the disease data. # In[1]: from itertools import product import numpy as np import pandas as pd import geopandas as gpd from lets_plot import * # In[2]: LetsPlot.setup_html() # ## Preparation # In[3]: def get_naturalearth_data(data_type="admin_0_countries", columns=["NAME", "geometry"]): import shapefile from shapely.geometry import shape naturalearth_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/" + \ "data/naturalearth/{0}/data.shp?raw=true".format(data_type) sf = shapefile.Reader(naturalearth_url) gdf = gpd.GeoDataFrame( [ dict(zip([field[0] for field in sf.fields[1:]], record)) for record in sf.records() ], geometry=[shape(s) for s in sf.shapes()] )[columns] gdf.columns = [col.lower() for col in gdf.columns] return gdf # In[4]: def fix_country_name(name): UNIFORM_NAMES = { 'Czechia': 'Czech Republic', 'United States': 'United States of America', 'US': 'United States of America', 'Republic of Korea': 'South Korea', } return UNIFORM_NAMES[name] if name in UNIFORM_NAMES.keys() else name # In[5]: def get_prepared_mobility_data(*, target_cols, cc_dict): mobility_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/" "master/data/covid19/applemobilitytrends.csv") mobility_df = mobility_df[mobility_df.geo_type == 'country/region'] mobility_df = mobility_df.drop(columns=['geo_type', 'alternative_name', 'sub-region', 'country']) mobility_df = mobility_df.set_index(['region', 'transportation_type']) mobility_df = mobility_df.T mobility_df = mobility_df.stack(level=0, future_stack=True) mobility_df.index = mobility_df.index.rename(names=['date', 'country']) mobility_df = mobility_df.reset_index() mobility_df.columns = [column_name.lower() for column_name in mobility_df.columns] mobility_df.country = mobility_df.country.apply(fix_country_name) mobility_df = mobility_df.fillna(0) mobility_df.date = pd.to_datetime(mobility_df.date) mobility_df = pd.melt(mobility_df, id_vars=['date', 'country'], value_vars=target_cols, \ var_name='mobility', value_name='mobility_value') mobility_df.mobility_value /= 100 mobility_df['day'] = mobility_df.date.apply(lambda dt: dt.dayofyear) mobility_df['continent'] = mobility_df.country.apply(lambda c: cc_dict[c] if c in cc_dict.keys() else np.nan) mobility_df = mobility_df[~mobility_df.continent.isna()] return mobility_df # In[6]: def get_covid19_data(): covid19_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/" "master/data/covid19/countries_aggregated.csv") covid19_df.columns = [column.lower() for column in covid19_df.columns] covid19_df.country = covid19_df.country.apply(fix_country_name) covid19_df.date = pd.to_datetime(covid19_df.date) covid19_df['recovery_index'] = covid19_df.recovered / covid19_df.confirmed covid19_df.recovery_index = covid19_df.recovery_index.fillna(0) return covid19_df # In[7]: TARGET_COLS = ['walking', 'driving'] # In[8]: world_gdf = get_naturalearth_data(columns=["NAME", "ISO_A3", "CONTINENT", "POP_EST", "GDP_MD", "geometry"]) world_gdf['name'] = world_gdf['name'].apply(fix_country_name) world_gdf.head(2) # In[9]: mobility_df = get_prepared_mobility_data(target_cols=TARGET_COLS, \ cc_dict={r.iloc[0]: r.iloc[1] for i, r \ in world_gdf[['name', 'continent']].iterrows()}) mobility_df.head(2) # In[10]: grouped_mobility_value = mobility_df.groupby(['country', 'mobility']).mobility_value combined_mobility_df = mobility_df.groupby(['country', 'mobility'])\ .continent.agg(lambda c: c.value_counts().index[0]).to_frame() combined_mobility_df = combined_mobility_df.join(grouped_mobility_value.min().to_frame(name='mobility_min'))\ .join(grouped_mobility_value.max().to_frame(name='mobility_max'))\ .join(grouped_mobility_value.mean().to_frame(name='mobility_mean'))\ .reset_index() combined_mobility_df = pd.melt(combined_mobility_df, id_vars=['country', 'mobility', 'continent'], \ value_vars=['mobility_mean', 'mobility_min', 'mobility_max'], \ var_name='mobility_agg', value_name='mobility_agg_value') combined_mobility_df.head(2) # In[11]: c19_mobility_df = get_covid19_data() c19_mobility_df = c19_mobility_df.merge(mobility_df[mobility_df.day % 7 == 4], \ on=['date', 'country'], how='inner') c19_mobility_df = c19_mobility_df[['date', 'day', 'country', 'continent', 'mobility', \ 'mobility_value', 'recovery_index']] c19_mobility_df.head(2) # ## Comparison of Aggregated Mobility Values Countrywise # In[12]: for continent in combined_mobility_df.continent.unique(): local_df = combined_mobility_df[combined_mobility_df.continent == continent] local_df = local_df[local_df.mobility_agg == 'mobility_mean'] local_df = local_df.sort_values(['mobility', 'mobility_agg_value']) display( ggplot() + \ geom_bar(aes(x='country', y='mobility_agg_value', fill='mobility'), \ data=local_df, stat='identity', color='white', \ tooltips=layer_tooltips().line('@country')\ .format('@mobility_agg_value', '.2f')\ .line('mean value|@mobility_agg_value')) + \ scale_fill_discrete(name='') + \ facet_grid(x='mobility') + \ ggtitle('Mean Mobility in %s' % continent) + \ ggsize(600, 150) + \ theme_void() ) # First of all, there is a minor correlation between "driving" and "walking" mobilities. # # Also, the biggest gap between min and max mobilities is seen in Europe and Asia. # # Finally, the most mobile citizens are Croatians, and the least mobile are Philippinos. # In[13]: multiindex_cols = ['mobility', 'mobility_agg', 'country'] df1 = combined_mobility_df.set_index(multiindex_cols).mobility_agg_value.to_frame() df2 = pd.DataFrame(list(product( TARGET_COLS, combined_mobility_df.mobility_agg.unique(), set(world_gdf['name'].unique()) - set(combined_mobility_df.country.unique()), [np.nan] )), columns=['mobility', 'mobility_agg', 'country', 'mobility_agg_value']).set_index(multiindex_cols) df = pd.concat([df1, df2]).sort_index().reset_index() gdf = gpd.GeoDataFrame( df.merge(world_gdf[['name', 'geometry']], left_on='country', right_on='name', how='right'), geometry='geometry' ) gdf = gdf[gdf.mobility_agg.isin(['mobility_min', 'mobility_max', 'mobility_mean'])] ggplot() + \ geom_map(aes(fill='mobility_agg_value'), data=gdf, color='white', \ tooltips=layer_tooltips().line('@country')\ .format('@mobility_agg_value', '.2f')\ .line('aggregated value|@mobility_agg_value')) + \ scale_fill_gradient(name='aggregated mobility', low='#1a9641', high='#d7191c', trans='log10') + \ facet_grid(x='mobility', y='mobility_agg') + \ coord_cartesian(xlim=[-10, 40],ylim=[30, 70]) + \ ggtitle('Mobility by Country in Europe') + \ ggsize(600, 600) + \ theme_void() + theme(legend_position='bottom') # During the quarantine the whole of Europe more or less complied with self-isolation requirements. # # It seems that the most difference between countries from the mobility point of view corresponds to the period of peak mobility. # ## Mobility Variation Over Time Countrywise # In[14]: for continent in c19_mobility_df.continent.unique(): local_df = c19_mobility_df[c19_mobility_df.continent == continent] display( ggplot() + \ geom_area(aes(x='date', y='mobility_value', group='country', color='country', fill='country'), \ data=local_df, alpha=.2, \ tooltips=layer_tooltips().line('@country')\ .format('@mobility_value', '.2f')\ .line('mobility value|@mobility_value')) + \ scale_x_datetime() + \ facet_grid(x='mobility') + \ ggtitle('Mobility Dynamics in %s' % continent) + \ ggsize(600, 300) + \ ylab('mobility') + theme(legend_position='none') ) # As you can see on the plot, mobility plunged in the first quarter of the year. Then it rose again slowly. # # Some plots contain a small drooping tail at the end of the graph. This may indicate a new wave of self-isolation. Well, we shall see. # ## About Connection between Mobility and CRI Index # # The CRI (COVID-19 Recovery) Index is the ratio of \[registered\] recovered to \[registered\] confirmed sick people. The higher value corresponds to a better state of healthcare (or at least so it should). The value falls within range \[0, 1\]. # In[15]: df = c19_mobility_df.groupby(['country', 'mobility'])[['mobility_value', 'recovery_index']].corr().iloc[0::2, -1] df.index = df.index.droplevel(2) df = df.rename('covid_mobility_correlation') df = df.to_frame().reset_index() gdf = gpd.GeoDataFrame( df.merge(world_gdf[['name', 'geometry']], left_on='country', right_on='name', how='right'), geometry='geometry' ) ggplot() + \ geom_map(aes(fill='covid_mobility_correlation'), data=gdf, color='white', \ tooltips=layer_tooltips().line('@name')\ .format('@covid_mobility_correlation', '.2f')\ .line('correlation value|@covid_mobility_correlation')) + \ scale_fill_gradient(name='correlation', low='#d7191c', high='#1a9641') + \ ggtitle('Correlation between Mobility and CRI') + \ ggsize(600, 450) + \ theme_void() # The greener countries (Russia, Ukraine, US, Poland, Saudi Arabia, Canada, ...) show a positive correlation between mobility and CRI. More walking suggests a higher rate of recovery. # # In the redder countries (Argentina, Morocco, Chile, Ireland, ...) the correlation is negative: lower mobility coincides with better recovery. # In[16]: df = c19_mobility_df[c19_mobility_df.continent == 'North America'] ggplot() + \ geom_path(aes(x='recovery_index', y='mobility_value'), data=df, color='#af8dc3') + \ geom_point(aes(x='recovery_index', y='mobility_value', fill='day'), \ data=df, shape=21, color='blue') + \ scale_fill_gradient(low='#91bfdb', high='#fc8d59') + \ facet_grid(x='mobility', y='country') + \ ggtitle('Combined Weekly Change of Mobility and CRI in North America') + \ ggsize(600, 450) # These plots show the dynamics of interaction between mobility and CRI for North America. For most other countries the graphs would be similar. Most of them have the "crescent" shape: first no one recovers and mobility decreases rapidly, then recovery gradually rises, and finally, mobility starts growing along with CRI.