#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3,lifelines,rpy2")
# In[2]:
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
SEED = 41
np.random.seed(SEED)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180
sns.set()
# In[3]:
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
html_str=''
for df in args:
if type(df) == np.ndarray:
df = pd.DataFrame(df)
html_str+=df.to_html()
html_str = html_str.replace('table','table style="display:inline"')
# print(html_str)
display_html(html_str,raw=True)
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
html_str='
'
for g in args:
html_str += ''
html_str += g._repr_svg_()
html_str += ' | '
html_str += '
'
display_html(html_str,raw=True)
display(HTML(""))
# In[4]:
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '1')
get_ipython().run_line_magic('aimport', 'covid19')
# * [Estimates of the severity of coronavirus disease 2019: a model-based analysis](https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30243-7/fulltext)
# * duration from onset of symptoms to death to be 17·8 days (95% credible interval [CrI] 16·9–19·2)
# * to hospital discharge to be 24·7 days (22·9–28·1).
# * [Estimating SARS-COV-2 infections](https://observablehq.com/@danyx/estimating-sars-cov-2-infections)
# * it takes on average 23 days from infection to death.
# * ~5 days from infection to symptoms and ~18 days from symptoms to death so we arrive at a default value of 23 days from infection to death
# * [Characteristics of COVID-19 patients dying in Italy Report based on available data on March 20th, 2020](https://www.epicentro.iss.it/coronavirus/bollettino/Report-COVID-2019_20_marzo_eng.pdf)
# * Median number of days between onset of symptoms to death: 8
# * Median number of days between onset of symptoms to hospitalization: 4
# * [Clinical course and risk factors for mortality of adult inpatients with COVID-19 in Wuhan, China: a retrospective cohort study](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(20)30566-3/fulltext)
# * Time from illness onset to hospital admission, days 11·0 (8·0–14·0)
# * [Coronavirus: how quickly do COVID-19 symptoms develop and how long do they last?](https://patient.info/news-and-features/coronavirus-how-quickly-do-covid-19-symptoms-develop-and-how-long-do-they-last)
# * ~5 days from infection to symptoms
#
# * [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
# In[5]:
x = np.linspace(0.0,30.0,1000)
y = covid19.gamma_dist.pdf(x)
fig=plt.figure(figsize=(14, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.plot(x,y)
# In[6]:
covid19.gamma_loc,covid19.gamma_k,covid19.gamme_theta
# In[5]:
country_name, first_date, init_add = 'China', None, 0
# cfr_estimate, timeshift = covid19.calculate_delay_between_new_cases_and_death(country_name, first_date=first_date, init_add=init_add)
# print(cfr_estimate, timeshift)
# loc = max(timeshift - (gamma_mean - gamma_loc), 0.0)
china_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
china_mortality_analysis.fit()
print(china_mortality_analysis.death_rate())
china_mortality_analysis.plot()
# In[6]:
china_mortality_analysis.df.tail()
# In[8]:
# china_mortality_analysis.df_lifelines_individual.observed_death.sum()
# In[9]:
# china_mortality_analysis.df.head()
# In[10]:
# china_mortality_analysis.df.tail()
# In[19]:
# china_mortality_analysis.fit()
# In[12]:
# china_mortality_analysis.wbf.print_summary()
# expected_life_time = china_mortality_analysis.wbf.lambda_ * scipy.special.gamma(1 + 1 / china_mortality_analysis.wbf.rho_)
# expected_life_time/365
# In[20]:
# china_mortality_analysis.death_rate()
# In[21]:
# china_mortality_analysis.plot()
# In[7]:
alternative_germany_data = covid19.get_rki_df()
alternative_germany_data.tail()
# In[8]:
country_name, first_date, init_add = 'Germany', pd.to_datetime('2020-03-09'), 0.0
germany_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_germany_data)
germany_mortality_analysis.fit()
print(germany_mortality_analysis.print_death_rate())
germany_mortality_analysis.plot()
# In[19]:
# germany_mortality_analysis.ll.shift_and_scale_popt
# In[20]:
# germany_mortality_analysis.ll.plot_lead_lag()
# In[9]:
germany_mortality_analysis.plot_infection_and_death_curves()
# In[9]:
germany_mortality_analysis.df.tail()
# In[10]:
germany_mortality_analysis.project_death_and_hospitalization()
# In[9]:
germany_mortality_analysis.project_death_and_hospitalization()
# In[10]:
country_name, first_date, init_add = 'Austria', pd.to_datetime('2020-03-12'), 600
austria_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
austria_mortality_analysis.fit()
print(austria_mortality_analysis.print_death_rate())
austria_mortality_analysis.plot()
# In[11]:
austria_mortality_analysis.df.tail()
# In[12]:
austria_mortality_analysis.project_death_and_hospitalization()
# In[14]:
austria_mortality_analysis.project_death_and_hospitalization()
# In[15]:
country_name, first_date, init_add = 'Korea, South', None, 0
south_korea_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
south_korea_mortality_analysis.fit()
# south_korea_mortality_analysis2 = covid19.MortalityAnalysis(south_korea_name, first_date=pd.to_datetime('2020-02-20'), init_add=900)
# south_korea_mortality_analysis2.fit()
print(south_korea_mortality_analysis.print_death_rate())
# print(south_korea_mortality_analysis2.death_rate())
# print(south_korea_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
south_korea_mortality_analysis.plot()
# In[16]:
south_korea_mortality_analysis.df.tail()
# In[20]:
# south_korea_mortality_analysis.prepend_df
# In[13]:
country_name, first_date, init_add = 'United Kingdom', pd.to_datetime('2020-03-05'), 800
uk_mortality_analysis = covid19.MortalityAnalysis(country_name)
uk_mortality_analysis.fit()
print(uk_mortality_analysis.print_death_rate())
# uk_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, mult=4.0)
# uk_mortality_analysis2.fit()
# print(uk_mortality_analysis2.death_rate())
# print(uk_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
uk_mortality_analysis.plot()
# In[18]:
uk_mortality_analysis.plot_infection_and_death_curves()
# In[14]:
uk_mortality_analysis.df.tail()
# In[15]:
uk_mortality_analysis.project_death_and_hospitalization()
# In[20]:
uk_mortality_analysis.project_death_and_hospitalization()
# In[30]:
# pd.options.mode.chained_assignment = "raise"
# In[16]:
country_name, first_date, init_add = 'US', pd.to_datetime('2020-02-29'), 950
us_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
us_mortality_analysis.fit()
# us_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=450, mult=1.5)
# us_mortality_analysis2.fit()
print(us_mortality_analysis.print_death_rate())
# print(us_mortality_analysis2.death_rate())
# print(us_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
us_mortality_analysis.plot()
# In[22]:
us_mortality_analysis.plot_infection_and_death_curves()
# In[17]:
us_mortality_analysis.df.tail()
# In[18]:
us_mortality_analysis.project_death_and_hospitalization()
# In[24]:
us_mortality_analysis.project_death_and_hospitalization()
# In[19]:
alternative_italy_data = covid19.get_italy_df()
alternative_italy_data.tail()
# In[20]:
country_name, first_date, init_add = 'Italy', pd.to_datetime('2020-02-21'), 0
italy_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_italy_data)
italy_mortality_analysis.fit()
print(italy_mortality_analysis.print_death_rate())
# italy_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=2000, mult=4.0)
# italy_mortality_analysis2.fit()
# print(italy_mortality_analysis2.death_rate())
# print(italy_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
italy_mortality_analysis.plot()
# In[21]:
italy_mortality_analysis.df.tail()
# In[22]:
italy_mortality_analysis.project_death_and_hospitalization()
# In[28]:
italy_mortality_analysis.project_death_and_hospitalization()
# In[37]:
# italy_mortality_analysis2.prepend_df
# In[23]:
alternative_spain_data = covid19.get_spain_df()
alternative_spain_data.tail()
# In[24]:
spain_mortality_analysis = covid19.MortalityAnalysis('Spain', df=alternative_spain_data)
spain_mortality_analysis.fit()
print(spain_mortality_analysis.print_death_rate())
# spain_mortality_analysis2 = covid19.MortalityAnalysis('Spain', first_date=pd.to_datetime('2020-03-03'), init_add=800, mult=3.0)
# spain_mortality_analysis2.fit()
# print(spain_mortality_analysis2.death_rate())
# print(spain_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
spain_mortality_analysis.plot()
# In[25]:
spain_mortality_analysis.df.tail()
# In[26]:
spain_mortality_analysis.project_death_and_hospitalization()
# In[32]:
spain_mortality_analysis.project_death_and_hospitalization()
# In[27]:
alternative_france_data = covid19.get_france_df()
alternative_france_data.tail()
# In[46]:
france_mortality_analysis = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), df=alternative_france_data)
france_mortality_analysis.fit()
print(france_mortality_analysis.print_death_rate())
# france_mortality_analysis2 = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), init_add=500, mult=4)
# france_mortality_analysis2.fit()
# print(france_mortality_analysis2.death_rate())
# print(france_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
france_mortality_analysis.plot()
# In[35]:
france_mortality_analysis.plot_infection_and_death_curves()
# In[36]:
france_mortality_analysis.df.tail()
# | date | note |
# | --- | --- |
# | 2020-04-20 | expected_death (17296.0) < today_death(19718); death rate via survival analysis (15.36) << death rate via affine transform of curve estimates (18.7) |
# | 2020-04-21 | expected_death (17520.0) < today_death(20265); death rate via survival analysis (15.28) << death rate via affine transform of curve estimates (18.9) |
# | 2020-04-24 | expected_death (19208.0) < today_death(21856); death rate via survival analysis (15.9) << death rate via affine transform of curve estimates (19.2) |
# In[37]:
france_mortality_analysis.project_death_and_hospitalization()
# In[34]:
france_mortality_analysis.project_death_and_hospitalization()
# In[33]:
import rpy2
print(rpy2.__version__)
# In[34]:
import rpy2.robjects.packages as rpackages
baseR = rpackages.importr('base')
print(baseR.R_Version().rx('version.string'))
# In[35]:
# from rpy2.rinterface import R_VERSION_BUILD
# print(R_VERSION_BUILD)
# In[36]:
import IPython.display
import rpy2, rpy2.robjects, rpy2.robjects.pandas2ri, rpy2.rinterface, rpy2.robjects.packages, rpy2.interactive, rpy2.robjects.lib.ggplot2, rpy2.robjects.lib.grdevices
rpy2.robjects.pandas2ri.activate()
from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')
# import rpy2's package module
import rpy2.robjects.packages as rpackages
# import R's utility package
utils = rpackages.importr('utils')
# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list
# R package names
packnames = ('LexisPlotR',)
# R vector of strings
from rpy2.robjects.vectors import StrVector
# In[37]:
grdevices = rpy2.robjects.packages.importr('grDevices')
# Selectively install what needs to be install.
# We are fancy, just because we can.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(names_to_install) > 0:
utils.install_packages(StrVector(names_to_install))
lexis = importr('LexisPlotR')
lexis
# In[38]:
lexis_grid = rpy2.robjects.r['lexis.grid']
lexis_lifeline = rpy2.robjects.r['lexis.lifeline']
# In[39]:
def plot_lexis(mortality_analysis_instance):
mylexis = lexis_grid(year_start = 2020, year_end = 2021, age_start = 0, age_end = 1) # lwd = 0.1
alpha = 1.0
ix_present = ~mortality_analysis_instance.df_lifelines_individual.observed_death
ix_lost = mortality_analysis_instance.df_lifelines_individual.observed_death
mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_present], exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_present], colour = "orange", alpha = alpha, lwd = 0.4)
mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_lost] , exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_lost] , colour = "blue" , alpha = alpha, lwd = 0.4, lineends = True)
with rpy2.robjects.lib.grdevices.render_to_bytesio(grdevices.png, width=1.5*1024, height=1.5*896, res=90) as img:
rpy2.robjects.r.print(mylexis)
IPython.display.display(IPython.display.Image(data=img.getvalue(), format='png', embed=True))
# In[40]:
# plot_lexis(italy_mortality_analysis)
# In[41]:
# plot_lexis(italy_mortality_analysis2)
# In[42]:
plot_lexis(south_korea_mortality_analysis)
# In[43]:
plot_lexis(germany_mortality_analysis)
# In[47]:
plot_lexis(france_mortality_analysis)