#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('load_ext', 'watermark') get_ipython().run_line_magic('watermark', "-a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3,lifelines,rpy2") # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr import matplotlib as mpl import pymc3 as pm import theano as thno import theano.tensor as T import datetime, time, math from dateutil import relativedelta from collections import OrderedDict SEED = 41 np.random.seed(SEED) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) # pd.set_option('display.float_format', lambda x: '%.2f' % x) np.set_printoptions(edgeitems=10) np.set_printoptions(linewidth=1000) np.set_printoptions(suppress=True) np.core.arrayprint._line_width = 180 sns.set() # In[3]: from IPython.display import display, HTML from IPython.display import display_html def display_side_by_side(*args): html_str='' for df in args: if type(df) == np.ndarray: df = pd.DataFrame(df) html_str+=df.to_html() html_str = html_str.replace('table','table style="display:inline"') # print(html_str) display_html(html_str,raw=True) CSS = """ .output { flex-direction: row; } """ def display_graphs_side_by_side(*args): html_str='' for g in args: html_str += '' html_str += '
' html_str += g._repr_svg_() html_str += '
' display_html(html_str,raw=True) display(HTML("")) # In[4]: get_ipython().run_line_magic('load_ext', 'autoreload') get_ipython().run_line_magic('autoreload', '1') get_ipython().run_line_magic('aimport', 'covid19') # * [Estimates of the severity of coronavirus disease 2019: a model-based analysis](https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30243-7/fulltext) # * duration from onset of symptoms to death to be 17·8 days (95% credible interval [CrI] 16·9–19·2) # * to hospital discharge to be 24·7 days (22·9–28·1). # * [Estimating SARS-COV-2 infections](https://observablehq.com/@danyx/estimating-sars-cov-2-infections) # * it takes on average 23 days from infection to death. # * ~5 days from infection to symptoms and ~18 days from symptoms to death so we arrive at a default value of 23 days from infection to death # * [Characteristics of COVID-19 patients dying in Italy Report based on available data on March 20th, 2020](https://www.epicentro.iss.it/coronavirus/bollettino/Report-COVID-2019_20_marzo_eng.pdf) # * Median number of days between onset of symptoms to death: 8 # * Median number of days between onset of symptoms to hospitalization: 4 # * [Clinical course and risk factors for mortality of adult inpatients with COVID-19 in Wuhan, China: a retrospective cohort study](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(20)30566-3/fulltext) # * Time from illness onset to hospital admission, days 11·0 (8·0–14·0) # * [Coronavirus: how quickly do COVID-19 symptoms develop and how long do they last?](https://patient.info/news-and-features/coronavirus-how-quickly-do-covid-19-symptoms-develop-and-how-long-do-they-last) # * ~5 days from infection to symptoms # # * [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution) # In[5]: x = np.linspace(0.0,30.0,1000) y = covid19.gamma_dist.pdf(x) fig=plt.figure(figsize=(14, 8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(111) ax.plot(x,y) # In[6]: covid19.gamma_loc,covid19.gamma_k,covid19.gamme_theta # In[5]: country_name, first_date, init_add = 'China', None, 0 # cfr_estimate, timeshift = covid19.calculate_delay_between_new_cases_and_death(country_name, first_date=first_date, init_add=init_add) # print(cfr_estimate, timeshift) # loc = max(timeshift - (gamma_mean - gamma_loc), 0.0) china_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add) china_mortality_analysis.fit() print(china_mortality_analysis.death_rate()) china_mortality_analysis.plot() # In[6]: china_mortality_analysis.df.tail() # In[8]: # china_mortality_analysis.df_lifelines_individual.observed_death.sum() # In[9]: # china_mortality_analysis.df.head() # In[10]: # china_mortality_analysis.df.tail() # In[19]: # china_mortality_analysis.fit() # In[12]: # china_mortality_analysis.wbf.print_summary() # expected_life_time = china_mortality_analysis.wbf.lambda_ * scipy.special.gamma(1 + 1 / china_mortality_analysis.wbf.rho_) # expected_life_time/365 # In[20]: # china_mortality_analysis.death_rate() # In[21]: # china_mortality_analysis.plot() # In[7]: alternative_germany_data = covid19.get_rki_df() alternative_germany_data.tail() # In[8]: country_name, first_date, init_add = 'Germany', pd.to_datetime('2020-03-09'), 0.0 germany_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_germany_data) germany_mortality_analysis.fit() print(germany_mortality_analysis.print_death_rate()) germany_mortality_analysis.plot() # In[19]: # germany_mortality_analysis.ll.shift_and_scale_popt # In[20]: # germany_mortality_analysis.ll.plot_lead_lag() # In[9]: germany_mortality_analysis.plot_infection_and_death_curves() # In[9]: germany_mortality_analysis.df.tail() # In[10]: germany_mortality_analysis.project_death_and_hospitalization() # In[9]: germany_mortality_analysis.project_death_and_hospitalization() # In[10]: country_name, first_date, init_add = 'Austria', pd.to_datetime('2020-03-12'), 600 austria_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add) austria_mortality_analysis.fit() print(austria_mortality_analysis.print_death_rate()) austria_mortality_analysis.plot() # In[11]: austria_mortality_analysis.df.tail() # In[12]: austria_mortality_analysis.project_death_and_hospitalization() # In[14]: austria_mortality_analysis.project_death_and_hospitalization() # In[15]: country_name, first_date, init_add = 'Korea, South', None, 0 south_korea_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add) south_korea_mortality_analysis.fit() # south_korea_mortality_analysis2 = covid19.MortalityAnalysis(south_korea_name, first_date=pd.to_datetime('2020-02-20'), init_add=900) # south_korea_mortality_analysis2.fit() print(south_korea_mortality_analysis.print_death_rate()) # print(south_korea_mortality_analysis2.death_rate()) # print(south_korea_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) south_korea_mortality_analysis.plot() # In[16]: south_korea_mortality_analysis.df.tail() # In[20]: # south_korea_mortality_analysis.prepend_df # In[13]: country_name, first_date, init_add = 'United Kingdom', pd.to_datetime('2020-03-05'), 800 uk_mortality_analysis = covid19.MortalityAnalysis(country_name) uk_mortality_analysis.fit() print(uk_mortality_analysis.print_death_rate()) # uk_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, mult=4.0) # uk_mortality_analysis2.fit() # print(uk_mortality_analysis2.death_rate()) # print(uk_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) uk_mortality_analysis.plot() # In[18]: uk_mortality_analysis.plot_infection_and_death_curves() # In[14]: uk_mortality_analysis.df.tail() # In[15]: uk_mortality_analysis.project_death_and_hospitalization() # In[20]: uk_mortality_analysis.project_death_and_hospitalization() # In[30]: # pd.options.mode.chained_assignment = "raise" # In[16]: country_name, first_date, init_add = 'US', pd.to_datetime('2020-02-29'), 950 us_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add) us_mortality_analysis.fit() # us_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=450, mult=1.5) # us_mortality_analysis2.fit() print(us_mortality_analysis.print_death_rate()) # print(us_mortality_analysis2.death_rate()) # print(us_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) us_mortality_analysis.plot() # In[22]: us_mortality_analysis.plot_infection_and_death_curves() # In[17]: us_mortality_analysis.df.tail() # In[18]: us_mortality_analysis.project_death_and_hospitalization() # In[24]: us_mortality_analysis.project_death_and_hospitalization() # In[19]: alternative_italy_data = covid19.get_italy_df() alternative_italy_data.tail() # In[20]: country_name, first_date, init_add = 'Italy', pd.to_datetime('2020-02-21'), 0 italy_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_italy_data) italy_mortality_analysis.fit() print(italy_mortality_analysis.print_death_rate()) # italy_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=2000, mult=4.0) # italy_mortality_analysis2.fit() # print(italy_mortality_analysis2.death_rate()) # print(italy_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) italy_mortality_analysis.plot() # In[21]: italy_mortality_analysis.df.tail() # In[22]: italy_mortality_analysis.project_death_and_hospitalization() # In[28]: italy_mortality_analysis.project_death_and_hospitalization() # In[37]: # italy_mortality_analysis2.prepend_df # In[23]: alternative_spain_data = covid19.get_spain_df() alternative_spain_data.tail() # In[24]: spain_mortality_analysis = covid19.MortalityAnalysis('Spain', df=alternative_spain_data) spain_mortality_analysis.fit() print(spain_mortality_analysis.print_death_rate()) # spain_mortality_analysis2 = covid19.MortalityAnalysis('Spain', first_date=pd.to_datetime('2020-03-03'), init_add=800, mult=3.0) # spain_mortality_analysis2.fit() # print(spain_mortality_analysis2.death_rate()) # print(spain_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) spain_mortality_analysis.plot() # In[25]: spain_mortality_analysis.df.tail() # In[26]: spain_mortality_analysis.project_death_and_hospitalization() # In[32]: spain_mortality_analysis.project_death_and_hospitalization() # In[27]: alternative_france_data = covid19.get_france_df() alternative_france_data.tail() # In[46]: france_mortality_analysis = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), df=alternative_france_data) france_mortality_analysis.fit() print(france_mortality_analysis.print_death_rate()) # france_mortality_analysis2 = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), init_add=500, mult=4) # france_mortality_analysis2.fit() # print(france_mortality_analysis2.death_rate()) # print(france_mortality_analysis2.prepend_df['confirmed'].iloc[-1]) france_mortality_analysis.plot() # In[35]: france_mortality_analysis.plot_infection_and_death_curves() # In[36]: france_mortality_analysis.df.tail() # | date | note | # | --- | --- | # | 2020-04-20 | expected_death (17296.0) < today_death(19718); death rate via survival analysis (15.36) << death rate via affine transform of curve estimates (18.7) | # | 2020-04-21 | expected_death (17520.0) < today_death(20265); death rate via survival analysis (15.28) << death rate via affine transform of curve estimates (18.9) | # | 2020-04-24 | expected_death (19208.0) < today_death(21856); death rate via survival analysis (15.9) << death rate via affine transform of curve estimates (19.2) | # In[37]: france_mortality_analysis.project_death_and_hospitalization() # In[34]: france_mortality_analysis.project_death_and_hospitalization() # In[33]: import rpy2 print(rpy2.__version__) # In[34]: import rpy2.robjects.packages as rpackages baseR = rpackages.importr('base') print(baseR.R_Version().rx('version.string')) # In[35]: # from rpy2.rinterface import R_VERSION_BUILD # print(R_VERSION_BUILD) # In[36]: import IPython.display import rpy2, rpy2.robjects, rpy2.robjects.pandas2ri, rpy2.rinterface, rpy2.robjects.packages, rpy2.interactive, rpy2.robjects.lib.ggplot2, rpy2.robjects.lib.grdevices rpy2.robjects.pandas2ri.activate() from rpy2.robjects.packages import importr # import R's "base" package base = importr('base') # import rpy2's package module import rpy2.robjects.packages as rpackages # import R's utility package utils = rpackages.importr('utils') # select a mirror for R packages utils.chooseCRANmirror(ind=1) # select the first mirror in the list # R package names packnames = ('LexisPlotR',) # R vector of strings from rpy2.robjects.vectors import StrVector # In[37]: grdevices = rpy2.robjects.packages.importr('grDevices') # Selectively install what needs to be install. # We are fancy, just because we can. names_to_install = [x for x in packnames if not rpackages.isinstalled(x)] if len(names_to_install) > 0: utils.install_packages(StrVector(names_to_install)) lexis = importr('LexisPlotR') lexis # In[38]: lexis_grid = rpy2.robjects.r['lexis.grid'] lexis_lifeline = rpy2.robjects.r['lexis.lifeline'] # In[39]: def plot_lexis(mortality_analysis_instance): mylexis = lexis_grid(year_start = 2020, year_end = 2021, age_start = 0, age_end = 1) # lwd = 0.1 alpha = 1.0 ix_present = ~mortality_analysis_instance.df_lifelines_individual.observed_death ix_lost = mortality_analysis_instance.df_lifelines_individual.observed_death mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_present], exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_present], colour = "orange", alpha = alpha, lwd = 0.4) mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_lost] , exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_lost] , colour = "blue" , alpha = alpha, lwd = 0.4, lineends = True) with rpy2.robjects.lib.grdevices.render_to_bytesio(grdevices.png, width=1.5*1024, height=1.5*896, res=90) as img: rpy2.robjects.r.print(mylexis) IPython.display.display(IPython.display.Image(data=img.getvalue(), format='png', embed=True)) # In[40]: # plot_lexis(italy_mortality_analysis) # In[41]: # plot_lexis(italy_mortality_analysis2) # In[42]: plot_lexis(south_korea_mortality_analysis) # In[43]: plot_lexis(germany_mortality_analysis) # In[47]: plot_lexis(france_mortality_analysis)