%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 last updated: 2020-08-03 CPython 3.6.10 IPython 7.15.0 numpy 1.18.5 xarray 0.15.1 scipy 1.5.0 pandas 1.0.5 sklearn 0.23.1 matplotlib 3.2.2 seaborn 0.10.1 pymc3 3.9.2
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import sklearn, sklearn.linear_model
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
# Seed NumPy's global random generator so repeated runs draw the same numbers.
SEED = 42
np.random.seed(SEED)

# pandas display limits: up to 500 columns and 1000 characters per line.
pd.options.display.max_columns = 500
pd.options.display.width = 1000
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NumPy array printing: 10 edge items, 1000-character lines, suppress=True.
np.set_printoptions(edgeitems=10, linewidth=1000, suppress=True)
np.core.arrayprint._line_width = 180
sns.set()
# sns.set_style("whitegrid")
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Each positional argument is either a NumPy array (wrapped in a
    ``pandas.DataFrame`` first) or an object providing ``to_html()``.
    The generated tables are concatenated and shown as raw HTML with every
    ``<table>`` element styled ``display:inline``.
    """
    # isinstance also accepts ndarray subclasses, unlike an exact type comparison.
    frames = (pd.DataFrame(item) if isinstance(item, np.ndarray) else item for item in args)
    html_str = ''.join(frame.to_html() for frame in frames)
    # Patch opening tags only: replacing the bare word 'table' would also
    # rewrite the closing '</table>' tags and any cell text containing it.
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)
# Stylesheet snippet that sets `flex-direction: row` on `.output` elements.
# NOTE(review): CSS is not referenced anywhere in the visible part of this notebook.
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
    """Show several graphs in one row of an HTML table.

    Every positional argument must provide ``_repr_svg_()``; its SVG markup
    is placed in its own ``<td>`` cell and the resulting single-row table is
    displayed as raw HTML.
    """
    cells = ''.join('<td>' + graph._repr_svg_() + '</td>' for graph in args)
    display_html('<table><tr>' + cells + '</tr></table>', raw=True)
# Inject a style rule that forces the `.container` element to 70% width.
display(HTML("<style>.container { width:70% !important; }</style>"))
%load_ext autoreload
%autoreload 1
%aimport covid19
# Fetch the frame returned by covid19.get_rki_df() without any state/county
# argument, wrap it in a CasesByRegion object labelled 'Germany' and show
# the tail of that object.
df = covid19.get_rki_df()
cbr_germany = covid19.CasesByRegion('Germany', df=df)
cbr_germany.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 208959 | 193483 | 9135 | 595 | 0 | 4 |
2020-07-30 | 209486 | 193483 | 9144 | 527 | 0 | 9 |
2020-07-31 | 210009 | 193483 | 9147 | 523 | 0 | 3 |
2020-08-01 | 210305 | 193483 | 9154 | 296 | 0 | 7 |
2020-08-02 | 210395 | 193483 | 9154 | 90 | 0 | 0 |
cbr_germany.plot_daily_stats()
<matplotlib.axes._subplots.AxesSubplot at 0x7f572fb7a828>
# Fit the Germany object using 2020-03-09 as first date, then draw the fits
# on a wide single-axes figure, passing 2020-03-22 as restriction start date.
cbr_germany.fit(first_date=pd.to_datetime('2020-03-09'))
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_germany.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 303.88584981440266; growth-rate: 0.002, date:2020-09-10 00:00:00, projected value: 390.8530294495339
# country_name, first_date, init_add, restriction_start_date = 'Germany', pd.to_datetime('2020-03-09'), 0, datetime.datetime(2020, 3, 22)
# ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(country_name, in_df=cbr_germany.df, first_date=first_date, init_add=init_add)
# if len(lpopt) == 4:
# steady_state_rate = lpopt[1] * lpopt[3]
# else:
# steady_state_rate = 0.0
# print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate)
# fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
# ax = plt.subplot(1,1,1)
# ldf[['confirmed', label + '_fit']].iloc[1:,:].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5);
# ax.axvline(restriction_start_date);
# ax2 = ax.twinx()
# ldf[[label + '_fit_diff']].iloc[1:,:].plot(ax=ax2, color=['steelblue']);
# lbl = 'confirmed' + '_diff'
# ldf[[lbl]].iloc[1:,:].reset_index().plot.scatter(ax=ax2, x = 'index', y = lbl, c='limegreen')
# l = len(ax.get_yticks())
# a1 = ax.get_yticks()[0]
# e1 = ax.get_yticks()[-1]
# a2 = ax2.get_yticks()[0]
# e2 = ax2.get_yticks()[-1]
# ax.set_yticks(np.linspace(a1, e1, l));
# ax2.set_yticks(np.linspace(a2, e2, l));
# Largest value of the 'fit_diff' column of the fitted frame.
cbr_germany.fit_df0[['fit_diff']].agg(['max'])
fit_diff | |
---|---|
max | 4867.318507 |
# Run the R estimation on the Germany object, then display what R() returns,
# rounded to three decimals.
cbr_germany.calculate_R_estimates()
cbr_germany.R().round(3)
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 0.998 | 0.843 | 0.737 | 0.894 | 0.912 |
# Wide single-axes figure for the R plot of the Germany object.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_germany.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f572f149e48>
# --------------------------------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------------------------------
# Same steps for a single state: request the frame with state='Bayern',
# wrap it in a CasesByRegion object labelled 'Bavaria' and show its tail.
df = covid19.get_rki_df(state='Bayern')
cbr_bavaria = covid19.CasesByRegion('Bavaria', df=df)
cbr_bavaria.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 50944 | 47265 | 2622 | 81 | 0 | 0 |
2020-07-30 | 51007 | 47265 | 2622 | 63 | 0 | 0 |
2020-07-31 | 51114 | 47265 | 2622 | 107 | 0 | 0 |
2020-08-01 | 51154 | 47265 | 2622 | 40 | 0 | 0 |
2020-08-02 | 51155 | 47265 | 2622 | 1 | 0 | 0 |
# Inhabitants of Germany and of Bavaria.
einwohner_deutschland = 83019213.0
einwohner_bayern = 13076721.0

# Bavaria's share of the German population (a fraction between 0 and 1).
prozent_bayern = einwohner_bayern / einwohner_deutschland

# Scale the value 100 by that share, drop the fractional part and add one.
bavaria_new_confirmed_threshold = math.floor(100.0 * prozent_bayern) + 1.0
bavaria_new_confirmed_threshold
16.0
# Fit the Bavaria object from 2020-03-09 on with the population-scaled
# threshold, then draw the fits on a wide single-axes figure, passing
# 2020-03-22 as restriction start date.
cbr_bavaria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=bavaria_new_confirmed_threshold,
)
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 80.69203984179356; growth-rate: 0.001, date:2020-09-10 00:00:00, projected value: 58.88780206634304
cbr_bavaria.calculate_R_estimates()
cbr_bavaria.R().round(3)
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 0.999 | 0.985 | 0.928 | 1.093 | 1.025 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_bavaria.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f57281989b0>
df = covid19.get_rki_df(state='Nordrhein-Westfalen')
cbr_nrw = covid19.CasesByRegion('NRW', df=df)
cbr_nrw.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 48748 | 44540 | 1738 | 250 | 0 | 1 |
2020-07-30 | 48976 | 44540 | 1738 | 228 | 0 | 0 |
2020-07-31 | 49217 | 44540 | 1738 | 241 | 0 | 0 |
2020-08-01 | 49363 | 44540 | 1738 | 146 | 0 | 0 |
2020-08-02 | 49434 | 44540 | 1738 | 71 | 0 | 0 |
# Inhabitants of North Rhine-Westphalia and their share of the German population.
einwohner_nrw = 17932651.0
prozent_nrw = einwohner_nrw / einwohner_deutschland

# Scale the value 100 by that share, drop the fractional part and add one.
nrw_new_confirmed_threshold = math.floor(100.0 * prozent_nrw) + 1.0
nrw_new_confirmed_threshold
22.0
cbr_nrw.fit(first_date=pd.to_datetime('2020-03-09'), new_confirmed_threshold=nrw_new_confirmed_threshold)
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_nrw.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1505: UserWarning:Exception in curve_fit 1 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 800./ [76940.460289 0.04298913 -0.10060466]
sigmoid+asymmetric+linear: seor: 93.32008759982396; growth-rate: 0.003, date:2020-09-10 00:00:00, projected value: 155.4332754077275
cbr_nrw.calculate_R_estimates()
cbr_nrw.R().round(3)
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 1.0 | 0.945 | 0.921 | 1.007 | 0.984 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_nrw.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f5718648518>
covid19.rki_data_df.Bundesland.unique()
array(['Schleswig-Holstein', 'Hamburg', 'Niedersachsen', 'Bremen', 'Nordrhein-Westfalen', 'Hessen', 'Rheinland-Pfalz', 'Baden-Württemberg', 'Bayern', 'Saarland', 'Berlin', 'Brandenburg', 'Mecklenburg-Vorpommern', 'Sachsen', 'Sachsen-Anhalt', 'Thüringen'], dtype=object)
df = covid19.get_rki_df(state='Baden-W')
cbr_bw = covid19.CasesByRegion('BW', df=df)
cbr_bw.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 37218 | 34616 | 1847 | 62 | 0 | 0 |
2020-07-30 | 37272 | 34616 | 1847 | 54 | 0 | 0 |
2020-07-31 | 37272 | 34616 | 1847 | 0 | 0 | 0 |
2020-08-01 | 37272 | 34616 | 1847 | 0 | 0 | 0 |
2020-08-02 | 37272 | 34616 | 1847 | 0 | 0 | 0 |
# Inhabitants of Baden-Wuerttemberg and their share of the German population.
einwohner_bw = 11069533.0
prozent_bw = einwohner_bw / einwohner_deutschland

# Scale the value 100 by that share, drop the fractional part and add one.
bw_new_confirmed_threshold = math.floor(100.0 * prozent_bw) + 1.0
bw_new_confirmed_threshold
14.0
cbr_bw.fit(first_date=pd.to_datetime('2020-03-09'), new_confirmed_threshold=bw_new_confirmed_threshold)
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_bw.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 60.543649370739864; growth-rate: 0.001, date:2020-09-10 00:00:00, projected value: 27.28878641816627
cbr_bw.calculate_R_estimates()
cbr_bw.R().round(3)
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 0.993 | 0.567 | 0.0 | 0.421 | 0.66 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_bw.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f5718452390>
df = covid19.get_rki_df(county='LK Traunstein')
cbr_traunstein = covid19.CasesByRegion('LK Traunstein', df=df)
cbr_traunstein.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 1316 | 1224 | 88 | 0 | 0 | 0 |
2020-07-30 | 1316 | 1224 | 88 | 0 | 0 | 0 |
2020-07-31 | 1316 | 1224 | 88 | 0 | 0 | 0 |
2020-08-01 | 1316 | 1224 | 88 | 0 | 0 | 0 |
2020-08-02 | 1316 | 1224 | 88 | 0 | 0 | 0 |
# Number of trailing days summed up for the incidence figures below.
tage_inzidenz = 10

# Inhabitant counts of the three districts examined in this notebook.
# https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html
einwohner_lk_traunstein, einwohner_sk_regensburg, einwohner_lk_regensburg = 177089, 152610, 193572

# New confirmed cases in LK Traunstein over the last `tage_inzidenz` rows.
tage_inzidenz_lk_traunstein = cbr_traunstein.df['new_confirmed'].iloc[-tage_inzidenz:].sum()

# (window total, average daily cases per 100,000 inhabitants, the same scaled to 7 days)
(
    round(tage_inzidenz_lk_traunstein, 1),
    round(tage_inzidenz_lk_traunstein / einwohner_lk_traunstein / tage_inzidenz * 100000, 1),
    round(tage_inzidenz_lk_traunstein / einwohner_lk_traunstein / tage_inzidenz * 100000 * 7, 1),
)
(2, 0.1, 0.8)
cbr_traunstein.plot_daily_stats()
<matplotlib.axes._subplots.AxesSubplot at 0x7f57183764e0>
cbr_traunstein.fit(first_date=pd.to_datetime('2020-03-09'))
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_traunstein.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric: seor: 4.889133887357203, max asymptotic: 1314.5034715455804; growth-rate: 0.0, date:2020-09-10 00:00:00, projected value: 0.006071546213641148
cbr_traunstein.calculate_R_estimates()
cbr_traunstein.R().round(3)
/home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:218: ValueWarning:A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 0.611 | 0.0 | 0.0 | 0.535 | 0.382 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_traunstein.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f57180f6f98>
df = covid19.get_rki_df(county='SK Regensburg')
cbr_sk_regensburg = covid19.CasesByRegion('SK Regensburg', df=df)
cbr_sk_regensburg.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 731 | 716 | 10 | 0 | 0 | 0 |
2020-07-30 | 731 | 716 | 10 | 0 | 0 | 0 |
2020-07-31 | 731 | 716 | 10 | 0 | 0 | 0 |
2020-08-01 | 731 | 716 | 10 | 0 | 0 | 0 |
2020-08-02 | 731 | 716 | 10 | 0 | 0 | 0 |
# New confirmed cases in SK Regensburg over the last `tage_inzidenz` rows.
tage_inzidenz_sk_regensburg = cbr_sk_regensburg.df['new_confirmed'].iloc[-tage_inzidenz:].sum()

# (window total, average daily cases per 100,000 inhabitants, the same scaled to 7 days)
(
    round(tage_inzidenz_sk_regensburg, 1),
    round(tage_inzidenz_sk_regensburg / einwohner_sk_regensburg / tage_inzidenz * 100000, 1),
    round(tage_inzidenz_sk_regensburg / einwohner_sk_regensburg / tage_inzidenz * 100000 * 7, 1),
)
(1, 0.1, 0.5)
cbr_sk_regensburg.plot_daily_stats()
<matplotlib.axes._subplots.AxesSubplot at 0x7f571802cd30>
df = covid19.get_rki_df(county='LK Regensburg')
cbr_lk_regensburg = covid19.CasesByRegion('LK Regensburg', df=df)
cbr_lk_regensburg.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 501 | 482 | 15 | 0 | 0 | 0 |
2020-07-30 | 504 | 482 | 15 | 3 | 0 | 0 |
2020-07-31 | 506 | 482 | 15 | 2 | 0 | 0 |
2020-08-01 | 506 | 482 | 15 | 0 | 0 | 0 |
2020-08-02 | 506 | 482 | 15 | 0 | 0 | 0 |
# New confirmed cases in LK Regensburg over the last `tage_inzidenz` rows.
tage_inzidenz_lk_regensburg = cbr_lk_regensburg.df['new_confirmed'][-tage_inzidenz:].sum()

# (window total, average daily cases per 100,000 inhabitants, the same scaled to 7 days)
# The window total is passed through round(..., 1) like the other two values,
# matching the LK Traunstein and SK Regensburg cells of this notebook.
(
    round(tage_inzidenz_lk_regensburg, 1),
    round(tage_inzidenz_lk_regensburg / einwohner_lk_regensburg / tage_inzidenz * 100000, 1),
    round(tage_inzidenz_lk_regensburg / einwohner_lk_regensburg / tage_inzidenz * 100000 * 7, 1),
)
(7, 0.4, 2.5)
cbr_lk_regensburg.plot_daily_stats()
<matplotlib.axes._subplots.AxesSubplot at 0x7f5710737470>
# --------------------------------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------------------------------
df = covid19.get_rki_df(time_anchor_column_name='Meldedatum')
cbr_germany2 = covid19.CasesByRegion('Germany', df=df)
cbr_germany2.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 208280 | 193432 | 9135 | 900 | 34 | 4 |
2020-07-30 | 209112 | 193461 | 9144 | 832 | 29 | 9 |
2020-07-31 | 209851 | 193481 | 9147 | 739 | 20 | 3 |
2020-08-01 | 210280 | 193482 | 9154 | 429 | 1 | 7 |
2020-08-02 | 210395 | 193483 | 9154 | 115 | 1 | 0 |
cbr_germany2.plot_daily_stats(days=60)
<matplotlib.axes._subplots.AxesSubplot at 0x7f571058d320>
cbr_germany2.fit(first_date=pd.to_datetime('2020-03-09'))
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany2.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 558.2499243693152; growth-rate: 0.002, date:2020-09-10 00:00:00, projected value: 454.5993090898291
cbr_germany2.fit_df0[['fit_diff']].apply(['max'])
fit_diff | |
---|---|
max | 5331.216342 |
cbr_germany2.calculate_R_estimates()
cbr_germany2.R().round(3)
/home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:218: ValueWarning:A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-31 | 1.0 | 1.108 | 0.977 | 1.113 | 1.074 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany2.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f5718139278>
cbr_germany3 = covid19.CasesByRegion('Germany')
cbr_germany3.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
2020-07-29 | 208546 | 191279 | 9135 | 839 | 568 | 4 |
2020-07-30 | 209535 | 191551 | 9144 | 989 | 272 | 9 |
2020-07-31 | 210399 | 191992 | 9147 | 864 | 441 | 3 |
2020-08-01 | 211005 | 192636 | 9154 | 606 | 644 | 7 |
2020-08-02 | 211220 | 192908 | 9154 | 215 | 272 | 0 |
cbr_germany3.plot_daily_stats(days=60)
<matplotlib.axes._subplots.AxesSubplot at 0x7f571847aef0>
cbr_germany3.fit(first_date=pd.to_datetime('2020-03-09'))
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany3.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 528.5346737887775; growth-rate: 0.002, date:2020-09-10 00:00:00, projected value: 466.26140900032766
cbr_germany3.calculate_R_estimates()
cbr_germany3.R().round(3)
/home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:218: ValueWarning:A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
2020-07-31 | 1.0 | 1.325 | 1.321 | 1.294 | 1.207 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany3.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f57280de358>
import io

import requests

# Download the RKI nowcasting workbook, parse its last sheet, store a local
# copy of the parsed sheet and show its last rows.
# https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting.html
# rki_nowcasting_data_url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting_Zahlen.xlsx'
rki_nowcasting_data_url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting_Zahlen.xlsx?__blob=publicationFile'

# Redirects are followed to get the content after redirection; the timeout
# keeps a stalled connection from blocking the notebook indefinitely.
r = requests.get(rki_nowcasting_data_url, allow_redirects=True, timeout=60)
# Stop here on an HTTP error status instead of handing an error page to the
# Excel parser, which would fail with a far less helpful message.
r.raise_for_status()
# r.url, r.content

# Wrap the downloaded bytes in a file-like object for the Excel reader.
xd = pd.ExcelFile(io.BytesIO(r.content))
rki_nowcasting_df_ = xd.parse(xd.sheet_names[-1])
rki_nowcasting_df_.to_excel("rki_nowcasting_data.xlsx")
rki_nowcasting_df_.tail()
Datum des Erkrankungsbeginns | Punktschätzer der Anzahl Neuerkrankungen (ohne Glättung) | Untere Grenze des 95%-Prädiktionsintervalls der Anzahl Neuerkrankungen (ohne Glä | Obere Grenze des 95%-Prädiktionsintervalls der Anzahl Neuerkrankungen (ohne Glät | Punktschätzer der Anzahl Neuerkrankungen | Untere Grenze des 95%-Prädiktionsintervalls der Anzahl Neuerkrankungen | Obere Grenze des 95%-Prädiktionsintervalls der Anzahl Neuerkrankungen | Punktschätzer der Reproduktionszahl R | Untere Grenze des 95%-Prädiktionsintervalls der Reproduktionszahl R | Obere Grenze des 95%-Prädiktionsintervalls der Reproduktionszahl R | Punktschätzer des 7-Tage-R Wertes | Untere Grenze des 95%-Prädiktionsintervalls des 7-Tage-R Wertes | Obere Grenze des 95%-Prädiktionsintervalls des 7-Tage-R Wertes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
145 | 2020-07-25 | 612 | 472 | 751 | 673 | 561 | 792 | 1.15 | 1.05 | 1.26 | 1.15 | 1.08 | 1.22 |
146 | 2020-07-26 | 620 | 461 | 766 | 647 | 520 | 773 | 1.00 | 0.89 | 1.13 | 1.14 | 1.06 | 1.24 |
147 | 2020-07-27 | 845 | 590 | 1139 | 691 | 524 | 864 | 1.01 | 0.87 | 1.14 | 1.12 | 1.01 | 1.24 |
148 | 2020-07-28 | 788 | 496 | 1095 | 716 | 505 | 938 | 1.03 | 0.84 | 1.22 | 1.09 | 0.95 | 1.22 |
149 | 2020-07-29 | 728 | 355 | 1073 | 745 | 475 | 1018 | 1.11 | 0.89 | 1.33 | NaN | NaN | NaN |
# Turn the nowcasting sheet into a frame with the column layout used by the
# other CasesByRegion objects in this notebook (confirmed / recovered / death
# plus their daily differences) and wrap it as a further 'Germany' object.

# Keep the onset-date column and the two point-estimate columns for new cases
# (unsmoothed and smoothed), rename them and index the frame by date.
rki_nowcasting_df = rki_nowcasting_df_[['Datum des Erkrankungsbeginns', 'Punktschätzer der Anzahl Neuerkrankungen (ohne Glättung)', 'Punktschätzer der Anzahl Neuerkrankungen']].copy()
rki_nowcasting_df.columns = ['index', 'new_confirmed', 'new_confirmed_']
rki_nowcasting_df['index'] = pd.to_datetime(rki_nowcasting_df['index'])
rki_nowcasting_df = rki_nowcasting_df.set_index('index')
# Anchor: the cumulative confirmed count that cbr_germany holds for the day
# after the last nowcasting date. The .loc lookup raises KeyError if
# cbr_germany has no row for that date.
last_date = rki_nowcasting_df.index[-1] + pd.DateOffset(days=1)
last_value = cbr_germany.df['confirmed'].loc[last_date]
# last_date, last_value
ldf = pd.DataFrame(index=rki_nowcasting_df.index)
# Reconstruct a cumulative series backwards from the anchor: each date gets
# the anchor minus the sum of the unsmoothed new cases from that date onwards.
ldf['confirmed'] = last_value - rki_nowcasting_df['new_confirmed'].values[::-1].cumsum()[::-1]
# Append the anchor row itself; this must happen while 'confirmed' is the
# only column, because a one-element list is assigned to the new row.
ldf.loc[last_date] = [last_value]
# The nowcasting sheet columns used here carry no recovered/death figures; fill with zeros.
ldf['recovered'] = 0
ldf['death'] = 0
# NOTE(review): judging by the displayed outputs, each nowcasting value ends
# up on the following calendar day in 'new_confirmed' (e.g. the 2020-07-25
# estimate of 612 shows on 2020-07-26) - confirm this shift is intended.
ldf['new_confirmed'] = covid19.discrete_diff(ldf['confirmed'])
ldf['new_recovered'] = covid19.discrete_diff(ldf['recovered'])
ldf['new_death'] = covid19.discrete_diff(ldf['death'])
rki_nowcasting_df = ldf.copy()
cbr_germany4 = covid19.CasesByRegion('Germany', df=rki_nowcasting_df)
cbr_germany4.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-26 | 206505 | 0 | 0 | 612.0 | 0.0 | 0.0 |
2020-07-27 | 207125 | 0 | 0 | 620.0 | 0.0 | 0.0 |
2020-07-28 | 207970 | 0 | 0 | 845.0 | 0.0 | 0.0 |
2020-07-29 | 208758 | 0 | 0 | 788.0 | 0.0 | 0.0 |
2020-07-30 | 209486 | 0 | 0 | 728.0 | 0.0 | 0.0 |
cbr_germany4.plot_daily_stats(days=60)
<matplotlib.axes._subplots.AxesSubplot at 0x7f5710107da0>
cbr_germany4.fit(first_date=pd.to_datetime('2020-03-09'))
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany4.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 246.72280764129792; growth-rate: 0.002, date:2020-09-07 00:00:00, projected value: 424.83971739873175
cbr_germany4.calculate_R_estimates()
cbr_germany4.R().round(3)
/home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:162: ValueWarning:No frequency information was provided, so inferred frequency D will be used.
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
index | |||||
2020-07-28 | 0.999 | 1.218 | 1.281 | 1.281 | 1.166 |
fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1,1,1)
cbr_germany4.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f56e0112f28>
# Side-by-side comparison of the daily new cases held by cbr_germany and by
# cbr_germany4, restricted to the dates present in cbr_germany4.
ldf = cbr_germany.df.loc[cbr_germany4.df.index, ['new_confirmed']]
ldf['new_confirmed_nc'] = cbr_germany4.df['new_confirmed']
# Positive delta: cbr_germany4 holds more new cases for that date than cbr_germany.
ldf['delta'] = ldf['new_confirmed_nc'] - ldf['new_confirmed']
# Builtin int instead of the alias np.int, which NumPy deprecated in 1.20
# and removed in 1.24; the resulting dtype is the same.
ldf = ldf.astype(int)
# Show the last 30 rows.
ldf.iloc[-30:]
new_confirmed | new_confirmed_nc | delta | |
---|---|---|---|
index | |||
2020-07-01 | 425 | 442 | 17 |
2020-07-02 | 390 | 402 | 12 |
2020-07-03 | 397 | 342 | -55 |
2020-07-04 | 281 | 348 | 67 |
2020-07-05 | 228 | 295 | 67 |
2020-07-06 | 343 | 300 | -43 |
2020-07-07 | 370 | 361 | -9 |
2020-07-08 | 389 | 352 | -37 |
2020-07-09 | 385 | 335 | -50 |
2020-07-10 | 457 | 377 | -80 |
2020-07-11 | 341 | 437 | 96 |
2020-07-12 | 226 | 394 | 168 |
2020-07-13 | 448 | 366 | -82 |
2020-07-14 | 461 | 540 | 79 |
2020-07-15 | 565 | 528 | -37 |
2020-07-16 | 489 | 552 | 63 |
2020-07-17 | 513 | 473 | -40 |
2020-07-18 | 456 | 520 | 64 |
2020-07-19 | 359 | 476 | 117 |
2020-07-20 | 543 | 515 | -28 |
2020-07-21 | 566 | 641 | 75 |
2020-07-22 | 630 | 703 | 73 |
2020-07-23 | 593 | 724 | 131 |
2020-07-24 | 677 | 671 | -6 |
2020-07-25 | 505 | 686 | 181 |
2020-07-26 | 339 | 612 | 273 |
2020-07-27 | 494 | 620 | 126 |
2020-07-28 | 537 | 845 | 308 |
2020-07-29 | 595 | 788 | 193 |
2020-07-30 | 527 | 728 | 201 |