In [1]:
# Record the author tag, the run date and the interpreter/package versions
# this notebook was executed with (printed directly below the cell).
%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 
last updated: 2020-08-03 

CPython 3.6.10
IPython 7.15.0

numpy 1.18.5
xarray 0.15.1
scipy 1.5.0
pandas 1.0.5
sklearn 0.23.1
matplotlib 3.2.2
seaborn 0.10.1
pymc3 3.9.2
In [2]:
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl

import pymc3 as pm

import theano as thno
import theano.tensor as T

import sklearn, sklearn.linear_model

import datetime, time, math
from dateutil import relativedelta

from collections import OrderedDict

# Seed NumPy's global RNG so that anything drawing from it is repeatable.
SEED = 42
np.random.seed(SEED)

# pandas display: allow up to 500 columns and 1000 characters per line
# before the repr elides or wraps.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NumPy printing, configured in a single call: 10 edge items per dimension,
# 1000-character lines, no scientific notation for small values.
# The former `np.core.arrayprint._line_width = 180` assignment was removed:
# it wrote a private module global that NumPy's array printer does not read
# (the width comes from set_printoptions) and it contradicted linewidth=1000.
np.set_printoptions(edgeitems=10, linewidth=1000, suppress=True)

# Switch matplotlib to seaborn's default aesthetics for every figure below.
sns.set()
# sns.set_style("whitegrid")
In [3]:
from IPython.display import display, HTML

from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in one notebook output.

    Parameters
    ----------
    *args : pandas.DataFrame or numpy.ndarray
        Objects to show. ndarrays (including subclasses) are wrapped in a
        DataFrame first; anything else must provide ``to_html()``.

    The HTML of all tables is concatenated, every opening ``<table`` tag is
    given ``style="display:inline"``, and the result is handed to
    ``display_html(..., raw=True)``. Nothing is returned.
    """
    fragments = []
    for df in args:
        # isinstance instead of an exact type comparison: ndarray subclasses
        # have no to_html() either and need the same conversion.
        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)
        fragments.append(df.to_html())
    # Patch only the opening tags. Replacing every occurrence of 'table'
    # would also rewrite '</table>' and any header/cell text containing
    # that word.
    html_str = ''.join(fragments).replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# CSS rule that switches `.output` containers to a row-wise flex layout.
# NOTE(review): `CSS` is not referenced in any of the visible cells — confirm
# it is still needed or inject it explicitly where it is meant to apply.
CSS = """
.output {
    flex-direction: row;
}
"""

def display_graphs_side_by_side(*args):
    """Show several SVG-renderable objects in one row of an HTML table.

    Every argument must provide ``_repr_svg_()``. Each SVG string becomes
    one ``<td>`` cell of a single-row table, which is then passed to
    ``display_html(..., raw=True)``. Nothing is returned.
    """
    cells = ''.join('<td>' + graph._repr_svg_() + '</td>' for graph in args)
    display_html('<table><tr>' + cells + '</tr></table>', raw=True)
    

# Inject a <style> tag that forces the `.container` element to 70% width.
display(HTML("<style>.container { width:70% !important; }</style>"))
In [4]:
# Autoreload mode 1: before each cell runs, re-import only the modules
# registered with %aimport — here the local covid19 module — so edits to it
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 1
%aimport covid19
In [5]:
# Load the RKI case table without a state/county filter (presumably the
# whole of Germany) and wrap it in a CasesByRegion labelled 'Germany'.
df = covid19.get_rki_df()
cbr_germany = covid19.CasesByRegion('Germany', df=df)
# Show the latest rows: running totals (confirmed/recovered/death) plus the
# per-day new_* columns.
cbr_germany.tail()
Out[5]:
confirmed recovered death new_confirmed new_recovered new_death
index
2020-07-29 208959 193483 9135 595 0 4
2020-07-30 209486 193483 9144 527 0 9
2020-07-31 210009 193483 9147 523 0 3
2020-08-01 210305 193483 9154 296 0 7
2020-08-02 210395 193483 9154 90 0 0
In [6]:
# Draw the daily-statistics chart for Germany; the call returns the
# matplotlib Axes, whose repr is the cell output.
cbr_germany.plot_daily_stats()
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f572fb7a828>
In [7]:
# Fit the Germany series, passing 2020-03-09 as first_date.
cbr_germany.fit(first_date=pd.to_datetime('2020-03-09'))

# One wide axes (32x8 in at 80 dpi, white face, black edge) for the fit plot.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# restriction_start_date: presumably marked on the chart as the start of the
# contact restrictions — TODO confirm against covid19.plot_with_fits.
cbr_germany.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 303.88584981440266; growth-rate: 0.002, date:2020-09-10 00:00:00, projected value: 390.8530294495339
In [8]:
# country_name, first_date, init_add, restriction_start_date = 'Germany', pd.to_datetime('2020-03-09'), 0, datetime.datetime(2020, 3, 22)
# ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(country_name, in_df=cbr_germany.df, first_date=first_date, init_add=init_add)
# if len(lpopt) == 4:
#     steady_state_rate = lpopt[1] * lpopt[3]
# else:
#     steady_state_rate = 0.0
# print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate)
# fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k')
# ax = plt.subplot(1,1,1)
# ldf[['confirmed', label + '_fit']].iloc[1:,:].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5);
# ax.axvline(restriction_start_date);

# ax2 = ax.twinx()
# ldf[[label + '_fit_diff']].iloc[1:,:].plot(ax=ax2, color=['steelblue']);
# lbl = 'confirmed' + '_diff'
# ldf[[lbl]].iloc[1:,:].reset_index().plot.scatter(ax=ax2, x = 'index', y = lbl, c='limegreen')


# l = len(ax.get_yticks())
# a1 = ax.get_yticks()[0]
# e1 = ax.get_yticks()[-1]
# a2 = ax2.get_yticks()[0]
# e2 = ax2.get_yticks()[-1]
# ax.set_yticks(np.linspace(a1, e1, l));
# ax2.set_yticks(np.linspace(a2, e2, l));
In [9]:
# Largest value in the `fit_diff` column of the Germany fit frame, returned
# as a one-row DataFrame indexed by 'max'.
cbr_germany.fit_df0[['fit_diff']].agg(['max'])
Out[9]:
fit_diff
max 4867.318507
In [10]:
# Compute the R estimates first; R() then reports them (presumably it reads
# what calculate_R_estimates() stored — confirm in covid19).
cbr_germany.calculate_R_estimates()
# Columns shown: fit_R, gp_R, kf_R, ll_R and mean_R, rounded to 3 decimals.
# NOTE(review): in the recorded output mean_R equals the average of fit_R,
# gp_R and ll_R (kf_R is not part of it), and the reported date is two days
# before the last data row — confirm both are intended.
cbr_germany.R().round(3)
Out[10]:
fit_R gp_R kf_R ll_R mean_R
index
2020-07-31 0.998 0.843 0.737 0.894 0.912
In [11]:
# Wide single-axes canvas for the R-estimate curves of Germany.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# plot_start_date='2020-03-10' can be added to the call (left disabled, as before).
cbr_germany.plot_R(ax=ax)
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f572f149e48>
In [12]:
# --------------------------------------------------------------------------------------------------------------------------------



# --------------------------------------------------------------------------------------------------------------------------------
In [13]:
# Load the RKI case table restricted to the state 'Bayern' and wrap it in a
# CasesByRegion labelled 'Bavaria'. Note that `df` is rebound here and no
# longer refers to the table loaded for the previous region.
df = covid19.get_rki_df(state='Bayern')
cbr_bavaria = covid19.CasesByRegion('Bavaria', df=df)
# Latest rows as a freshness check.
cbr_bavaria.tail()
Out[13]:
confirmed recovered death new_confirmed new_recovered new_death
index
2020-07-29 50944 47265 2622 81 0 0
2020-07-30 51007 47265 2622 63 0 0
2020-07-31 51114 47265 2622 107 0 0
2020-08-01 51154 47265 2622 40 0 0
2020-08-02 51155 47265 2622 1 0 0
In [14]:
# Population (inhabitants) of Germany and of Bavaria.
einwohner_deutschland = 83_019_213.0
einwohner_bayern = 13_076_721.0
# Bavaria's share of the national population. Despite the name this is a
# fraction (about 0.158), not a percentage.
prozent_bayern = einwohner_bayern / einwohner_deutschland

# Scale a figure of 100 by that share, drop the fractional part and add one
# (15.75 -> 16.0); the result stays a float.
bavaria_new_confirmed_threshold = 1 + (prozent_bayern * 100.0) // 1
bavaria_new_confirmed_threshold
Out[14]:
16.0
In [15]:
# Fit the Bavaria series, passing 2020-03-09 as first_date and the
# population-scaled value as new_confirmed_threshold.
cbr_bavaria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=bavaria_new_confirmed_threshold,
)

# One wide axes (32x8 in at 80 dpi, white face, black edge) for the fit plot.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 80.69203984179356; growth-rate: 0.001, date:2020-09-10 00:00:00, projected value: 58.88780206634304
In [16]:
# Compute the R estimates for Bavaria, then show the reported row (fit_R,
# gp_R, kf_R, ll_R, mean_R) rounded to 3 decimals.
cbr_bavaria.calculate_R_estimates()
cbr_bavaria.R().round(3)
Out[16]:
fit_R gp_R kf_R ll_R mean_R
index
2020-07-31 0.999 0.985 0.928 1.093 1.025
In [17]:
# Wide single-axes canvas for the R-estimate curves of Bavaria.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# plot_start_date='2020-03-10' can be added to the call (left disabled, as before).
cbr_bavaria.plot_R(ax=ax)
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f57281989b0>
In [18]:
# Load the RKI case table restricted to the state 'Nordrhein-Westfalen' and
# wrap it in a CasesByRegion labelled 'NRW'. `df` is rebound here.
df = covid19.get_rki_df(state='Nordrhein-Westfalen')
cbr_nrw = covid19.CasesByRegion('NRW', df=df)
# Latest rows as a freshness check.
cbr_nrw.tail()
Out[18]:
confirmed recovered death new_confirmed new_recovered new_death
index
2020-07-29 48748 44540 1738 250 0 1
2020-07-30 48976 44540 1738 228 0 0
2020-07-31 49217 44540 1738 241 0 0
2020-08-01 49363 44540 1738 146 0 0
2020-08-02 49434 44540 1738 71 0 0
In [19]:
# Population (inhabitants) of North Rhine-Westphalia.
einwohner_nrw = 17_932_651.0
# NRW's share of the national population: a fraction, not a percentage.
# Relies on einwohner_deutschland defined in an earlier cell.
prozent_nrw = einwohner_nrw / einwohner_deutschland

# Scale a figure of 100 by that share, drop the fractional part and add one;
# the result stays a float.
nrw_new_confirmed_threshold = 1 + (prozent_nrw * 100.0) // 1
nrw_new_confirmed_threshold
Out[19]:
22.0
In [20]:
# Fit the NRW series, passing 2020-03-09 as first_date and the
# population-scaled value as new_confirmed_threshold.
cbr_nrw.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=nrw_new_confirmed_threshold,
)

# One wide axes (32x8 in at 80 dpi, white face, black edge) for the fit plot.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_nrw.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1505: UserWarning:Exception in curve_fit 1
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 800./ [76940.460289       0.04298913    -0.10060466]
sigmoid+asymmetric+linear: seor: 93.32008759982396; growth-rate: 0.003, date:2020-09-10 00:00:00, projected value: 155.4332754077275
In [21]:
# Compute the R estimates for NRW, then show the reported row (fit_R, gp_R,
# kf_R, ll_R, mean_R) rounded to 3 decimals.
cbr_nrw.calculate_R_estimates()
cbr_nrw.R().round(3)
Out[21]:
fit_R gp_R kf_R ll_R mean_R
index
2020-07-31 1.0 0.945 0.921 1.007 0.984
In [22]:
# Wide single-axes canvas for the R-estimate curves of NRW.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# plot_start_date='2020-03-10' can be added to the call (left disabled, as before).
cbr_nrw.plot_R(ax=ax)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5718648518>
In [23]:
# List the distinct values of the Bundesland column in the raw RKI table.
# NOTE(review): the recorded output shows the umlauts mis-decoded
# ('Baden-W├╝rttemberg', 'Th├╝ringen'), which points at an encoding mismatch
# when the data was read — confirm in covid19.
covid19.rki_data_df.Bundesland.unique()
Out[23]:
array(['Schleswig-Holstein', 'Hamburg', 'Niedersachsen', 'Bremen', 'Nordrhein-Westfalen', 'Hessen', 'Rheinland-Pfalz', 'Baden-W├╝rttemberg', 'Bayern', 'Saarland', 'Berlin', 'Brandenburg', 'Mecklenburg-Vorpommern', 'Sachsen', 'Sachsen-Anhalt', 'Th├╝ringen'], dtype=object)
In [24]:
# Select Baden-Württemberg through the ASCII-only fragment 'Baden-W'.
# NOTE(review): presumably get_rki_df matches state names by prefix or
# substring, and the umlaut is avoided because the names in rki_data_df are
# mis-decoded — confirm. `df` is rebound here.
df = covid19.get_rki_df(state='Baden-W')
cbr_bw = covid19.CasesByRegion('BW', df=df)
# Latest rows as a freshness check.
cbr_bw.tail()
Out[24]:
confirmed recovered death new_confirmed new_recovered new_death
index
2020-07-29 37218 34616 1847 62 0 0
2020-07-30 37272 34616 1847 54 0 0
2020-07-31 37272 34616 1847 0 0 0
2020-08-01 37272 34616 1847 0 0 0
2020-08-02 37272 34616 1847 0 0 0
In [25]:
# Population (inhabitants) of Baden-Württemberg.
einwohner_bw = 11_069_533.0
# BW's share of the national population: a fraction, not a percentage.
# Relies on einwohner_deutschland defined in an earlier cell.
prozent_bw = einwohner_bw / einwohner_deutschland

# Scale a figure of 100 by that share, drop the fractional part and add one;
# the result stays a float.
bw_new_confirmed_threshold = 1 + (prozent_bw * 100.0) // 1
bw_new_confirmed_threshold
Out[25]:
14.0
In [26]:
# Fit the BW series, passing 2020-03-09 as first_date and the
# population-scaled value as new_confirmed_threshold.
cbr_bw.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=bw_new_confirmed_threshold,
)

# One wide axes (32x8 in at 80 dpi, white face, black edge) for the fit plot.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bw.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 60.543649370739864; growth-rate: 0.001, date:2020-09-10 00:00:00, projected value: 27.28878641816627
In [27]:
# Compute the R estimates for BW, then show the reported row (fit_R, gp_R,
# kf_R, ll_R, mean_R) rounded to 3 decimals.
# NOTE(review): the last three data rows for BW have new_confirmed == 0 and
# kf_R comes out as 0.0 — possibly a reporting gap rather than a real drop;
# confirm before interpreting.
cbr_bw.calculate_R_estimates()
cbr_bw.R().round(3)
Out[27]:
fit_R gp_R kf_R ll_R mean_R
index
2020-07-31 0.993 0.567 0.0 0.421 0.66
In [28]:
# Wide single-axes canvas for the R-estimate curves of BW.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# plot_start_date='2020-03-10' can be added to the call (left disabled, as before).
cbr_bw.plot_R(ax=ax)
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5718452390>
In [29]:
# Load the RKI case table restricted to the county 'LK Traunstein' (county=
# instead of state=) and wrap it in a CasesByRegion with the same label.
# `df` is rebound here.
df = covid19.get_rki_df(county='LK Traunstein')
cbr_traunstein = covid19.CasesByRegion('LK Traunstein', df=df)
# Latest rows as a freshness check.
cbr_traunstein.tail()
Out[29]:
confirmed recovered death new_confirmed new_recovered new_death
index
2020-07-29 1316 1224 88 0 0 0
2020-07-30 1316 1224 88 0 0 0
2020-07-31 1316 1224 88 0 0 0
2020-08-01 1316 1224 88 0 0 0
2020-08-02 1316 1224 88 0 0 0
In [30]:
# Length, in days, of the trailing window over which new confirmed cases are
# summed for the incidence figures below.
tage_inzidenz = 10
In [31]:
# Inhabitants per county; used as the denominator when scaling case counts
# to a rate per 100,000 inhabitants.
# https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html
einwohner_lk_traunstein = 177089
einwohner_sk_regensburg = 152610
einwohner_lk_regensburg = 193572
In [32]:
# New confirmed cases in LK Traunstein summed over the trailing
# `tage_inzidenz` rows.
tage_inzidenz_lk_traunstein = cbr_traunstein.df['new_confirmed'][-tage_inzidenz:].sum()
# Average new cases per day and per 100,000 inhabitants over that window.
inzidenz_pro_tag_lk_traunstein = tage_inzidenz_lk_traunstein / einwohner_lk_traunstein / tage_inzidenz * 100000
# (window total, daily rate per 100k, daily rate scaled to 7 days), each
# rounded to one decimal.
(
    round(tage_inzidenz_lk_traunstein, 1),
    round(inzidenz_pro_tag_lk_traunstein, 1),
    round(inzidenz_pro_tag_lk_traunstein * 7, 1),
)
Out[32]:
(2, 0.1, 0.8)
In [33]:
# Draw the daily-statistics chart for LK Traunstein; the call returns the
# matplotlib Axes, whose repr is the cell output.
cbr_traunstein.plot_daily_stats()
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f57183764e0>
In [34]:
# Fit the LK Traunstein series, passing 2020-03-09 as first_date; no
# new_confirmed_threshold is given, so the method's default applies.
cbr_traunstein.fit(first_date=pd.to_datetime('2020-03-09'))

# One wide axes (32x8 in at 80 dpi, white face, black edge) for the fit plot.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_traunstein.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
 /home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric: seor: 4.889133887357203, max asymptotic: 1314.5034715455804; growth-rate: 0.0, date:2020-09-10 00:00:00, projected value: 0.006071546213641148
In [35]:
# Compute the R estimates for LK Traunstein, then show the reported row
# (fit_R, gp_R, kf_R, ll_R, mean_R) rounded to 3 decimals.
# NOTE(review): the recorded run emits a statsmodels ValueWarning about a date
# index without frequency information — presumably the series index has no
# freq set; confirm whether covid19 should set one.
cbr_traunstein.calculate_R_estimates()
cbr_traunstein.R().round(3)
 /home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:218: ValueWarning:A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
Out[35]:
fit_R gp_R kf_R ll_R mean_R
index
2020-07-31 0.611 0.0 0.0 0.535 0.382
In [36]:
# Wide single-axes canvas for the R-estimate curves of LK Traunstein.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
# plot_start_date='2020-03-10' can be added to the call (left disabled, as before).
cbr_traunstein.plot_R(ax=ax)
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f57180f6f98>