%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 last updated: 2020-08-03 CPython 3.6.10 IPython 7.15.0 numpy 1.18.5 xarray 0.15.1 scipy 1.5.0 pandas 1.0.5 sklearn 0.23.1 matplotlib 3.2.2 seaborn 0.10.1 pymc3 3.9.2
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import sklearn, sklearn.linear_model
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
# Fixed seed so that NumPy-based random draws are repeatable between runs.
SEED = 42
np.random.seed(SEED)

# pandas: let wide frames print with up to 500 columns and 1000 characters per line.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NumPy: more edge items, long output lines, and fixed-point instead of scientific notation.
np.set_printoptions(edgeitems=10, linewidth=1000, suppress=True)
# NOTE(review): this writes a private arrayprint attribute; confirm it still
# has an effect on the installed NumPy version.
np.core.arrayprint._line_width = 180

# Activate seaborn's default plot styling.
sns.set()
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Each positional argument must provide ``to_html()`` (e.g. a pandas
    DataFrame); NumPy arrays are wrapped in a ``pandas.DataFrame`` first.
    The generated tables are concatenated, switched to inline display and
    passed to ``display_html`` as raw HTML.
    """
    tables = []
    for frame in args:
        if isinstance(frame, np.ndarray):
            frame = pd.DataFrame(frame)
        tables.append(frame.to_html())
    # Patch only the opening tags: replacing the bare word 'table' would also
    # rewrite closing tags and any cell or column text containing "table".
    html_str = ''.join(tables).replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)
# CSS rule that sets `flex-direction: row` on `.output` elements.
# NOTE(review): CSS is not referenced anywhere in the visible part of this
# notebook; confirm it is still needed.
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
    """Show objects that expose ``_repr_svg_()`` in a single HTML table row.

    Every positional argument contributes one ``<td>`` cell holding its SVG
    representation; the assembled table is passed to ``display_html`` as
    raw HTML.
    """
    cells = ''.join('<td>' + graph._repr_svg_() + '</td>' for graph in args)
    display_html('<table><tr>' + cells + '</tr></table>', raw=True)
# Inject a style override that sets the width of `.container` elements to 70%.
display(HTML("<style>.container { width:70% !important; }</style>"))
%load_ext autoreload
%autoreload 1
%aimport covid19
Data Source: Bayerisches Landesamt für Gesundheit und Lebensmittelsicherheit (LGL): Übersicht der Fallzahlen von Coronavirusinfektionen
import requests
import urllib.request
import time
import bs4
import re
# from bs4 import BeautifulSoup
import decimal
import locale
import dateparser
# LGL page with the Bavarian coronavirus case overview that is scraped below.
url = 'https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm'
# Bound the wait so an unresponsive server cannot hang the notebook, and stop
# on HTTP error statuses instead of parsing an error page further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
response
<Response [200]>
# Parse the downloaded page with the stdlib-backed HTML parser.
soup = bs4.BeautifulSoup(response.text, "html.parser")
# soup.find_all(class_="accordion")
# Look up the element with id "tableFaelle"; its <tr> rows are read further down.
tableFaelle = soup.find(id="tableFaelle")
if tableFaelle is None:
    # find() returns None when nothing matches; fail here with a clear message
    # instead of an AttributeError when the rows are iterated.
    raise ValueError('No element with id "tableFaelle" found in the LGL page')
# tableFaelle
# dateparser.parse('01. Mrz 2020', date_formats=['%d. %b %Y'], languages=['de'])
# dateparser.parse('01. Mar 2020', date_formats=['%d. %b %Y'], languages=['de'])
# dateparser.parse('01. März 2020', languages=['de'])
# Switch to the German locale so that locale.atof() below parses the counts
# with German number formatting.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF8')

# Collect (report_date, new_confirmed) pairs first and build the frame once,
# rather than growing a DataFrame one row at a time.
rows = []
for i, tr in enumerate(tableFaelle.find_all('tr')):
    if i == 0:
        # Skip the first row (presumably the table header - confirm).
        continue
    td = tr.find_all('td')
    # The year is appended by hand (presumably the date cell carries none -
    # confirm), so this only holds for 2020 data.
    dtstr = td[0].text + '2020'
    # Expand the abbreviation 'Mrz' to 'März' before handing it to dateparser.
    dtstr = dtstr.replace('Mrz', 'März')
    dt = pd.to_datetime(dateparser.parse(dtstr, languages=['de']))
    # The fourth cell is used as the number of newly confirmed cases.
    ct = locale.atof(td[3].text, decimal.Decimal)
    rows.append((dt, ct))

df = pd.DataFrame(rows, columns=['report_date', 'new_confirmed'])
df = df.set_index('report_date')
# Running total of the daily new cases.
df['confirmed'] = df.new_confirmed.cumsum()
df = df[['confirmed', 'new_confirmed']]
# Builtin float is what the deprecated alias np.float referred to.
df = df.astype(float)
# df = df[df.index <= pd.to_datetime(datetime.datetime.now().date() - datetime.timedelta(days=2))]
# Current local date as a YYYY-MM-DD string.
today_string = datetime.date.today().isoformat()
today_string
'2020-08-03'
# Keep only rows dated at least two days before today
# (presumably because the newest reports are still incomplete - confirm).
cutoff = pd.to_datetime(datetime.date.today() - datetime.timedelta(days=2))
bavaria_df = df.loc[df.index <= cutoff].copy()
# Export the complete, unfiltered table.
df.to_excel("bavaria_covid19_data.xlsx")
# df.to_excel(today_string +"_bavaria_covid19_data.xlsx")
# Show the tails of the full and the filtered table next to each other.
display_side_by_side(df.tail(), bavaria_df.tail())
confirmed | new_confirmed | |
---|---|---|
report_date | ||
2020-07-27 | 50632.0 | 76.0 |
2020-07-28 | 50732.0 | 100.0 |
2020-07-29 | 50845.0 | 113.0 |
2020-07-30 | 50944.0 | 99.0 |
2020-07-31 | 51063.0 | 119.0 |
confirmed | new_confirmed | |
---|---|---|
report_date | ||
2020-07-27 | 50632.0 | 76.0 |
2020-07-28 | 50732.0 | 100.0 |
2020-07-29 | 50845.0 | 113.0 |
2020-07-30 | 50944.0 | 99.0 |
2020-07-31 | 51063.0 | 119.0 |
# The scraped table only provides confirmed counts, so the remaining columns
# are filled with zeros (presumably required by CasesByRegion - confirm).
for placeholder_column in ('recovered', 'death', 'new_recovered', 'new_death'):
    bavaria_df[placeholder_column] = 0
cbr_bavaria = covid19.CasesByRegion('Bavaria', df=bavaria_df)
Bavaria first dead person date: 2020-03-12
# One wide figure with a single axes; plot Bavaria's daily stats with days=40.
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_daily_stats(ax=ax, days=40)
<matplotlib.axes._subplots.AxesSubplot at 0x7f85a1deeda0>
# Inhabitants ("Einwohner") of Germany and of Bavaria.
einwohner_deutschland = 83_019_213.0
einwohner_bayern = 13_076_721.0
# Bavaria's share of the German population (a fraction, not a percentage,
# despite the name).
prozent_bayern = einwohner_bayern / einwohner_deutschland
prozent_bayern
0.15751439368619405
# Inhabitants ("Einwohner") of Austria.
einwohner_österreich = 8_793_370.0
# Austria's population relative to Germany's (a fraction, not a percentage).
prozent_österreich = einwohner_österreich / einwohner_deutschland
prozent_österreich
0.10591969837150829
# Scale the reference value 100 (presumably a Germany-wide daily new-case
# level - confirm) by Bavaria's population share, round down, then add one.
bavaria_new_confirmed_threshold = math.floor(100.0 * prozent_bayern) + 1.0
bavaria_new_confirmed_threshold
16.0
prediction date | predicted day for reaching threshold | predicted max | growth rate | steady state rate |
---|---|---|---|---|
2020-04-02 | 2020-04-20 | 23948 | 0.059 | - |
2020-04-03 | 2020-04-21 | 24967 | 0.062 | - |
2020-04-04 | 2020-04-24 | 28251 | 0.063 | - |
2020-04-05 | 2020-04-27 | 30820 | 0.060 | - |
2020-04-07 | 2020-04-29 | 33170 | 0.049 | - |
2020-04-08 | 2020-04-29 | 33148 | 0.042 | - |
2020-04-09 | 2020-04-29 | 33534 | 0.036 | - |
2020-04-10 | - | - | 0.046 | 1211 |
2020-04-11 | - | - | 0.043 | 1195 |
2020-04-12 | - | - | 0.039 | 1129 |
2020-04-14 | - | - | 0.030 | 817 |
2020-04-15 | 2020-05-05 | 37238 | 0.017 | - |
2020-04-16 | 2020-05-05 | 37391 | 0.014 | - |
2020-04-19 | - | - | 0.017 | 500 |
2020-04-21 | - | - | 0.015 | 492 |
2020-04-22 | - | - | 0.015 | 499 |
2020-04-23 | - | - | 0.014 | 485 |
2020-04-24 | - | - | 0.014 | 481 |
2020-04-26 | 2020-05-23 | 42926 | 0.009 | - |
2020-04-27 | 2020-05-25 | 43216 | 0.008 | - |
2020-04-28 | 2020-05-26 | 43529 | 0.008 | - |
2020-04-29 | - | - | 0.012 | 405 |
2020-04-30 | - | - | 0.012 | 364 |
2020-05-02 | - | - | 0.011 | 355 |
2020-05-03 | - | - | 0.010 | 347 |
2020-05-04 | - | - | 0.010 | 339 |
2020-05-05 | - | - | 0.009 | 322 |
2020-05-07 | - | - | 0.009 | 285 |
2020-05-08 | - | - | 0.008 | 274 |
2020-05-09 | - | - | 0.008 | 265 |
2020-05-11 | - | - | 0.007 | 262 |
2020-05-12 | - | - | 0.007 | 256 |
2020-05-13 | - | - | 0.007 | 248 |
2020-05-15 | - | - | 0.004 | 157 |
2020-05-17 | - | - | 0.004 | 162 |
2020-05-19 | - | - | 0.004 | 161 |
2020-05-20 | - | - | 0.003 | 152 |
2020-05-23 | - | - | 0.003 | 128 |
2020-05-29 | - | - | 0.003 | 119 |
2020-06-07 | - | - | 0.003 | 99 |
2020-06-14 | - | - | 0.002 | 87 |
2020-06-24 | - | - | 0.002 | 73 |
# Fit the Bavarian data, passing 2020-03-09 as first_date together with the
# population-scaled new-case threshold.
cbr_bavaria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=bavaria_new_confirmed_threshold,
)
# Plot data and fits on one wide axes; 2020-03-22 is passed as restriction start date.
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 165.40695572785646; growth-rate: 0.001, date:2020-09-08 00:00:00, projected value: 73.48845146958611
# Compute the R estimates for Bavaria, then show the result of R() rounded to
# three decimals.
cbr_bavaria.calculate_R_estimates()
cbr_bavaria.R().round(3)
/home/local/cs/local/install/anaconda3-2020.02-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:218: ValueWarning:A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
fit_R | gp_R | kf_R | ll_R | mean_R | |
---|---|---|---|---|---|
report_date | |||||
2020-07-29 | 1.0 | 1.294 | 1.267 | 1.281 | 1.192 |
# One wide figure with a single axes for the R plot.
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_R(ax=ax) # , plot_start_date='2020-03-10'
<matplotlib.axes._subplots.AxesSubplot at 0x7f859bc10c88>
# Load the Austrian data through the project helper and wrap it in a
# CasesByRegion object.
# NOTE: this rebinds the notebook-global `df`, which held the Bavarian table.
df = covid19.get_austria_df()
cbr_austria = covid19.CasesByRegion('Austria', df=df)
cbr_austria.tail()
confirmed | recovered | death | new_confirmed | new_recovered | new_death | |
---|---|---|---|---|---|---|
index | ||||||
2020-07-29 | 20832 | 18528 | 698 | 180 | 149 | 10 |
2020-07-30 | 20935 | 18628 | 698 | 103 | 100 | 0 |
2020-07-31 | 21110 | 18758 | 698 | 175 | 130 | 0 |
2020-08-01 | 21192 | 18911 | 698 | 82 | 153 | 0 |
2020-08-02 | 21284 | 18984 | 698 | 92 | 73 | 0 |
# Scale the reference value 100 (presumably a Germany-wide daily new-case
# level - confirm) by Austria's population ratio, round down, then add one.
austria_new_confirmed_threshold = math.floor(100.0 * prozent_österreich) + 1.0
austria_new_confirmed_threshold
11.0
prediction date | predicted day for reaching 100 threshold | predicted max | growth rate | steady state rate |
---|---|---|---|---|
2020-04-02 | 2020-04-23 | 13860 | 0.042 | - |
2020-04-03 | 2020-04-23 | 13864 | 0.036 | - |
2020-04-04 | 2020-04-23 | 14082 | 0.030 | - |
2020-04-05 | 2020-04-22 | 13978 | 0.025 | - |
2020-04-07 | 2020-04-22 | 14033 | 0.017 | - |
2020-04-08 | 2020-04-23 | 14121 | 0.014 | - |
2020-04-09 | 2020-04-23 | 14229 | 0.012 | - |
2020-04-10 | 2020-04-23 | 14350 | 0.010 | - |
2020-04-11 | 2020-04-24 | 14488 | 0.009 | - |
2020-04-12 | - | - | 0.012 | 106 |
2020-04-14 | - | - | 0.011 | 126 |
2020-04-15 | - | - | 0.010 | 114 |
2020-04-16 | - | - | 0.009 | 108 |
2020-04-19 | - | - | 0.007 | 95 |
2020-04-21 | - | - | 0.006 | 83 |
2020-04-22 | - | - | 0.006 | 77 |
2020-04-23 | - | - | 0.006 | 73 |
2020-04-24 | - | - | 0.005 | 69 |
2020-04-26 | 2020-05-01 | 15208 | 0.002 | - |
2020-04-27 | 2020-05-01 | 15096 | 0.002 | - |
2020-04-28 | 2020-05-01 | 15129 | 0.002 | - |
2020-04-29 | - | - | 0.005 | 84 |
2020-04-30 | - | - | 0.005 | 81 |
2020-05-02 | - | - | 0.005 | 76 |
2020-05-03 | - | - | 0.004 | 75 |
2020-05-04 | - | - | 0.004 | 73 |
2020-05-05 | - | - | 0.004 | 70 |
2020-05-07 | - | - | 0.004 | 63 |
2020-05-08 | - | - | 0.004 | 60 |
2020-05-09 | - | - | 0.003 | 58 |
2020-05-11 | - | - | 0.003 | 58 |
2020-05-12 | - | - | 0.003 | 57 |
2020-05-13 | - | - | 0.003 | 55 |
2020-05-15 | - | - | 0.003 | 55 |
2020-05-17 | - | - | 0.003 | 55 |
2020-05-19 | - | - | 0.003 | 56 |
2020-05-20 | - | - | 0.003 | 55 |
2020-05-23 | - | - | 0.003 | 55 |
2020-05-29 | - | - | 0.003 | 51 |
2020-06-07 | - | - | 0.003 | 46 |
2020-06-14 | - | - | 0.002 | 44 |
2020-06-24 | - | - | 0.002 | 42 |
# Fit the Austrian data, passing 2020-03-09 as first_date together with the
# population-scaled new-case threshold.
cbr_austria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=austria_new_confirmed_threshold,
)
# Plot data and fits on one wide axes; 2020-03-15 is passed as restriction start date.
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_austria.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 15))
/home/local/cs/workspaces/covid-19-data-analysis/covid19.py:1526: UserWarning:Exception in curve_fit: Optimal parameters not found: Number of calls to function has reached maxfev = 600./ None
sigmoid+asymmetric+linear: seor: 60.83140122644205; growth-rate: 0.003, date:2020-09-10 00:00:00, projected value: 63.10625191231651