#!/usr/bin/env python
# coding: utf-8
# In[1]:
# Load the `watermark` IPython extension and print a reproducibility stamp:
# author (-a), last-updated date (-u/-d), Python version (-v) and the versions
# of the listed packages (-p). `get_ipython` is only available inside an
# IPython/Jupyter session.
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3")
# In[2]:
# Select matplotlib's inline backend so figures are rendered in the notebook
# output (IPython-only magic).
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import sklearn, sklearn.linear_model
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
# Seed NumPy's global random number generator so runs are repeatable.
SEED = 42
np.random.seed(SEED)

# pandas display: allow up to 500 columns and 1000 characters per line.
pd.options.display.max_columns = 500
pd.options.display.width = 1000
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NumPy printing: 10 edge items per dimension, 1000-character lines, and
# no scientific notation for small values.
np.set_printoptions(edgeitems=10, linewidth=1000, suppress=True)
# NOTE(review): this writes a private attribute of NumPy's array-printing module.
# Its value (180) differs from the linewidth=1000 requested through
# np.set_printoptions in this setup cell — presumably a leftover workaround for
# older NumPy releases; confirm it still has any effect before relying on it.
np.core.arrayprint._line_width = 180
# Apply seaborn's default plot styling to matplotlib.
sns.set()
# In[3]:
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Each argument is converted to an HTML table, every table is marked as an
    inline element so the browser places them on one line, and the combined
    markup is handed to IPython's ``display_html``.

    Args:
        *args: pandas DataFrames, or NumPy arrays (wrapped in a DataFrame
            first). Any other object must provide a ``to_html()`` method.
    """
    fragments = []
    for df in args:
        # isinstance (rather than an exact type match) also accepts ndarray subclasses.
        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)
        fragments.append(df.to_html())
    # Patch only the opening <table> tags: replacing every occurrence of the
    # word "table" would also rewrite the closing tags and corrupt any cell
    # text or column label that happens to contain "table".
    html_str = ''.join(fragments).replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)
# Stylesheet snippet setting `flex-direction: row` on `.output` elements
# (presumably Jupyter's output container — confirm).
# NOTE(review): not referenced anywhere in the visible part of this file.
CSS = "\n".join((
    "",
    ".output {",
    "flex-direction: row;",
    "}",
    "",
))
def display_graphs_side_by_side(*args):
    """Render several SVG-capable objects side by side in the notebook output.

    Each argument's SVG markup is placed in its own cell of a single-row HTML
    table, which is then handed to IPython's ``display_html``.

    Args:
        *args: objects exposing ``_repr_svg_()`` that returns SVG markup as a
            string.
    """
    # NOTE(review): the HTML tag literals of this function were lost when the
    # notebook was exported (the string literals were left unterminated, which
    # is a syntax error). The one-row table below is a reconstruction —
    # confirm against the original notebook.
    cells = ''.join('<td>' + g._repr_svg_() + '</td>' for g in args)
    html_str = '<table><tr>' + cells + '</tr></table>'
    display_html(html_str, raw=True)
# NOTE(review): the HTML payload is an empty string, so this call emits no
# markup. The original content (possibly a <style> block) appears to have been
# lost in the export — confirm against the notebook.
display(HTML(""))
# In[4]:
# Enable IPython's autoreload extension in mode 1 (only modules registered with
# %aimport are reloaded before code runs) and register the project-local
# `covid19` module, so edits to it are picked up without restarting the kernel.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '1')
get_ipython().run_line_magic('aimport', 'covid19')
# Data Source: Bayerisches Landesamt für Gesundheit und Lebensmittelsicherheit (LGL): [Übersicht der Fallzahlen von Coronavirusinfektionen](https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm)
# [web-scraping-tutorial-python](https://www.dataquest.io/blog/web-scraping-tutorial-python/)
# In[5]:
import requests
import urllib.request
import time
import bs4
import re
# from bs4 import BeautifulSoup
# In[6]:
import decimal
import locale
# In[7]:
import dateparser
# In[8]:
# Download the LGL overview page and locate the case-count table.
url = 'https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Stop on HTTP errors right away instead of parsing an error page.
response.raise_for_status()
# In[9]:
response
# In[10]:
soup = bs4.BeautifulSoup(response.text, "html.parser")
# In[11]:
# soup.find_all(class_="accordion")
# In[12]:
tableFaelle = soup.find(id="tableFaelle")
if tableFaelle is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # here rather than with an AttributeError when the rows are read later.
    raise ValueError('No element with id "tableFaelle" found at ' + url)
# In[13]:
# tableFaelle
# In[14]:
# dateparser.parse('01. Mrz 2020', date_formats=['%d. %b %Y'], languages=['de'])
# In[15]:
# dateparser.parse('01. Mar 2020', date_formats=['%d. %b %Y'], languages=['de'])
# In[16]:
# dateparser.parse('01. März 2020', languages=['de'])
# In[17]:
# Build the Bavarian case DataFrame from the scraped HTML table.
# The German locale makes locale.atof accept German number formatting.
# NOTE: this changes the locale for the whole process and requires the
# 'de_DE.UTF8' locale to be installed on the machine.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF8')
records = []
# The first <tr> is the table header, so only the rows after it carry data.
for tr in tableFaelle.find_all('tr')[1:]:
    td = tr.find_all('td')
    # The first cell holds the report date without a year; 2020 is appended.
    # NOTE(review): the hard-coded year mis-dates any row from another year.
    dtstr = td[0].text + '2020'
    # Replace the abbreviation 'Mrz' with the full month name 'März' before parsing.
    dtstr = re.sub('Mrz', 'März', dtstr)
    dt = pd.to_datetime(dateparser.parse(dtstr, languages=['de']))
    # The fourth cell is read as the number of newly confirmed cases.
    ct = locale.atof(td[3].text, decimal.Decimal)
    records.append((dt, ct))
# Create the frame in one step; appending row by row with df.loc[i] = ...
# reallocates the frame on every iteration.
df = pd.DataFrame(records, columns=['report_date', 'new_confirmed'])
df = df.set_index('report_date')
df['confirmed'] = df.new_confirmed.cumsum()
df = df[['confirmed', 'new_confirmed']]
# The np.float alias was removed in NumPy 1.24; the builtin float is the same dtype.
df = df.astype(float)
# df = df[df.index <= pd.to_datetime(datetime.datetime.now().date() - datetime.timedelta(days=2))]
# In[18]:
# Today's local date as an ISO 'YYYY-MM-DD' string.
today_string = datetime.date.today().isoformat()
today_string
# In[19]:
# Working copy restricted to rows whose report date is at least two days old.
bavaria_df = df.loc[
    df.index <= pd.to_datetime(datetime.date.today() - datetime.timedelta(days=2))
].copy()
# In[20]:
# Export the full (unrestricted) frame to an Excel workbook.
df.to_excel("bavaria_covid19_data.xlsx")
# In[21]:
# df.to_excel(today_string +"_bavaria_covid19_data.xlsx")
# In[22]:
# Show the last rows of the full and the restricted frame next to each other.
display_side_by_side(df.tail(), bavaria_df.tail())
# In[23]:
# Only confirmed counts are available in this frame; add the remaining
# series as all-zero columns before handing the frame to CasesByRegion.
bavaria_df = bavaria_df.assign(
    recovered=0,
    death=0,
    new_recovered=0,
    new_death=0,
)
cbr_bavaria = covid19.CasesByRegion('Bavaria', df=bavaria_df)
# Date of the first reported death in Bavaria: `2020-03-12`
# In[24]:
# One wide figure holding a single axes for the daily statistics of the last 40 days.
fig, ax = plt.subplots(
    nrows=1, ncols=1,
    figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k',
)
cbr_bavaria.plot_daily_stats(ax=ax, days=40)
# [Liste der deutschen Bundesländer nach Bevölkerung](https://de.wikipedia.org/wiki/Liste_der_deutschen_Bundesl%C3%A4nder_nach_Bev%C3%B6lkerung)
# In[25]:
# Population counts of Germany and Bavaria.
einwohner_deutschland, einwohner_bayern = 83019213.0, 13076721.0
# Bavaria's population as a fraction of Germany's.
prozent_bayern = einwohner_bayern / einwohner_deutschland
prozent_bayern
# [Population of Europe](https://www.indexmundi.com/map/?v=21&r=eu&l=de)
# In[26]:
einwohner_österreich = 8793370.0
# Austria's population expressed as a fraction of Germany's.
prozent_österreich = einwohner_österreich / einwohner_deutschland
prozent_österreich
# In[27]:
# Scale a value of 100 by Bavaria's population share, drop the fractional
# part and add one; the result stays a float.
bavaria_new_confirmed_threshold = float(math.floor(100.0 * prozent_bayern)) + 1
bavaria_new_confirmed_threshold
# | prediction date | predicted day for reaching threshold | predicted max | growth rate | steady state rate |
# | :--- | :--- | ---: | ---: | ---: |
# | 2020-04-02 | 2020-04-20 | 23948 | 0.059 | - |
# | 2020-04-03 | 2020-04-21 | 24967 | 0.062 | - |
# | 2020-04-04 | 2020-04-24 | 28251 | 0.063 | - |
# | 2020-04-05 | 2020-04-27 | 30820 | 0.060 | - |
# | 2020-04-07 | 2020-04-29 | 33170 | 0.049 | - |
# | 2020-04-08 | 2020-04-29 | 33148 | 0.042 | - |
# | 2020-04-09 | 2020-04-29 | 33534 | 0.036 | - |
# | 2020-04-10 | - | - | 0.046 | 1211 |
# | 2020-04-11 | - | - | 0.043 | 1195 |
# | 2020-04-12 | - | - | 0.039 | 1129 |
# | 2020-04-14 | - | - | 0.030 | 817 |
# | 2020-04-15 | 2020-05-05 | 37238 | 0.017 | - |
# | 2020-04-16 | 2020-05-05 | 37391 | 0.014 | - |
# | 2020-04-19 | - | - | 0.017 | 500 |
# | 2020-04-21 | - | - | 0.015 | 492 |
# | 2020-04-22 | - | - | 0.015 | 499 |
# | 2020-04-23 | - | - | 0.014 | 485 |
# | 2020-04-24 | - | - | 0.014 | 481 |
# | 2020-04-26 | 2020-05-23 | 42926 | 0.009 | - |
# | 2020-04-27 | 2020-05-25 | 43216 | 0.008 | - |
# | 2020-04-28 | 2020-05-26 | 43529 | 0.008 | - |
# | 2020-04-29 | - | - | 0.012 | 405 |
# | 2020-04-30 | - | - | 0.012 | 364 |
# | 2020-05-02 | - | - | 0.011 | 355 |
# | 2020-05-03 | - | - | 0.010 | 347 |
# | 2020-05-04 | - | - | 0.010 | 339 |
# | 2020-05-05 | - | - | 0.009 | 322 |
# | 2020-05-07 | - | - | 0.009 | 285 |
# | 2020-05-08 | - | - | 0.008 | 274 |
# | 2020-05-09 | - | - | 0.008 | 265 |
# | 2020-05-11 | - | - | 0.007 | 262 |
# | 2020-05-12 | - | - | 0.007 | 256 |
# | 2020-05-13 | - | - | 0.007 | 248 |
# | 2020-05-15 | - | - | 0.004 | 157 |
# | 2020-05-17 | - | - | 0.004 | 162 |
# | 2020-05-19 | - | - | 0.004 | 161 |
# | 2020-05-20 | - | - | 0.003 | 152 |
# | 2020-05-23 | - | - | 0.003 | 128 |
# | 2020-05-29 | - | - | 0.003 | 119 |
# | 2020-06-07 | - | - | 0.003 | 99 |
# | 2020-06-14 | - | - | 0.002 | 87 |
# | 2020-06-24 | - | - | 0.002 | 73 |
# In[28]:
# Run the Bavaria fit starting at 2020-03-09 with the population-scaled threshold.
cbr_bavaria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=bavaria_new_confirmed_threshold,
)
# Plot the data together with the fits; 2020-03-22 is passed as the restriction start date.
fig, ax = plt.subplots(
    nrows=1, ncols=1,
    figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k',
)
cbr_bavaria.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 22),
)
# In[29]:
# Compute the R estimates and show them rounded to three decimals.
cbr_bavaria.calculate_R_estimates()
cbr_bavaria.R().round(3)
# In[30]:
fig, ax = plt.subplots(
    nrows=1, ncols=1,
    figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k',
)
cbr_bavaria.plot_R(ax=ax) # , plot_start_date='2020-03-10'
# In[31]:
# Load the Austrian data through the project helper and wrap it like the Bavarian frame.
# NOTE: this rebinds `df`, which held the Bavarian data up to this point.
df = covid19.get_austria_df()
cbr_austria = covid19.CasesByRegion('Austria', df=df)
cbr_austria.tail()
# In[32]:
# Scale a value of 100 by Austria's population ratio, drop the fractional
# part and add one; the result stays a float.
austria_new_confirmed_threshold = float(math.floor(100.0 * prozent_österreich)) + 1
austria_new_confirmed_threshold
# | prediction date | predicted day for reaching 100 threshold | predicted max | growth rate | steady state rate |
# | :--- | :--- | ---: | ---: | ---: |
# | 2020-04-02 | 2020-04-23 | 13860 | 0.042 | - |
# | 2020-04-03 | 2020-04-23 | 13864 | 0.036 | - |
# | 2020-04-04 | 2020-04-23 | 14082 | 0.030 | - |
# | 2020-04-05 | 2020-04-22 | 13978 | 0.025 | - |
# | 2020-04-07 | 2020-04-22 | 14033 | 0.017 | - |
# | 2020-04-08 | 2020-04-23 | 14121 | 0.014 | - |
# | 2020-04-09 | 2020-04-23 | 14229 | 0.012 | - |
# | 2020-04-10 | 2020-04-23 | 14350 | 0.010 | - |
# | 2020-04-11 | 2020-04-24 | 14488 | 0.009 | - |
# | 2020-04-12 | - | - | 0.012 | 106 |
# | 2020-04-14 | - | - | 0.011 | 126 |
# | 2020-04-15 | - | - | 0.010 | 114 |
# | 2020-04-16 | - | - | 0.009 | 108 |
# | 2020-04-19 | - | - | 0.007 | 95 |
# | 2020-04-21 | - | - | 0.006 | 83 |
# | 2020-04-22 | - | - | 0.006 | 77 |
# | 2020-04-23 | - | - | 0.006 | 73 |
# | 2020-04-24 | - | - | 0.005 | 69 |
# | 2020-04-26 | 2020-05-01 | 15208 | 0.002 | - |
# | 2020-04-27 | 2020-05-01 | 15096 | 0.002 | - |
# | 2020-04-28 | 2020-05-01 | 15129 | 0.002 | - |
# | 2020-04-29 | - | - | 0.005 | 84 |
# | 2020-04-30 | - | - | 0.005 | 81 |
# | 2020-05-02 | - | - | 0.005 | 76 |
# | 2020-05-03 | - | - | 0.004 | 75 |
# | 2020-05-04 | - | - | 0.004 | 73 |
# | 2020-05-05 | - | - | 0.004 | 70 |
# | 2020-05-07 | - | - | 0.004 | 63 |
# | 2020-05-08 | - | - | 0.004 | 60 |
# | 2020-05-09 | - | - | 0.003 | 58 |
# | 2020-05-11 | - | - | 0.003 | 58 |
# | 2020-05-12 | - | - | 0.003 | 57 |
# | 2020-05-13 | - | - | 0.003 | 55 |
# | 2020-05-15 | - | - | 0.003 | 55 |
# | 2020-05-17 | - | - | 0.003 | 55 |
# | 2020-05-19 | - | - | 0.003 | 56 |
# | 2020-05-20 | - | - | 0.003 | 55 |
# | 2020-05-23 | - | - | 0.003 | 55 |
# | 2020-05-29 | - | - | 0.003 | 51 |
# | 2020-06-07 | - | - | 0.003 | 46 |
# | 2020-06-14 | - | - | 0.002 | 44 |
# | 2020-06-24 | - | - | 0.002 | 42 |
# In[33]:
# Run the Austria fit starting at 2020-03-09 with the population-scaled threshold.
cbr_austria.fit(
    first_date=pd.to_datetime('2020-03-09'),
    new_confirmed_threshold=austria_new_confirmed_threshold,
)
# Plot the data together with the fits; 2020-03-15 is passed as the restriction start date.
fig, ax = plt.subplots(
    nrows=1, ncols=1,
    figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k',
)
cbr_austria.plot_with_fits(
    ax=ax,
    restriction_start_date=datetime.datetime(2020, 3, 15),
)