#!/usr/bin/env python
# coding: utf-8

# Notebook export: environment setup, HTML display helpers and download of the
# Bavarian LGL COVID-19 case-number page.

# In[1]:

get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3")

# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import scipy
import scipy.stats as stats
import scipy.special
import scipy.misc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import sklearn
import sklearn.linear_model
import datetime
import time
import math
from dateutil import relativedelta
from collections import OrderedDict

# Fixed seed so that NumPy-based random draws are reproducible across runs.
SEED = 42
np.random.seed(SEED)

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
# NOTE(review): this pokes a private NumPy attribute; np.set_printoptions(linewidth=...)
# above is the public way to control line width.
np.core.arrayprint._line_width = 180

sns.set()

# In[3]:

from IPython.display import display, HTML
from IPython.display import display_html


def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Each positional argument is either an object with a ``to_html()`` method
    (e.g. a DataFrame) or a ``numpy.ndarray``, which is wrapped in a DataFrame
    first. The HTML of all tables is concatenated and every opening
    ``<table`` tag gets an inline-display style before being shown.
    """
    html_parts = []
    for df in args:
        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)
        html_parts.append(df.to_html())
    # Only touch the opening tag: replacing the bare word 'table' would also
    # rewrite '</table>' and any cell text that happens to contain "table".
    html_str = ''.join(html_parts).replace('<table', '<table style="display:inline"')
    # print(html_str)
    display_html(html_str, raw=True)


CSS = """ .output { flex-direction: row; } """


def display_graphs_side_by_side(*args):
    """Display the SVG representation of several graph objects in one output.

    Each positional argument must provide a ``_repr_svg_()`` method; the SVG
    fragments are concatenated, each surrounded by newlines.
    """
    # NOTE(review): the separators here are bare newlines and the HTML payload
    # passed to display(HTML(...)) below is empty -- markup looks like it was
    # lost when the notebook was exported; confirm against the original notebook.
    html_str = ''.join('\n' + g._repr_svg_() + '\n' for g in args)
    display_html(html_str, raw=True)


display(HTML(""))

# In[4]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '1')
get_ipython().run_line_magic('aimport', 'covid19')

# Data source: Bayerisches Landesamt für Gesundheit und Lebensmittelsicherheit (LGL),
# the Bavarian State Office for Health and Food Safety:
# [Übersicht der Fallzahlen von Coronavirusinfektionen](https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm)
# (overview of the case numbers of coronavirus infections)

# [web-scraping-tutorial-python](https://www.dataquest.io/blog/web-scraping-tutorial-python/)

# In[5]:

import requests
import urllib.request
import time
import bs4
import re
# from bs4 import BeautifulSoup

# In[6]:

import decimal
import locale

# In[7]:

import dateparser

# In[8]:

url = 'https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm'
# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of later while parsing an error page.
response.raise_for_status()

# In[9]:

response

# In[10]:

soup = bs4.BeautifulSoup(response.text, "html.parser")

# In[11]:

# soup.find_all(class_="accordion")

# In[12]:

# The case-number table is looked up by its HTML id.
tableFaelle = soup.find(id="tableFaelle")

# In[13]:

# tableFaelle

# In[14]:

# dateparser.parse('01. Mrz 2020', date_formats=['%d. %b %Y'], languages=['de'])

# In[15]:

# dateparser.parse('01. Mar 2020', date_formats=['%d. %b %Y'], languages=['de'])

# In[16]:

# dateparser.parse('01.
# März 2020', languages=['de'])
# (tail of the commented-out dateparser experiment that starts on the previous line)

# In[17]:

# Parse the scraped LGL table into a DataFrame of daily new confirmed cases.
# The German locale is needed so that locale.atof understands German number
# formatting in the table cells.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF8')

if tableFaelle is None:
    raise RuntimeError('element with id "tableFaelle" not found in the downloaded page')

records = []
# The first <tr> is skipped (header row).
for tr in tableFaelle.find_all('tr')[1:]:
    td = tr.find_all('td')
    # NOTE(review): the year is hard-coded; rows from any other year would be
    # mis-dated -- confirm the table only ever covers 2020.
    # 'Mrz' is rewritten to 'März' before handing the string to dateparser.
    dtstr = (td[0].text + '2020').replace('Mrz', 'März')
    dt = pd.to_datetime(dateparser.parse(dtstr, languages=['de']))
    # Column index 3 holds the count that is used as "new confirmed".
    ct = locale.atof(td[3].text, decimal.Decimal)
    records.append((dt, ct))

# Build the frame once instead of growing it row by row with df.loc[i] = ...
df = pd.DataFrame(records, columns=['report_date', 'new_confirmed'])
df = df.set_index('report_date')
df['confirmed'] = df.new_confirmed.cumsum()
df = df[['confirmed', 'new_confirmed']]
# The builtin float replaces the np.float alias, which was removed in NumPy 1.24.
df = df.astype(float)
# df = df[df.index <= pd.to_datetime(datetime.datetime.now().date() - datetime.timedelta(days=2))]

# In[18]:

today_string = datetime.datetime.now().date().strftime('%Y-%m-%d')
today_string

# In[19]:

# Keep only rows up to two days before today for the analysis frame.
bavaria_df = df[df.index <= pd.to_datetime(datetime.datetime.now().date() - datetime.timedelta(days=2))].copy()

# In[20]:

df.to_excel("bavaria_covid19_data.xlsx")

# In[21]:

# df.to_excel(today_string +"_bavaria_covid19_data.xlsx")

# In[22]:

display_side_by_side(df.tail(), bavaria_df.tail())

# In[23]:

# The scraped table only provides confirmed cases; the remaining columns are
# filled with zeros before the frame is handed to covid19.CasesByRegion.
bavaria_df['recovered'] = 0
bavaria_df['death'] = 0
bavaria_df['new_recovered'] = 0
bavaria_df['new_death'] = 0
cbr_bavaria = covid19.CasesByRegion('Bavaria', df=bavaria_df)

# Bavaria first dead person date: `2020-03-12`

# In[24]:

fig = plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
cbr_bavaria.plot_daily_stats(ax=ax, days=40)

# List of German federal states by population:
# [Liste der deutschen Bundesländer nach Bevölkerung](https://de.wikipedia.org/wiki/Liste_der_deutschen_Bundesl%C3%A4nder_nach_Bev%C3%B6lkerung)

# In[25]:

# Population of Germany and Bavaria; prozent_bayern is Bavaria's share as a fraction.
einwohner_deutschland = 83019213.0
einwohner_bayern = 13076721.0
prozent_bayern = einwohner_bayern / einwohner_deutschland
prozent_bayern

# Population of Europe:
# [Bevölkerung Europa](https://www.indexmundi.com/map/?v=21&r=eu&l=de)

# In[26]:

# Population of Austria, expressed relative to Germany's population.
einwohner_österreich = 8793370.0
prozent_österreich = einwohner_österreich / einwohner_deutschland
prozent_österreich

# In[27]:

# Scale a threshold of 100 by Bavaria's population share: floor, then add one.
bavaria_new_confirmed_threshold = (100.0 * prozent_bayern) // 1 + 1
bavaria_new_confirmed_threshold

# | prediction date | predicted day for reaching threshold | predicted max | growth rate | steady state rate |
# | :--- | :--- | ---: | ---: | ---: |
# | 2020-04-02 | 2020-04-20 | 23948 | 0.059 | - |
# | 2020-04-03 | 2020-04-21 | 24967 | 0.062 | - |
# | 2020-04-04 | 2020-04-24 | 28251 | 0.063 | - |
# | 2020-04-05 | 2020-04-27 | 30820 | 0.060 | - |
# | 2020-04-07 | 2020-04-29 | 33170 | 0.049 | - |
# | 2020-04-08 | 2020-04-29 | 33148 | 0.042 | - |
# | 2020-04-09 | 2020-04-29 | 33534 | 0.036 | - |
# | 2020-04-10 | - | - | 0.046 | 1211 |
# | 2020-04-11 | - | - | 0.043 | 1195 |
# | 2020-04-12 | - | - | 0.039 | 1129 |
# | 2020-04-14 | - | - | 0.030 | 817 |
# | 2020-04-15 | 2020-05-05 | 37238 | 0.017 | - |
# | 2020-04-16 | 2020-05-05 | 37391 | 0.014 | - |
# | 2020-04-19 | - | - | 0.017 | 500 |
# | 2020-04-21 | - | - | 0.015 | 492 |
# | 2020-04-22 | - | - | 0.015 | 499 |
# | 2020-04-23 | - | - | 0.014 | 485 |
# | 2020-04-24 | - | - | 0.014 | 481 |
# | 2020-04-26 | 2020-05-23 | 42926 | 0.009 | - |
# | 2020-04-27 | 2020-05-25 | 43216 | 0.008 | - |
# | 2020-04-28 | 2020-05-26 | 43529 | 0.008 | - |
# | 2020-04-29 | - | - | 0.012 | 405 |
# | 2020-04-30 | - | - | 0.012 | 364 |
# | 2020-05-02 | - | - | 0.011 | 355 |
# | 2020-05-03 | - | - | 0.010 | 347 |
# | 2020-05-04 | - | - | 0.010 | 339 |
# | 2020-05-05 | - | - | 0.009 | 322 |
# | 2020-05-07 | - | - | 0.009 | 285 |
# | 2020-05-08 | - | - | 0.008 | 274 |
# | 2020-05-09 | - | - | 0.008 | 265 |
# | 2020-05-11 | - | - | 0.007 | 262 |
# | 2020-05-12 | - | - | 0.007 | 256 |
# | 2020-05-13 | - | - | 0.007 | 248 |
# | 2020-05-15 | - | - | 0.004 | 157 |
# | 2020-05-17 | - | - | 0.004 | 162 |
# | 2020-05-19 | - | - | 0.004 | 161 |
# | 2020-05-20 | - | - | 0.003 | 152 |
# | 2020-05-23 | - | - | 0.003 | 128 |
# | 2020-05-29 | - | - | 0.003 | 119 |
# | 2020-06-07 | - | - | 0.003 | 99 |
# | 2020-06-14 | - | - | 0.002 | 87 |
# | 2020-06-24 | - | - | 0.002 | 73 |

# In[28]:
# Fit the Bavaria case model starting 2020-03-09 and plot the fits; the plot
# is given 2020-03-22 as the restriction start date.
bavaria_fit_start = pd.to_datetime('2020-03-09')
cbr_bavaria.fit(
    first_date=bavaria_fit_start,
    new_confirmed_threshold=bavaria_new_confirmed_threshold,
)
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
bavaria_restriction_start = datetime.datetime(2020, 3, 22)
cbr_bavaria.plot_with_fits(ax=ax, restriction_start_date=bavaria_restriction_start)

# In[29]:

cbr_bavaria.calculate_R_estimates()
bavaria_r = cbr_bavaria.R()
bavaria_r.round(3)

# In[30]:

fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
cbr_bavaria.plot_R(ax=ax)  # , plot_start_date='2020-03-10'

# In[31]:

# Austria: the data frame comes from the project's covid19 module.
df = covid19.get_austria_df()
cbr_austria = covid19.CasesByRegion('Austria', df=df)
cbr_austria.tail()

# In[32]:

# Scale a threshold of 100 by Austria's population relative to Germany: floor, then add one.
austria_new_confirmed_threshold = (100.0 * prozent_österreich) // 1 + 1
austria_new_confirmed_threshold

# | prediction date | predicted day for reaching 100 threshold | predicted max | growth rate | steady state rate |
# | :--- | :--- | ---: | ---: | ---: |
# | 2020-04-02 | 2020-04-23 | 13860 | 0.042 | - |
# | 2020-04-03 | 2020-04-23 | 13864 | 0.036 | - |
# | 2020-04-04 | 2020-04-23 | 14082 | 0.030 | - |
# | 2020-04-05 | 2020-04-22 | 13978 | 0.025 | - |
# | 2020-04-07 | 2020-04-22 | 14033 | 0.017 | - |
# | 2020-04-08 | 2020-04-23 | 14121 | 0.014 | - |
# | 2020-04-09 | 2020-04-23 | 14229 | 0.012 | - |
# | 2020-04-10 | 2020-04-23 | 14350 | 0.010 | - |
# | 2020-04-11 | 2020-04-24 | 14488 | 0.009 | - |
# | 2020-04-12 | - | - | 0.012 | 106 |
# | 2020-04-14 | - | - | 0.011 | 126 |
# | 2020-04-15 | - | - | 0.010 | 114 |
# | 2020-04-16 | - | - | 0.009 | 108 |
# | 2020-04-19 | - | - | 0.007 | 95 |
# | 2020-04-21 | - | - | 0.006 | 83 |
# | 2020-04-22 | - | - | 0.006 | 77 |
# | 2020-04-23 | - | - | 0.006 | 73 |
# | 2020-04-24 | - | - | 0.005 | 69 |
# | 2020-04-26 | 2020-05-01 | 15208 | 0.002 | - |
# | 2020-04-27 | 2020-05-01 | 15096 | 0.002 | - |
# | 2020-04-28 | 2020-05-01 | 15129 | 0.002 | - |
# | 2020-04-29 | - | - | 0.005 | 84 |
# | 2020-04-30 | - | - | 0.005 | 81 |
# | 2020-05-02 | - | - | 0.005 | 76 |
# | 2020-05-03 | - | - | 0.004 | 75 |
# | 2020-05-04 | - | - | 0.004 | 73 |
# | 2020-05-05 | - | - | 0.004 | 70 |
# | 2020-05-07 | - | - | 0.004 | 63 |
# | 2020-05-08 | - | - | 0.004 | 60 |
# | 2020-05-09 | - | - | 0.003 | 58 |
# | 2020-05-11 | - | - | 0.003 | 58 |
# | 2020-05-12 | - | - | 0.003 | 57 |
# | 2020-05-13 | - | - | 0.003 | 55 |
# | 2020-05-15 | - | - | 0.003 | 55 |
# | 2020-05-17 | - | - | 0.003 | 55 |
# | 2020-05-19 | - | - | 0.003 | 56 |
# | 2020-05-20 | - | - | 0.003 | 55 |
# | 2020-05-23 | - | - | 0.003 | 55 |
# | 2020-05-29 | - | - | 0.003 | 51 |
# | 2020-06-07 | - | - | 0.003 | 46 |
# | 2020-06-14 | - | - | 0.002 | 44 |
# | 2020-06-24 | - | - | 0.002 | 42 |

# In[33]:

# Fit the Austria case model starting 2020-03-09 and plot the fits; the plot
# is given 2020-03-15 as the restriction start date.
austria_fit_start = pd.to_datetime('2020-03-09')
cbr_austria.fit(
    first_date=austria_fit_start,
    new_confirmed_threshold=austria_new_confirmed_threshold,
)
fig, ax = plt.subplots(1, 1, figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
austria_restriction_start = datetime.datetime(2020, 3, 15)
cbr_austria.plot_with_fits(ax=ax, restriction_start_date=austria_restriction_start)