import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
%matplotlib widget
Get the latest data from https://github.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning.
# Download the cleaned dataset into the working directory; -O fixes the output
# filename, so an existing copy of the CSV is overwritten.
!wget -O covid_19_clean_complete.csv https://raw.githubusercontent.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning/master/covid_19_clean_complete.csv
--2020-05-16 13:02:50-- https://raw.githubusercontent.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning/master/covid_19_clean_complete.csv Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.64.133, 151.101.192.133, 151.101.0.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.64.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 1369217 (1.3M) [text/plain] Saving to: ‘covid_19_clean_complete.csv’ covid_19_clean_comp 100%[===================>] 1.31M 18.1KB/s in 2m 9s 2020-05-16 13:05:11 (10.3 KB/s) - ‘covid_19_clean_complete.csv’ saved [1369217/1369217]
# Peek at the first rows of the raw file, with 'Date' parsed as datetimes
raw_preview = pd.read_csv(
    'covid_19_clean_complete.csv',
    parse_dates=['Date'],
)
raw_preview.head()
| | Province/State | Country/Region | Lat | Long | Date | Confirmed | Deaths | Recovered |
---|---|---|---|---|---|---|---|---|
0 | NaN | Afghanistan | 33.0000 | 65.0000 | 2020-01-22 | 0 | 0 | 0 |
1 | NaN | Albania | 41.1533 | 20.1683 | 2020-01-22 | 0 | 0 | 0 |
2 | NaN | Algeria | 28.0339 | 1.6596 | 2020-01-22 | 0 | 0 | 0 |
3 | NaN | Andorra | 42.5063 | 1.5218 | 2020-01-22 | 0 | 0 | 0 |
4 | NaN | Angola | -11.2027 | 17.8739 | 2020-01-22 | 0 | 0 | 0 |
# Read the dataset, parsing the 'Date' column into datetimes
covid_df = pd.read_csv('covid_19_clean_complete.csv', parse_dates=['Date'])

# Total confirmed cases by date and country (collapses the Province/State rows).
# The string alias 'sum' is used instead of np.sum: passing NumPy callables to
# agg() is deprecated in recent pandas and emits a FutureWarning.
covid_df = covid_df.groupby(['Date', 'Country/Region']).agg({'Confirmed': 'sum'})

# Get the top 10 countries based on confirmed cases per date.
# nlargest() on the grouped column prepends the group key, so 'Date' appears
# twice in the index; dropping level 1 leaves a (Date, Country/Region) index.
covid_df = (
    covid_df.groupby('Date')['Confirmed']
    .nlargest(10)
    .reset_index(level=1, drop=True)
)
covid_df.head(20)
Date Country/Region 2020-01-22 China 548 Japan 2 Thailand 2 South Korea 1 Taiwan* 1 US 1 Afghanistan 0 Albania 0 Algeria 0 Andorra 0 2020-01-23 China 643 Thailand 3 Japan 2 Vietnam 2 Singapore 1 South Korea 1 Taiwan* 1 US 1 Afghanistan 0 Albania 0 Name: Confirmed, dtype: int64
# Country is the second level of covid_df's index. The same country shows up
# on many dates, so reduce the names to a sorted array of unique values.
countries = np.unique([country for _, country in covid_df.index])

# Sample one colour per country, evenly spaced across the 'tab20' colormap
cmap = plt.get_cmap('tab20')
colors = cmap(np.linspace(0, 1, len(countries)))

# Fixed country -> colour lookup so a country keeps its colour in every plot
color_dict = {country: rgba for country, rgba in zip(countries, colors)}
# Static snapshot: horizontal bar chart of the top-10 countries on one date
plt.figure()
date = pd.to_datetime('4/22/20', format="%m/%d/%y")

# Look the date up once; the result is indexed by country
top10_on_date = covid_df.loc[date]
xvals = top10_on_date.index
data = top10_on_date.values

bar_colors = [color_dict[name] for name in xvals]
bars = plt.barh(xvals, data, color=bar_colors)

plt.suptitle('Cumulative Confirmed Covid-19 Cases')
plt.title(date.strftime("%d %b %Y"))

# Flip the y-axis so the first (largest) entry is drawn at the top
ax = plt.gca()
ax.invert_yaxis()
Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …
# Same snapshot as before, now decluttered: no frame, no x-axis, and the
# value written at the end of each bar.
plt.figure()
date = pd.to_datetime('4/22/20', format="%m/%d/%y")

# Look the date up once; the result is indexed by country
top10_on_date = covid_df.loc[date]
xvals = top10_on_date.index
data = top10_on_date.values

bar_colors = [color_dict[name] for name in xvals]
bars = plt.barh(xvals, data, color=bar_colors)

plt.suptitle('Cumulative Confirmed Covid-19 Cases')
plt.title(date.strftime("%d %b %Y"))

# Flip the y-axis so the first (largest) entry is drawn at the top
ax = plt.gca()
ax.invert_yaxis()

# Removing borders
for spine in ax.spines.values():
    spine.set_visible(False)

# Removing tick marks and the values on the x-axis
plt.tick_params(left=False, bottom=False, labelbottom=False)

# Labelling the bars directly: text starts at the bar's end, vertically centred
for bar in bars:
    label_x = bar.get_width()
    label_y = bar.get_y() + bar.get_height()/2
    ax.text(label_x, label_y, ' ' + str(bar.get_width()), va='center')
Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …
# Each unique date we have in covid_df, in chronological order.
# np.unique already returns its result sorted, so no extra np.sort is needed.
dates = np.unique(covid_df.index.get_level_values(level=0))
# Number of animation frames (one per date)
n = len(dates)
def update(curr):
    """Redraw the top-10 bar chart for animation frame ``curr``.

    Clears the current axes and draws a horizontal bar chart of the
    confirmed cases stored in ``covid_df`` for ``dates[curr]``, titled with
    that date and with every bar labelled by its value.

    Parameters
    ----------
    curr : int
        Frame number supplied by ``FuncAnimation``; used as an index into
        the module-level ``dates`` array.

    Relies on the module-level names ``n``, ``dates``, ``covid_df``,
    ``color_dict`` and the animation object ``a``.
    """
    if curr >= n:
        # Past the last date we have in covid_df: stop the timer if it is
        # still there and bail out. Falling through would index dates out of
        # bounds.
        if a.event_source is not None:
            a.event_source.stop()
        return

    plt.cla()
    date = dates[curr]

    # Look the date up once; the result is indexed by country
    top10_on_date = covid_df.loc[date]
    xvals = top10_on_date.index
    data = top10_on_date.values

    bars = plt.barh(xvals, data, color=[color_dict[country] for country in xvals])
    plt.suptitle('Cumulative Confirmed Covid-19 Cases')
    # dates holds NumPy datetimes; convert to a Timestamp to get strftime
    plt.title(pd.to_datetime(date).strftime("%d %b %Y"))

    # Flip the y-axis so the first (largest) entry is drawn at the top
    ax = plt.gca()
    ax.invert_yaxis()

    # Removing borders
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Removing tick marks and the values on the x-axis
    plt.tick_params(left=False, bottom=False, labelbottom=False)

    # Labelling the bars directly
    for bar in bars:
        ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2, ' ' + str(bar.get_width()), va='center')
# Figure for the animation; figsize is (width, height) in inches
fig = plt.figure(figsize=[11, 5])

# Call update() once per date (frames 0..n-1), 100 ms apart, and play once
a = animation.FuncAnimation(
    fig,
    update,
    frames=n,
    interval=100,
    repeat=False,
)
Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …
# Render the animation frames and write them to 'final.mp4'.
# NOTE(review): presumably requires a movie writer such as ffmpeg to be
# installed and on PATH — confirm for the target environment.
a.save('final.mp4')