import pandas as pd
# Load the WHO global COVID-19 data set from the local data directory
# and display the resulting frame.
cases = pd.read_csv(
    filepath_or_buffer='./data/WHO-COVID-19-global-data.csv',
)
cases
Date_reported | Country_code | Country | WHO_region | New_cases | Cumulative_cases | New_deaths | Cumulative_deaths | |
---|---|---|---|---|---|---|---|---|
0 | 2020-01-03 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
1 | 2020-01-04 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
2 | 2020-01-05 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
3 | 2020-01-06 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
4 | 2020-01-07 | AF | Afghanistan | EMRO | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
164473 | 2021-11-22 | ZW | Zimbabwe | AFRO | 9 | 133647 | 0 | 4699 |
164474 | 2021-11-23 | ZW | Zimbabwe | AFRO | 27 | 133674 | 0 | 4699 |
164475 | 2021-11-24 | ZW | Zimbabwe | AFRO | 33 | 133707 | 4 | 4703 |
164476 | 2021-11-25 | ZW | Zimbabwe | AFRO | 40 | 133747 | 0 | 4703 |
164477 | 2021-11-26 | ZW | Zimbabwe | AFRO | 27 | 133774 | 1 | 4704 |
164478 rows × 8 columns
# List the distinct WHO region codes present in the data.
cases['WHO_region'].unique()
array(['EMRO', 'EURO', 'AFRO', 'WPRO', 'AMRO', 'SEARO', 'Other'], dtype=object)
# List the distinct country names whose rows are tagged with the AMRO region.
cases.loc[cases['WHO_region'] == 'AMRO', 'Country'].unique()
array(['Anguilla', 'Antigua and Barbuda', 'Argentina', 'Aruba', 'Bahamas', 'Barbados', 'Belize', 'Bermuda', 'Bolivia (Plurinational State of)', 'Bonaire', 'Brazil', 'British Virgin Islands', 'Canada', 'Cayman Islands', 'Chile', 'Colombia', 'Costa Rica', 'Cuba', 'Curaçao', 'Dominica', 'Dominican Republic', 'Ecuador', 'El Salvador', 'Falkland Islands (Malvinas)', 'French Guiana', 'Grenada', 'Guadeloupe', 'Guatemala', 'Guyana', 'Haiti', 'Honduras', 'Jamaica', 'Martinique', 'Mexico', 'Montserrat', 'Nicaragua', 'Panama', 'Paraguay', 'Peru', 'Puerto Rico', 'Saba', 'Saint Barthélemy', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Martin', 'Saint Pierre and Miquelon', 'Saint Vincent and the Grenadines', 'Sint Eustatius', 'Sint Maarten', 'Suriname', 'Trinidad and Tobago', 'Turks and Caicos Islands', 'United States of America', 'United States Virgin Islands', 'Uruguay', 'Venezuela (Bolivarian Republic of)'], dtype=object)
# Preview every row reported for the United States.
cases[cases['Country'] == 'United States of America']
Date_reported | Country_code | Country | WHO_region | New_cases | Cumulative_cases | New_deaths | Cumulative_deaths | |
---|---|---|---|---|---|---|---|---|
156844 | 2020-01-03 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156845 | 2020-01-04 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156846 | 2020-01-05 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156847 | 2020-01-06 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156848 | 2020-01-07 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
157533 | 2021-11-22 | US | United States of America | AMRO | 32331 | 47405130 | 269 | 767185 |
157534 | 2021-11-23 | US | United States of America | AMRO | 32036 | 47437166 | 111 | 767296 |
157535 | 2021-11-24 | US | United States of America | AMRO | 162130 | 47599296 | 1269 | 768565 |
157536 | 2021-11-25 | US | United States of America | AMRO | 102708 | 47702004 | 1597 | 770162 |
157537 | 2021-11-26 | US | United States of America | AMRO | 100455 | 47802459 | 1367 | 771529 |
694 rows × 8 columns
# Build an independent frame holding only the United States rows.
# The explicit .copy() matters: without it `us` is a slice of `cases`, and
# assigning a column on it raises pandas' SettingWithCopyWarning (the write
# may or may not propagate to the parent frame).
us = cases.loc[cases.Country == 'United States of America'].copy()

# Convert the ISO-formatted date strings to datetime64 values. Bracket
# assignment is used because attribute-style assignment is not a reliable
# way to set a DataFrame column.
us['Date_reported'] = pd.to_datetime(us['Date_reported'], format='%Y-%m-%d')
/home/alexkeeney/anaconda3/envs/bs/lib/python3.8/site-packages/pandas/core/frame.py:3607: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._set_item(key, value)
# Display the US subset to inspect it after the date conversion.
us
Date_reported | Country_code | Country | WHO_region | New_cases | Cumulative_cases | New_deaths | Cumulative_deaths | |
---|---|---|---|---|---|---|---|---|
156844 | 2020-01-03 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156845 | 2020-01-04 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156846 | 2020-01-05 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156847 | 2020-01-06 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
156848 | 2020-01-07 | US | United States of America | AMRO | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
157533 | 2021-11-22 | US | United States of America | AMRO | 32331 | 47405130 | 269 | 767185 |
157534 | 2021-11-23 | US | United States of America | AMRO | 32036 | 47437166 | 111 | 767296 |
157535 | 2021-11-24 | US | United States of America | AMRO | 162130 | 47599296 | 1269 | 768565 |
157536 | 2021-11-25 | US | United States of America | AMRO | 102708 | 47702004 | 1597 | 770162 |
157537 | 2021-11-26 | US | United States of America | AMRO | 100455 | 47802459 | 1367 | 771529 |
694 rows × 8 columns
import altair as alt
# Full-history line charts for the US subset, one per metric. Each plots the
# metric against the report date, which the ':T' shorthand marks as temporal.

# Daily new cases.
alt.Chart(us).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('New_cases'),
)

# Daily new deaths.
alt.Chart(us).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('New_deaths'),
)

# Running total of cases.
alt.Chart(us).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('Cumulative_cases'),
)

# Running total of deaths.
alt.Chart(us).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('Cumulative_deaths'),
)
# Same line charts, restricted to rows reported strictly after 15 July 2021.

# Daily new cases after the cutoff.
alt.Chart(
    us[us['Date_reported'] > pd.Timestamp('07-15-2021')]
).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('New_cases'),
)

# Running total of cases after the cutoff.
alt.Chart(
    us[us['Date_reported'] > pd.Timestamp('07-15-2021')]
).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('Cumulative_cases'),
)
# Independent copy of the US rows reported strictly after 15 July 2021, so
# that columns can be added to it without touching `us`.
us_delta = us[us['Date_reported'] > pd.Timestamp('07-15-2021')].copy()

# Preview the cumulative case count rebased so that the smallest value in the
# window becomes zero.
us_delta['Cumulative_cases'].sub(us_delta['Cumulative_cases'].min())
157404 0 157405 40447 157406 82632 157407 113953 157408 142189 ... 157533 13592445 157534 13624481 157535 13786611 157536 13889319 157537 13989774 Name: Cumulative_cases, Length: 134, dtype: int64
# Store the rebased cumulative count (cumulative cases minus the window's
# minimum) as its own column.
us_delta['new_cumulative_cases'] = us_delta['Cumulative_cases'].sub(
    us_delta['Cumulative_cases'].min()
)

# Plot the rebased cumulative count against the report date.
alt.Chart(us_delta).mark_line().encode(
    alt.X('Date_reported:T'),
    alt.Y('new_cumulative_cases'),
)
# Persist the windowed frame (including its row index) to the data directory.
us_delta.to_csv(path_or_buf='./data/us_july_15-present.csv')

# Find the largest single-day case count in the window and show the row(s)
# on which it was reported.
max_cases = us_delta['New_cases'].max()
us_delta[us_delta['New_cases'] == max_cases]
Date_reported | Country_code | Country | WHO_region | New_cases | Cumulative_cases | New_deaths | Cumulative_deaths | new_cumulative_cases | |
---|---|---|---|---|---|---|---|---|---|
157453 | 2021-09-03 | US | United States of America | AMRO | 198453 | 39317981 | 1994 | 646132 | 5505296 |