#!/usr/bin/env python
# coding: utf-8

# ## The New York Times Covid-19 Database
#
# The New York Times Covid-19 Database is
# a county-level database of confirmed cases and deaths,
# compiled from state and local governments and health departments across the United States.
# The initial release of the database was on Thursday, March 26, 2020, and it is updated daily.
#
# These data have fueled many articles and graphics by The Times; these are updated regularly at
# [https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html).
# The Times has created many visualizations that are effective communications of important information about the pandemic.
#
# The data are publicly available via GitHub: [https://github.com/nytimes/covid-19-data](https://github.com/nytimes/covid-19-data). In this illustration we will only use
# the data aggregated at the state level.

# In[ ]:


from datascience import *
import numpy as np

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')


# In[ ]:


# Load the state-level table straight from the NYT GitHub repository
# (requires network access) and drop the 'fips' column, which is not used below.
covid_table = Table.read_table("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")
covid_table = covid_table.drop('fips')
covid_table


# ### Process the recent data
#
# Let's plot Covid-19 statistics for Connecticut. We could also only use data after a particular date.

# In[ ]:


state = 'Connecticut'
# computations can be made faster if we look at only more recent dates, e.g. '2021-10-01'
start_date = '2020-03-08'

# keep only the rows for the chosen state ...
state_data = covid_table.where('state', state)
# ... and only the rows whose date is above start_date
state_data = state_data.where('date', are.above(start_date))

# cumulative number of cases over time
state_data.plot("date", "cases")


# In[ ]:


# Let's look at new cases rather than the cumulative number of cases
# we can use the np.diff() function to get the number of new cases each day
num_new_cases = np.diff(state_data.column('cases'))
num_new_cases


# In[ ]:


### Can we add new cases to our state_data Table?

# state_data.with_column("new cases", num_new_cases)


# In[ ]:


# let's examine how many entries there are in new cases vs. the number of rows in the state_data table
# (np.diff returns one fewer value than its input, so the two lengths differ by one)
print(len(num_new_cases))
state_data.num_rows


# In[ ]:


# let's add an extra 0 to make num_new_cases vector the same length as the number of rows in the state_data Table
# (the 0 is inserted at index 0, i.e. it becomes the "new cases" value for the first row)
num_new_cases2 = np.insert(num_new_cases, 0, 0)
len(num_new_cases2)


# In[ ]:


state_data_with_new_cases = state_data.with_column("new cases", num_new_cases2)
state_data_with_new_cases


# In[ ]:


# create a line plot of new cases against date
# (contrast with the bar chart below, which treats date as a categorical variable)
state_data_with_new_cases.plot("date", "new cases")

# state_data_with_new_cases.scatter('date', 'new cases')


# In[ ]:


# visualizing data treating date as a categorical variable
state_data_with_new_cases.bar('date', 'new cases')
plots.xticks(rotation=90)
plots.xlabel('');


# Compare this to the graph here: [https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html).
#
# Now, your task is to modify the above code in order to display a plot of the new *deaths* each day. (Hint: You only need to change one word!) What stands out to you upon comparing the plots of new cases and new deaths?
#