#!/usr/bin/env python
# coding: utf-8

# ## The New York Times Covid-19 Database
# 
# The New York Times Covid-19 Database is 
# a county-level database of confirmed cases and deaths, 
# compiled from state and local governments and health departments across the United States.
# The initial release of the database was on Thursday, March 26, 2020, and it is updated daily. 
# 
# These data have fueled many articles and graphics by The Times; these are updated regularly at
# [https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html). 
# The Times has created many visualizations that are effective communications of important information about the pandemic. 
# 
# The data are publicly available via GitHub: [https://github.com/nytimes/covid-19-data](https://github.com/nytimes/covid-19-data).  In this illustration we will only use
# the data aggregated at the state level. 

# In[ ]:


from datascience import *
import numpy as np

# Render matplotlib figures inline in the notebook output. This is an IPython
# magic, so the line only works when the code runs under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plots
# Apply the 'fivethirtyeight' matplotlib style sheet to the figures drawn below.
plots.style.use('fivethirtyeight')


# In[ ]:


# Read the state-level CSV straight from the NYT GitHub repository and discard
# the 'fips' column, which the rest of this illustration does not use.
covid_table = Table.read_table(
    "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
).drop('fips')
covid_table


# ### Process the recent data
# 
# Let's plot Covid-19 statistics for Connecticut. We could also only use data after a particular date. 

# In[ ]:


# Choose the state to examine and the date after which observations are kept.
state = 'Connecticut'
start_date = '2020-03-08'   # a more recent date (e.g. '2021-10-01') makes the computations faster

# Keep this state's rows dated strictly after start_date (are.above excludes
# start_date itself), then plot the cumulative case count against date.
state_data = (
    covid_table
    .where('state', state)
    .where('date', are.above(start_date))
)
state_data.plot("date", "cases")


# In[ ]:


# The 'cases' column holds the cumulative number of cases, so the number of
# new cases each day is the difference between consecutive entries;
# np.diff() computes exactly those differences.
cumulative_cases = state_data.column('cases')
num_new_cases = np.diff(cumulative_cases)
num_new_cases


# In[ ]:


### Can we add new cases to our state_data Table? 
# state_data.with_column("new cases", num_new_cases)


# In[ ]:


# let's examine how many entries there are in new cases vs. the number of rows in the state_data table
# (np.diff returns one fewer element than its input, so the two counts differ by one)
print(len(num_new_cases))

state_data.num_rows


# In[ ]:


# Put a single 0 in front of the daily differences so that the resulting
# vector has as many entries as the state_data Table has rows.
leading_zero = np.zeros(1, dtype=num_new_cases.dtype)
num_new_cases2 = np.concatenate((leading_zero, num_new_cases))

len(num_new_cases2)


# In[ ]:


# Attach the padded daily counts to the table as a "new cases" column.
# The result is stored under a new name; state_data itself is not reassigned.
state_data_with_new_cases = state_data.with_column("new cases", num_new_cases2)
state_data_with_new_cases


# In[ ]:


# create a line plot of new cases against date
state_data_with_new_cases.plot("date", "new cases")
# alternative view of the same two columns as a scatter plot:
# state_data_with_new_cases.scatter('date', 'new cases')


# In[ ]:


# Visualize the data as a bar chart, treating date as a categorical variable.
state_data_with_new_cases.bar('date', 'new cases')
plots.xlabel('')            # remove the x-axis label
plots.xticks(rotation=90);  # rotate the date tick labels by 90 degrees


# Compare this to the graph here: [https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html).
# 
# Now, your task is to modify the above code in order to display a plot of the new *deaths* each day. (Hint: You only need to change one word!) What stands out to you upon comparing the plots of new cases and new deaths? 
#