#!/usr/bin/env python
# coding: utf-8

# Dataset 1: Conditions_Contributing_to_COVID-19_Deaths__by_State_and_Age__Provisional_2020-2023.csv

# Exploratory analysis of the "Conditions Contributing to COVID-19 Deaths"
# CSV: load it, print summaries, parse the date columns, draw frequency /
# box / bar plots, and build a ydata-profiling report object.
#
# The `# In[n]:` markers are kept so each section can be matched back to the
# notebook cell it was exported from.

# In[1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

file_path_1 = 'Conditions_Contributing_to_COVID-19_Deaths__by_State_and_Age__Provisional_2020-2023.csv'
data_1 = pd.read_csv(file_path_1)
print(data_1.head())


# In[2]:
data_1_shape = data_1.shape
# Descriptive statistics for all columns (numeric and non-numeric).
data_1_describe = data_1.describe(include='all')
# Last few rows of the DataFrame.
data_1_tail = data_1.tail()
# Data type of each column.
data_1_dtypes = data_1.dtypes

# A bare tuple expression is only rendered by a notebook; in a script the
# summaries have to be printed explicitly to be seen.
print(data_1_shape, data_1_describe, data_1_tail, data_1_dtypes, sep='\n\n')


# In[3]:
# Convert the date columns from text to datetime64.
DATE_COLUMNS = ['Data As Of', 'Start Date', 'End Date']
data_1[DATE_COLUMNS] = data_1[DATE_COLUMNS].apply(pd.to_datetime)

print(data_1)


# In[4]:
# Frequency distribution of Age Group (number of rows per age group).
age_group_counts_data_1 = data_1['Age Group'].value_counts()

# Bar chart of Age Group counts in 'data_1'.
plt.figure(figsize=(10, 6))
plt.bar(age_group_counts_data_1.index, age_group_counts_data_1.values, color='skyblue')
plt.title('Frequency Distribution of Age Groups in Data 1')
plt.xlabel('Age Group')
plt.ylabel('Frequency')
plt.xticks(rotation=45)  # Rotate x-axis labels so they stay legible
plt.show()

# Boxplot of COVID-19 Deaths by Age Group in 'data_1'.
plt.figure(figsize=(10, 6))
sns.boxplot(x='Age Group', y='COVID-19 Deaths', data=data_1)
plt.title('COVID-19 Deaths by Age Group in Data 1')
plt.xlabel('Age Group')
plt.ylabel('COVID-19 Deaths')
plt.show()


# In[5]:
# The plots below were written against an 'Age Group Numeric' column, but
# nothing in this script creates it. Use it when the data provides it and
# otherwise fall back to the 'Age Group' column (already used above), so
# seaborn does not fail on an unknown `hue` column.
AGE_GROUP_HUE = 'Age Group Numeric' if 'Age Group Numeric' in data_1.columns else 'Age Group'


def _deaths_barplot(frame, x_column, title, *, tight_layout=False):
    """Draw and show a bar plot of 'COVID-19 Deaths' split by age group.

    Args:
        frame: DataFrame holding ``x_column``, 'COVID-19 Deaths' and the
            column named by ``AGE_GROUP_HUE``.
        x_column: Column placed on the x-axis; also used as the x-axis label.
        title: Figure title.
        tight_layout: When true, call ``plt.tight_layout()`` before showing
            so rotated tick labels are not clipped.

    Returns:
        The matplotlib Axes returned by ``sns.barplot``.
    """
    plt.figure(figsize=(12, 6))
    ax = sns.barplot(x=x_column, y='COVID-19 Deaths', hue=AGE_GROUP_HUE, data=frame)
    plt.title(title)
    plt.xlabel(x_column)
    plt.ylabel('COVID-19 Deaths')
    plt.legend(title='Age Group')
    # Rotate the tick labels through tick_params rather than re-setting the
    # label texts via set_xticklabels(get_xticklabels(), ...).
    ax.tick_params(axis='x', labelrotation=90)
    if tight_layout:
        plt.tight_layout()
    plt.show()
    return ax


barplot1 = _deaths_barplot(
    data_1,
    'Condition Group',
    'COVID-19 Deaths by Condition Group and Age Group',
)

barplot2 = _deaths_barplot(
    data_1,
    'State',
    'COVID-19 Deaths by State and Age Group',
)


# In[ ]:
# Recreate the per-state bar plot without the 'United States' rows.
unique_states = data_1['State'].unique()

# Filter out the 'United States' entry from the dataset.
data_1_no_us = data_1[data_1['State'] != 'United States']

# If 'United States' is the only value in 'State', nothing is left to plot,
# so say so instead of drawing an empty figure.
if set(unique_states) <= {'United States'}:
    print("No states other than 'United States' in the data; skipping the per-state plot.")
else:
    barplot_no_us = _deaths_barplot(
        data_1_no_us,
        'State',
        'COVID-19 Deaths by State and Age Group (excluding United States)',
        tight_layout=True,
    )


# In[ ]:
# `pip install ydata-profiling` is a notebook/shell command, not Python, and
# is a SyntaxError inside a script. Install the package from a shell before
# running this file:
#
#     pip install ydata-profiling


# In[ ]:
# Imported here rather than at the top so the analysis above still runs when
# ydata-profiling is not installed.
from ydata_profiling import ProfileReport

# A notebook renders the report when it is the cell's last expression; a
# script does not, so keep a handle to it.
# NOTE(review): to persist the report from a script, something like
# `profile_report.to_file('data_1_profile.html')` is presumably what is
# wanted - confirm the desired output path before adding a new output file.
profile_report = ProfileReport(data_1)