Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Set up the data

In [2]:
np.random.seed()
# create a dataframe with two columns "ages" and "sex"
df = pd.DataFrame({"ages" : np.random.randint(low = 10, high = 85, size = 51),
                   "sex" :  np.random.choice(a = ["M", "F", "U"], size = 51, #p = (0.3, 0.4, 0.3)
                                            )})

df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ages    51 non-null     int32 
 1   sex     51 non-null     object
dtypes: int32(1), object(1)
memory usage: 740.0+ bytes

Dataframe

In [3]:
df
Out[3]:
ages sex
0 27 F
1 48 U
2 67 F
3 38 U
4 36 M
5 28 M
6 61 F
7 33 F
8 37 U
9 16 M
10 48 U
11 26 F
12 35 M
13 38 U
14 76 F
15 84 M
16 81 F
17 59 F
18 82 F
19 33 F
20 20 M
21 51 U
22 43 F
23 15 F
24 54 M
25 77 U
26 61 U
27 75 F
28 81 M
29 78 M
30 72 F
31 36 M
32 39 U
33 27 U
34 75 M
35 27 U
36 42 U
37 55 U
38 83 F
39 54 F
40 72 F
41 26 U
42 19 F
43 49 U
44 76 U
45 12 U
46 23 F
47 33 U
48 21 M
49 31 F
50 38 F

Divide age column in custom intervals

In [4]:
# define function to create custom intervals
def age_intervals(age):
    if age <= 17:
        return "17 or below"
    elif 17 < age <= 34:
        return "17 - 34"
    elif 34 < age < 51:
        return "34 - 51"
    elif 51 < age <= 71:
        return "51 - 71"
    else:
        return "71+"

# create new column with customized intervals    
df["age_group"] = df["ages"].apply(age_intervals)

df["age_group"].value_counts()
Out[4]:
17 - 34        14
71+            14
34 - 51        13
51 - 71         7
17 or below     3
Name: age_group, dtype: int64
In [5]:
df_grouped = df.groupby(["sex", "age_group"], dropna = False, as_index = False).agg({"ages" : np.size}).sort_values("age_group")

# append additional rows to avoid length mismatch when 
# Missing groups may be required as data was generated randomly
# df_grouped.loc[len(df_grouped)] = ["M", "71+", 0]
# df_grouped.loc[len(df_grouped)] = ["U", "17 or below", 0]

# df_grouped.sort_values(["sex", "ages"])
In [6]:
fig, ax = plt.subplots(figsize = (12, 5))

sns.barplot(data = df,
            x = "age_group",
            y = "ages",
            hue = "sex",
            ci = 0,
            estimator = np.size)

plt.show()

!Warning! The below plot is wrong and has been kept that way deliberately!

In [7]:
# age_groups to be used as labels
labels = df["age_group"].unique()

print("labels:", labels)

# no. of male patients for each age_group
men_count = df_grouped.loc[df_grouped["sex"] == "M", "ages"].values

# no. of female patients for each age_group
women_count = df_grouped.loc[df_grouped["sex"] == "F", "ages"].values

# no. of unknown gender patients for each age_group
unknown_count = df_grouped.loc[df_grouped["sex"] == "U", "ages"].values

print("Men count = {}, Women count = {}, Unknown count = {}".format(men_count, women_count, unknown_count))

x = np.arange(len(labels))  # the label locations
width = 0.3  # the width of the bars


fig, ax = plt.subplots(figsize = (12, 5))
rects1 = ax.bar(x = x - width, 
                height = men_count, 
                width = width, 
                label='Men')
rects2 = ax.bar(x = x ,
                height = women_count,
                width = width,
                label='Women')
rects3 = ax.bar(x = x + width,
                height = unknown_count, 
                width = width, 
                label='Unknown')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend(bbox_to_anchor = (1.15, 1))

ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)
ax.bar_label(rects3, padding=3)

fig.tight_layout()

plt.show()
labels: ['17 - 34' '34 - 51' '51 - 71' '17 or below' '71+']
Men count = [3 1 3 1 4], Women count = [7 1 2 4 7], Unknown count = [4 1 8 2 3]

Series.isin()

In [8]:
filter_list = ['myo','peri', 'aortic', 'heart', 'artery', 'veins',]
print("filter_list:", filter_list, "\n")

dummy_df = pd.DataFrame({"symptom1" : ['myo','peri', 'aortic', 'heart'], 
                         "symptom2" : ['patient has MYO','Peri conditions', 'aorticregion', 'heart disease']})

# this will match as each word is same as present in filter list
print(dummy_df["symptom1"].isin(filter_list))

# this will not match
print(dummy_df["symptom2"].isin(filter_list))
filter_list: ['myo', 'peri', 'aortic', 'heart', 'artery', 'veins'] 

0    True
1    True
2    True
3    True
Name: symptom1, dtype: bool
0    False
1    False
2    False
3    False
Name: symptom2, dtype: bool

Your code

In [9]:
# width = 0.3

# babies = covid_vaers_cardio[(covid_vaers_cardio.AGE_YRS < 7)]
# kids = covid_vaers_cardio[(7 <= covid_vaers_cardio.AGE_YRS ) & (covid_vaers_cardio.AGE_YRS < 13)]
# teens = covid_vaers_cardio[(13 <= covid_vaers_cardio.AGE_YRS ) & (covid_vaers_cardio.AGE_YRS < 19)]
# young_adults = covid_vaers_cardio[(19 <= covid_vaers_cardio.AGE_YRS) & (covid_vaers_cardio.AGE_YRS < 30)]
# adults = covid_vaers_cardio[(30 <= covid_vaers_cardio.AGE_YRS) & (covid_vaers_cardio.AGE_YRS < 40)]
# older_adults = covid_vaers_cardio[(40 <= covid_vaers_cardio.AGE_YRS) & (covid_vaers_cardio.AGE_YRS < 50)]
# halfway_adults = covid_vaers_cardio[(50 <= covid_vaers_cardio.AGE_YRS) & (covid_vaers_cardio.AGE_YRS < 60)]
# retirement_adults = covid_vaers_cardio[(60 <= covid_vaers_cardio.AGE_YRS) & (covid_vaers_cardio.AGE_YRS < 70)]
# oldies = covid_vaers_cardio[(covid_vaers_cardio.AGE_YRS >= 70)]

# x = ['babies', 'kids', 'teens', 'young_adults', 'adults', 'older_adults', 'halfway_adults', 'retirement_adults', 'oldies']
# Male = covid_vaers_cardio[(covid_vaers_cardio.SEX == 'M')]
# Female = covid_vaers_cardio[(covid_vaers_cardio.SEX == 'F')]
# Unknown = covid_vaers_cardio[(covid_vaers_cardio.SEX == 'U')]

# bar1 = np.arange(len(x))
# bar2 = [i+width for i in bar1]
# bar3 = [i+width for i in bar2]
# # bar1, bar2, bar3

# plt.bar(bar1, Male, width, label = 'Male')
# plt.bar(bar2, Female, width, label = 'Female')
# plt.bar(bar3, Unknown, width, label = 'Unknown')

# plt.xlabel("Age")
# plt.ylabel("Total Number of Adverse Events")
# plt.title("How Cardiovascular Symptoms are Distributed among the COVID19 Vaccinated by Age and Sex")
# plt.xticks(bar1+width, x)
# plt.legend()

# plt.show()