In this project, we will visualize data from the Department of Education Statistics.
The department releases a data set annually containing the percentage of bachelor's degrees granted to women from 1970 to 2012. The data set is broken up into 17 categories of degrees, with each column representing a separate category. The data set for this project, compiled by Randal Olson, a data scientist at the University of Pennsylvania, can be downloaded here.
Goal: to compare the gender gap across all degree categories using data visualization.
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Load the degrees data set from the CSV file in the working directory.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Line colors as RGB triples, scaled from the 0-255 range down to 0-1.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

# Degree categories plotted in the first (single-row) figure.
stem_cats = [
    'Engineering',
    'Computer Science',
    'Psychology',
    'Biology',
    'Physical Sciences',
    'Math and Statistics',
]
# One row of line charts, one per STEM category, comparing the percentage of
# degrees granted to women with the complementary percentage (100 - women).
fig = plt.figure(figsize=(18, 3))
last_pos = len(stem_cats) - 1
for pos, category in enumerate(stem_cats):
    ax = fig.add_subplot(1, len(stem_cats), pos + 1)
    ax.plot(women_degrees['Year'], women_degrees[category],
            c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[category],
            c=cb_orange, label='Men', linewidth=3)

    # Remove the box around the plot area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)

    # tick_params expects booleans; the strings "on"/"off" are deprecated and
    # a non-empty string is truthy, so "off" would leave the ticks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    # Annotate the lines directly on the first and last charts only.
    if pos == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
        ax.legend(loc='best')
    elif pos == last_pos:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()
# 6x3 grid of line charts: one column per degree group (STEM, liberal arts,
# other) and one row per degree category within the group.
fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology',
             'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism',
                 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education',
              'Agriculture', 'Business', 'Architecture']

# Per column: (categories, text labels for the top chart).
# Each label is (x, y, text) in data coordinates.
columns = [
    (stem_cats, [(2005, 87, 'Men'), (2002, 8, 'Women')]),
    (lib_arts_cats, [(2005, 73, 'Women'), (2002, 14, 'Men')]),
    (other_cats, [(2005, 87, 'Women'), (2002, 8, 'Men')]),
]
# The bottom chart of every column uses the same label positions.
# NOTE(review): identical across columns in the original; confirm they sit
# well against each column's data.
bottom_labels = [(2005, 62, 'Men'), (2001, 35, 'Women')]

for col, (categories, top_labels) in enumerate(columns):
    for row, category in enumerate(categories):
        # Subplot positions are 1-based and numbered row by row.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)
        ax.set_title(category)

        # Remove the box around the plot area.
        for spine in ax.spines.values():
            spine.set_visible(False)

        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)

        if row == 0:
            for x, y, text in top_labels:
                ax.text(x, y, text)
            ax.legend(loc='best')
        elif row == len(categories) - 1:
            for x, y, text in bottom_labels:
                ax.text(x, y, text)
            # tick_params expects a boolean here, not the string 'on'.
            ax.tick_params(labelbottom=True)
plt.show()
To hide the x-axis labels, we set the labelbottom parameter to False in the call to Axes.tick_params():
ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
To declutter the charts, we are disabling the x-axis labels of all the line charts except the bottommost line chart in each column.
# 6x3 grid of line charts (one column per degree group) with tick marks
# removed and x-axis labels shown only on the bottom chart of each column.
fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology',
             'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism',
                 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education',
              'Agriculture', 'Business', 'Architecture']

# Per column: (categories, text labels for the top chart).
# Each label is (x, y, text) in data coordinates.
columns = [
    (stem_cats, [(2005, 87, 'Men'), (2002, 8, 'Women')]),
    (lib_arts_cats, [(2005, 73, 'Women'), (2002, 14, 'Men')]),
    (other_cats, [(2005, 87, 'Women'), (2002, 8, 'Men')]),
]
# The bottom chart of every column uses the same label positions.
bottom_labels = [(2005, 62, 'Men'), (2001, 35, 'Women')]

for col, (categories, top_labels) in enumerate(columns):
    for row, category in enumerate(categories):
        # Subplot positions are 1-based and numbered row by row.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)
        ax.set_title(category)

        # tick_params expects booleans; the strings "on"/"off" are deprecated
        # and a non-empty string is truthy, so "off" would hide nothing.
        ax.tick_params(bottom=False, top=False, left=False, right=False,
                       labelbottom=False)

        # Remove the box around the plot area.
        for spine in ax.spines.values():
            spine.set_visible(False)

        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)

        if row == 0:
            for x, y, text in top_labels:
                ax.text(x, y, text)
            ax.legend(loc='best')
        elif row == len(categories) - 1:
            for x, y, text in bottom_labels:
                ax.text(x, y, text)
            # Show x-axis labels only on the bottommost chart of the column.
            ax.tick_params(labelbottom=True)
plt.show()
For all the plots, we keep only the starting and ending y-axis labels, 0 and 100.
# 6x3 grid of line charts (one column per degree group) with tick marks
# removed, x-axis labels only on the bottom chart of each column, and the
# y-axis reduced to its two end labels.
fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology',
             'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism',
                 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education',
              'Agriculture', 'Business', 'Architecture']

# Per column: (categories, text labels for the top chart).
# Each label is (x, y, text) in data coordinates.
columns = [
    (stem_cats, [(2005, 87, 'Men'), (2002, 8, 'Women')]),
    (lib_arts_cats, [(2005, 73, 'Women'), (2002, 14, 'Men')]),
    (other_cats, [(2005, 87, 'Women'), (2002, 8, 'Men')]),
]
# The bottom chart of every column uses the same label positions.
bottom_labels = [(2005, 62, 'Men'), (2001, 35, 'Women')]

for col, (categories, top_labels) in enumerate(columns):
    for row, category in enumerate(categories):
        # Subplot positions are 1-based and numbered row by row.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)
        ax.set_title(category)

        # tick_params expects booleans; the strings "on"/"off" are deprecated
        # and a non-empty string is truthy, so "off" would hide nothing.
        ax.tick_params(bottom=False, top=False, left=False, right=False,
                       labelbottom=False)

        # Remove the box around the plot area.
        for spine in ax.spines.values():
            spine.set_visible(False)

        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # Keep only the starting and ending y-axis labels (0 and 100).
        ax.set_yticks([0, 100])

        if row == 0:
            for x, y, text in top_labels:
                ax.text(x, y, text)
            ax.legend(loc='best')
        elif row == len(categories) - 1:
            for x, y, text in bottom_labels:
                ax.text(x, y, text)
            # Show x-axis labels only on the bottommost chart of the column.
            ax.tick_params(labelbottom=True)
plt.show()
For all plots, we use Axes.axhline() to draw a horizontal line at the 50% mark.
# 6x3 grid of line charts (one column per degree group) with tick marks
# removed, x-axis labels only on the bottom chart of each column, 0/100
# y-axis labels, and a faint horizontal reference line at 50%.
fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology',
             'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism',
                 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education',
              'Agriculture', 'Business', 'Architecture']

# Per column: (categories, text labels for the top chart).
# Each label is (x, y, text) in data coordinates.
columns = [
    (stem_cats, [(2005, 87, 'Men'), (2002, 8, 'Women')]),
    (lib_arts_cats, [(2005, 73, 'Women'), (2002, 14, 'Men')]),
    (other_cats, [(2005, 87, 'Women'), (2002, 8, 'Men')]),
]
# The bottom chart of every column uses the same label positions.
bottom_labels = [(2005, 62, 'Men'), (2001, 35, 'Women')]
# Light gray used for the 50% reference line.
midline_color = (171/255, 171/255, 171/255)

for col, (categories, top_labels) in enumerate(columns):
    for row, category in enumerate(categories):
        # Subplot positions are 1-based and numbered row by row.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)
        ax.set_title(category)

        # tick_params expects booleans; the strings "on"/"off" are deprecated
        # and a non-empty string is truthy, so "off" would hide nothing.
        ax.tick_params(bottom=False, top=False, left=False, right=False,
                       labelbottom=False)

        # Remove the box around the plot area.
        for spine in ax.spines.values():
            spine.set_visible(False)

        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # Keep only the starting and ending y-axis labels (0 and 100).
        ax.set_yticks([0, 100])
        # Horizontal reference line at the 50% mark.
        ax.axhline(50, c=midline_color, alpha=0.3)

        if row == 0:
            for x, y, text in top_labels:
                ax.text(x, y, text)
            ax.legend(loc='best')
        elif row == len(categories) - 1:
            for x, y, text in bottom_labels:
                ax.text(x, y, text)
            # Show x-axis labels only on the bottommost chart of the column.
            ax.tick_params(labelbottom=True)
plt.show()
# Exporting the figure to a .png file
#
# Same 6x3 grid as before (tick marks removed, x-axis labels only on the
# bottom chart of each column, 0/100 y-axis labels, 50% reference line),
# written to "gender_degrees.png" before being displayed.
fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology',
             'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism',
                 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education',
              'Agriculture', 'Business', 'Architecture']

# Per column: (categories, text labels for the top chart, legend on top chart).
# Each label is (x, y, text) in data coordinates.
# NOTE(review): the original draws no legend on the STEM column but keeps it
# on the other two; preserved here. Confirm whether the legend should be
# dropped everywhere now that the lines are labelled with text.
columns = [
    (stem_cats, [(2005, 87, 'Men'), (2002, 8, 'Women')], False),
    (lib_arts_cats, [(2005, 73, 'Women'), (2002, 14, 'Men')], True),
    (other_cats, [(2005, 87, 'Women'), (2002, 8, 'Men')], True),
]
# The bottom chart of every column uses the same label positions.
bottom_labels = [(2005, 62, 'Men'), (2001, 35, 'Women')]
# Light gray used for the 50% reference line.
midline_color = (171/255, 171/255, 171/255)

for col, (categories, top_labels, show_legend) in enumerate(columns):
    for row, category in enumerate(categories):
        # Subplot positions are 1-based and numbered row by row.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)
        ax.set_title(category)

        # tick_params expects booleans; the strings "on"/"off" are deprecated
        # and a non-empty string is truthy, so "off" would hide nothing.
        ax.tick_params(bottom=False, top=False, left=False, right=False,
                       labelbottom=False)

        # Remove the box around the plot area.
        for spine in ax.spines.values():
            spine.set_visible(False)

        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # Keep only the starting and ending y-axis labels (0 and 100).
        ax.set_yticks([0, 100])
        # Horizontal reference line at the 50% mark.
        ax.axhline(50, c=midline_color, alpha=0.3)

        if row == 0:
            for x, y, text in top_labels:
                ax.text(x, y, text)
            if show_legend:
                ax.legend(loc='best')
        elif row == len(categories) - 1:
            for x, y, text in bottom_labels:
                ax.text(x, y, text)
            # Show x-axis labels only on the bottommost chart of the column.
            ax.tick_params(labelbottom=True)

# Save before show(): the figure is written while it is still fully drawn.
fig.savefig("gender_degrees.png")
plt.show()