# Visualizing the Gender Gap in College Degrees

### Introduction

The aim of this project is to visualize the gender gap across college degrees. The first step is to create line charts to compare across these degree categories: Engineering, Computer Science, Psychology, Biology, Physical Sciences, Math and Statistics.

In [3]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Load the per-year percentage of degrees awarded to women: a 'Year' column
# plus one column per degree category.
# NOTE(review): the load step was missing from this cell; the file name below
# is assumed -- point it at wherever the dataset CSV actually lives.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Line colours as RGB fractions in [0, 1].
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

fig = plt.figure(figsize=(18, 3))

# In-plot text replaces a legend; only the first and last charts carry it.
# Each entry is (x, y, text) in data coordinates.
line_labels = {
    0: [(2005, 87, 'Men'), (2002, 8, 'Women')],
    5: [(2005, 62, 'Men'), (2001, 35, 'Women')],
}

for sp, category in enumerate(stem_cats):
    # One row of six charts; subplot positions are 1-based.
    ax = fig.add_subplot(1, 6, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    # The men's line is the complement of the women's percentage.
    ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    # Use booleans: the string "off" is truthy and does not hide the ticks
    # on current matplotlib releases.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    for x, y, text in line_labels.get(sp, []):
        ax.text(x, y, text)

plt.show()


### Comparing across all degree categories

Because there are seventeen degrees that we need to generate line charts for, we'll use a subplot grid layout of 6 rows by 3 columns.

In [4]:
# Degree categories grouped by field. Each list fills one column of the
# 6x3 subplot grid from top to bottom, so the order here is the plot order.
stem_cats = ['Psychology', 'Biology', 'Math and Statistics', 'Physical Sciences', 'Computer Science', 'Engineering']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
# The third column is drawn from `other_cats`, which was never defined.
# These are the six categories left over from the seventeen.
# NOTE(review): order inferred from the label positions used in the plots
# (first chart heavily female, last chart closer to even) -- confirm.
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture', 'Business', 'Architecture']

In [5]:
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: (1-based column, categories, labels by row).
# Each label is (x, y, text) in data coordinates.
grid_columns = [
    # STEM degrees.
    (1, stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Liberal arts degrees.
    (2, lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Other degrees.
    (3, other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, categories, line_labels in grid_columns:
    for row, category in enumerate(categories):
        # Subplots are numbered row-major from 1 in the 6x3 grid.
        ax = fig.add_subplot(6, 3, row * 3 + col)
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        # The men's line is the complement of the women's percentage.
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Use booleans: the string "off" is truthy on current matplotlib.
        ax.tick_params(bottom=False, top=False, left=False, right=False)

        for x, y, text in line_labels.get(row, []):
            ax.text(x, y, text)

plt.show()


### Hiding x-axis labels

We remove the x-axis labels from every line chart in a column except the bottommost one, so that the plots are easier to read.

In [6]:
fig = plt.figure(figsize=(16, 16))

# One entry per grid column: (1-based column, categories, labels by row).
# Each label is (x, y, text) in data coordinates.
grid_columns = [
    # STEM degrees.
    (1, stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Liberal arts degrees.
    (2, lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Other degrees.
    (3, other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, categories, line_labels in grid_columns:
    for row, category in enumerate(categories):
        # Subplots are numbered row-major from 1 in the 6x3 grid.
        ax = fig.add_subplot(6, 3, row * 3 + col)
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        # The men's line is the complement of the women's percentage.
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the last chart in each column keeps its year labels.
        # Use booleans: the strings "off"/"on" are both truthy on current
        # matplotlib, so they would not hide anything.
        is_bottom_chart = row == len(categories) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)

        for x, y, text in line_labels.get(row, []):
            ax.text(x, y, text)

plt.show()


### Setting y-axis labels

We can also simplify the y-axis labels by keeping just the starting and ending labels (0 and 100).

In [7]:
fig = plt.figure(figsize=(16, 16))

# One entry per grid column: (1-based column, categories, labels by row).
# Each label is (x, y, text) in data coordinates.
grid_columns = [
    # STEM degrees.
    (1, stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Liberal arts degrees.
    (2, lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Other degrees.
    (3, other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, categories, line_labels in grid_columns:
    for row, category in enumerate(categories):
        # Subplots are numbered row-major from 1 in the 6x3 grid.
        ax = fig.add_subplot(6, 3, row * 3 + col)
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        # The men's line is the complement of the women's percentage.
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the last chart in each column keeps its year labels.
        # Use booleans: the strings "off"/"on" are both truthy on current
        # matplotlib, so they would not hide anything.
        is_bottom_chart = row == len(categories) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Keep just the two end-of-scale labels on the y-axis.
        ax.set_yticks([0, 100])

        for x, y, text in line_labels.get(row, []):
            ax.text(x, y, text)

plt.show()


The plot is now cleaner to read without all the y-axis labels; however, it is hard to tell which degrees have close to a 50-50 gender breakdown. Keeping all of the y-axis labels would have made that easier, but we can do one better and draw a horizontal line across every line chart at the position where the y-axis label 50 would have been.

In [8]:
fig = plt.figure(figsize=(16, 16))

# One entry per grid column: (1-based column, categories, labels by row).
# Each label is (x, y, text) in data coordinates.
grid_columns = [
    # STEM degrees.
    (1, stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Liberal arts degrees.
    (2, lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Other degrees.
    (3, other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

# Light grey for the 50% guide line.
midline_grey = (171/255, 171/255, 171/255)

for col, categories, line_labels in grid_columns:
    for row, category in enumerate(categories):
        # Subplots are numbered row-major from 1 in the 6x3 grid.
        ax = fig.add_subplot(6, 3, row * 3 + col)
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        # The men's line is the complement of the women's percentage.
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the last chart in each column keeps its year labels.
        # Use booleans: the strings "off"/"on" are both truthy on current
        # matplotlib, so they would not hide anything.
        is_bottom_chart = row == len(categories) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Keep just the two end-of-scale labels on the y-axis.
        ax.set_yticks([0, 100])
        # Faint horizontal line at 50 marks an even gender split.
        ax.axhline(50, c=midline_grey, alpha=0.3)

        for x, y, text in line_labels.get(row, []):
            ax.text(x, y, text)

plt.show()


### Exporting to a file

We export all of the plots contained in the figure as a single image file.

In [9]:
fig = plt.figure(figsize=(16, 16))

# One entry per grid column: (1-based column, categories, labels by row).
# Each label is (x, y, text) in data coordinates.
grid_columns = [
    # STEM degrees.
    (1, stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Liberal arts degrees.
    (2, lib_arts_cats, {
        0: [(2003, 75, 'Women'), (2005, 20, 'Men')],
    }),
    # Other degrees.
    (3, other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

# Light grey for the 50% guide line.
midline_grey = (171/255, 171/255, 171/255)

for col, categories, line_labels in grid_columns:
    for row, category in enumerate(categories):
        # Subplots are numbered row-major from 1 in the 6x3 grid.
        ax = fig.add_subplot(6, 3, row * 3 + col)
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        # The men's line is the complement of the women's percentage.
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the last chart in each column keeps its year labels.
        # Use booleans: the strings "off"/"on" are both truthy on current
        # matplotlib, so they would not hide anything.
        is_bottom_chart = row == len(categories) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Faint horizontal line at 50 marks an even gender split.
        ax.axhline(50, c=midline_grey, alpha=0.3)
        # Keep just the two end-of-scale labels on the y-axis.
        ax.set_yticks([0, 100])

        for x, y, text in line_labels.get(row, []):
            ax.text(x, y, text)

# Export the file before calling pyplot.show(), so the image is written
# while the figure is still fully populated.
fig.savefig("gender_degrees.png")
plt.show()


### Conclusion

From the graph we can see that the college majors with the greatest gender gap in the 2010s are Health Professions, Psychology, Computer Science, Public Administration and Engineering.

In [ ]: