%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Turn off (hide) the top, bottom, left and right spines of the given axes.
def turn_spines_off(ax):
    """Hide every spine of *ax* and return the same axes object.

    ``ax.spines`` is treated as a mapping of spine name to spine object;
    each spine object has ``set_visible(False)`` called on it.
    """
    for spine_obj in ax.spines.values():
        spine_obj.set_visible(False)
    return ax
# Annotate the topmost chart of each category with the gender each line represents
def annotate_top_text(ax, category):
    """Label the lines of a category's topmost chart with their gender.

    Writes 'Women' at x=2005 and 'Men' at x=2006, at heights that depend on
    *category* ('stem_cats', 'lib_arts_cats' or 'other_cats'). Any other
    category leaves the axes untouched. The axes object is returned.
    """
    # category -> (y position of the 'Women' label, y position of the 'Men' label)
    label_heights = {
        'stem_cats': (87, 8),
        'lib_arts_cats': (80, 20),
        'other_cats': (90, 5),
    }
    heights = label_heights.get(category)
    if heights is not None:
        women_y, men_y = heights
        ax.text(2005, women_y, 'Women')
        ax.text(2006, men_y, 'Men')
    return ax
# Annotate the bottommost chart of each category with the gender each line represents
def annotate_bottom_text(ax, category):
    """Label the lines of a category's bottommost chart with their gender.

    Writes 'Men' at x=2005 and 'Women' at x=2004 for the 'stem_cats' and
    'other_cats' categories. 'lib_arts_cats' is deliberately not handled:
    the lines of its bottommost chart overlap, so the genders could not be
    annotated there. Any unhandled category leaves the axes untouched.
    The axes object is returned.
    """
    # category -> (y position of the 'Men' label, y position of the 'Women' label)
    label_heights = {
        'stem_cats': (87, 8),
        'other_cats': (65, 25),
    }
    if category not in label_heights:
        return ax
    men_y, women_y = label_heights[category]
    ax.text(2005, men_y, 'Men')
    ax.text(2004, women_y, 'Women')
    return ax
# Read the csv file into the women_degrees data frame
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Colours chosen to suit colour-blind readers - blue for women, orange for men
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
# Light grey used for the horizontal 50% reference line
cb_grey = (171/255, 171/255, 171/255)

# majors - maps each category to the list of majors plotted in its column.
# The insertion order of the keys determines the left-to-right column order.
majors = {
    'stem_cats': ['Psychology', 'Biology', 'Math and Statistics', 'Physical Sciences', 'Computer Science', 'Engineering'],
    'lib_arts_cats': ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History'],
    'other_cats': ['Health Professions', 'Public Administration', 'Education', 'Agriculture', 'Business', 'Architecture'],
}

# Grid shape is derived from the data: one column per category and as many
# rows as the longest category (6 x 3 for the lists above, 17 charts in all).
n_cols = len(majors)
n_rows = max(len(names) for names in majors.values())

# Make the figure large enough to hold all the charts
fig = plt.figure(figsize=(30, 25))

# One column per category (subplot positions are 1-based) ...
for col, cat in enumerate(majors, start=1):
    # ... and one row per major within that category
    for row, major in enumerate(majors[cat]):
        # Subplots are numbered row by row, so stepping by n_cols moves
        # one row down while staying in the same column.
        ax = fig.add_subplot(n_rows, n_cols, col + n_cols * row)
        # The column holds the women's percentage; the men's line is its complement
        ax.plot(women_degrees['Year'], women_degrees[major], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[major], c=cb_orange, label='Men', linewidth=3)
        # turn_spines_off returns the axes with its spines hidden
        ax = turn_spines_off(ax)
        # Same x and y limits on every chart so they are directly comparable
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # The major name is used as the chart title
        ax.set_title(major)
        ax.set_yticks([0, 100])
        # Faint grey horizontal line at 50% to make the charts easier to read
        ax.axhline(50, c=cb_grey, alpha=0.3)
        # Hide tick marks and x tick labels. Booleans are required here:
        # a string such as "off" is truthy and would leave the ticks visible.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        if row == 0:
            # Annotate the gender of each line on the topmost chart of the category
            ax = annotate_top_text(ax, cat)
    # `ax` is now the bottommost chart of this category: show its x tick labels
    ax.tick_params(labelbottom=True)
    # Annotate the gender of each line on the bottommost chart where needed
    ax = annotate_bottom_text(ax, cat)

# Save the charts to the image file gender_degrees.png, then display them
plt.savefig("gender_degrees.png")
plt.show()