In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from datetime import timedelta
from datetime import date
import math
# For each study type: take the number of days between its earliest and
# latest date, divide it by the number of intervals (data points - 1) and
# round up to get a fixed step in days, then generate new dates spaced by
# that step, starting from the earliest date.

def get_new_dates(df):
    """Return evenly spaced replacement dates for every study type.

    For each value of ``df.Type`` (in order of first appearance) the new
    dates start at that type's earliest ``Date`` and advance by a fixed
    step of ``ceil(span_in_days / (n_rows - 1))`` days, one date per row.
    Because the step is rounded up, the last generated date can fall after
    the type's real latest date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``Type`` column and a datetime-like ``Date`` column.

    Returns
    -------
    list
        The per-type date lists concatenated in order of first appearance
        of each type. The list lines up with the rows of ``df`` only when
        the rows of each type are stored contiguously.
    """
    dates = []
    for study_type in df.Type.unique():
        # Filter once per type instead of once per expression.
        type_dates = df.loc[df.Type == study_type, 'Date']
        n_points = len(type_dates)
        first_date = min(type_dates)

        if n_points == 1:
            # A single row has no interval to divide; dividing by
            # (n_points - 1) would raise ZeroDivisionError.
            dates.append(first_date)
            continue

        days_between = (max(type_dates) - first_date).days
        period = math.ceil(days_between / (n_points - 1))
        dates.extend(first_date + timedelta(days=period * step)
                     for step in range(n_points))
    return dates

def credits_cumsum(df):
    """Running total of ``Credits``, restarted for every study type.

    The totals are computed per value of ``df.Type`` (in order of first
    appearance) and the per-type sequences are concatenated into one flat
    list.
    """
    return [
        running_total
        for study_type in df.Type.unique()
        for running_total in df.loc[df.Type == study_type, 'Credits'].cumsum().values
    ]

# NOTE(review): this looks like the body of a helper whose `def` line is
# missing (the `return` below is otherwise at module level), and nothing here
# assigns `local_average` before it is used. Restore both lines from the
# original notebook before running this cell.
new_average = []
for study_type in df.Type.unique():
new_average.extend(local_average)
return new_average

def moving_avg_avg(df, min_periods=1):
    """Expanding mean of the ``Average`` column, restarted per study type.

    For every value of ``df.Type`` (in order of first appearance) the
    expanding mean of that type's ``Average`` values is computed with the
    given ``min_periods``; the per-type results are concatenated into one
    flat list.
    """
    per_type_means = (
        df.loc[df.Type == study_type, 'Average']
        .expanding(min_periods=min_periods)
        .mean()
        .values
        for study_type in df.Type.unique()
    )
    return [value for chunk in per_type_means for value in chunk]

def print_general_stats(df):
# Prints the summed `Credits` for each hard-coded programme code found in
# `Type` (BA, IO, PM-CL, CL, PM-DS, DS), followed by the overall credit total.
print('European Credits:\n')
print("Psychology Bachelor: \t\t{}".format(sum(df[df.Type=='BA'].Credits)))
print("I/O Psychology Master: \t\t{}".format(sum(df[df.Type=='IO'].Credits)))
print("Clinical Psychology Premaster:\t{}".format(sum(df[df.Type=='PM-CL'].Credits)))
print("Clinical Psychology Master: \t{}".format(sum(df[df.Type=='CL'].Credits)))
print("Data Science Premaster: \t{}".format(sum(df[df.Type=='PM-DS'].Credits)))
print("Data Science Master: \t\t{}".format(sum(df[df.Type=='DS'].Credits)))
print("Total number of credits: \t{}".format(sum(df.Credits)))
print('Unweighted average:\n')
# NOTE(review): the statement below is cut off mid-expression (unbalanced
# parentheses), and the recorded output lists per-programme averages that no
# visible line prints, so the remainder of this function appears to be missing.
# NOTE(review): the label reads "Total number of credits" although the value
# being formatted is a rounded mean; confirm the intended wording.
print("Total number of credits (excluding bachelor): \t{}".format(round(np.mean(df[(df.Grade != 'P') &

def plot_histogram(df):
# Draws one series per study type on a 15x7 inch figure with the y-axis fixed
# to 5.8-10.1, adds a legend and shows the figure.
df = df.sort_values('Date')
# NOTE(review): `to_plot` is never assigned inside this function; the line that
# derives it (presumably from `df`) seems to be missing. Confirm against the
# original notebook.
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_avg'] = moving_avg_avg(to_plot)

fig, ax = plt.subplots()
# Default colour cycle repeated ten times so `colors[i]` does not run out.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] * 10

for i, study_type in enumerate(df.Type.unique()):
# NOTE(review): only the tail of the plotting call survives on the next line;
# its head (function name and data arguments) is missing.
color=colors[i], label=study_type)

plt.ylim(ymin=5.8, ymax=10.1)
fig.set_size_inches(15, 7)
plt.legend()
plt.show()
# plt.savefig('test.png', dpi=300)

In [2]:
# Load the grade export and normalise it for the analyses below.
df = pd.read_excel('grades_date_inaccurate.xlsx')
df['Date'] = pd.to_datetime(df['Date'])
# Keep only the course title: drop everything from the first "(" onwards.
# The vectorised .str accessor leaves a missing course name as NaN instead of
# raising AttributeError the way a row-wise `row.Course.split` call does.
df['Course'] = df['Course'].str.split('(').str[0]
df = df.sort_values('Date')


### General Stats

In [3]:
print_general_stats(df)

European Credits:

Psychology Bachelor: 		180
I/O Psychology Master: 		60
Clinical Psychology Premaster:	36
Clinical Psychology Master: 	60
Data Science Premaster: 	30
Data Science Master: 		78
Total number of credits: 	444
Unweighted average:

Psychology Bachelor: 		6.91
I/O Psychology Master: 		8.29
Clinical Psychology Premaster:	8.08
Clinical Psychology Master: 	8.5
Data Science Premaster: 	9.2
Data Science Master: 		8.38
Total number of credits (including bachelor): 	7.77
Total number of credits (excluding bachelor): 	8.45


### Largest difference

In [4]:
# Difference between my grade and the students' average for every course.
# Grades that are not numeric (e.g. a pass mark stored as text) become NaN and
# are therefore ignored by max()/min() below.
df['difference'] = pd.to_numeric(df.Grade, errors='coerce') - df.Average

# Select the extreme rows once and reuse them for printing and plotting.
largest = df[df.difference == df.difference.max()]
smallest = df[df.difference == df.difference.min()]

print('Largest difference:')
print(largest)

print('Smallest difference:')
print(smallest)

# Two-row frame consumed by the bar chart in the next cell: row 0 is the
# smallest difference, row 1 the largest (first match if there are ties).
summary_columns = ['Course', 'Grade', 'Average']
to_plot_diff = pd.DataFrame(
    [
        list(smallest[summary_columns].values[0]) + ['Small'],
        list(largest[summary_columns].values[0]) + ['Big'],
    ],
    columns=["Course", "Grade", "Average", "Difference"],
)

Largest difference:
Course       Date Grade  Credits Type  Average  difference
37  Clinical Psychology 2015-10-01     9        6   CL      6.5         2.5
Smallest difference:
Course       Date Grade  Credits Type  Average  difference
38  Work Group Psychology 2013-12-16   8.5        6   IO     8.28        0.22

In [5]:
import numpy as np
import matplotlib.pyplot as plt

# data to plot
# Horizontal bar chart for the two rows of `to_plot_diff`; that frame is
# filled with the 'Small' row first (position 0) and the 'Big' row second.
n_groups = 2
average = to_plot_diff.Average.values

# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.35
opacity = 1

# NOTE(review): only this second series (drawn at `index + bar_width`) is
# present; a first series drawn at `index` is presumably missing. Confirm.
rects2 = plt.barh(index + bar_width, average, bar_width,alpha=opacity,color='#00bcd4',
label="Students' Average",edgecolor = "black", linewidth=4)

# Hide all four spines, the tick marks and the x tick labels.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.tick_params(axis=u'both', which=u'both',length=0)
plt.xticks([])

# NOTE(review): the next line uses `i` and `v` before any loop defines them;
# the `for` header that owned it appears to be missing.
ax.text(v-1.5, i-0.08, str(v), color='white', fontweight='bold', fontsize=22)
# Write each average (rounded to one decimal) inside its bar.
for i, v in enumerate(average):
ax.text(v-1.5, i+0.28, str(round(v, 1)), color='white', fontweight='bold', fontsize=22)

# plt.title('Smallest and largest difference between \n') # my grade and the average
# plt.text(0.6,9.65, 'and', fontsize=12)
# plt.text(0.74,9.65, 'the average', fontsize=12,color='#00bcd4')
plt.yticks(index + bar_width/2, to_plot_diff.Course.values)
# NOTE(review): these hard-coded labels replace the Course values set on the
# previous line. Position 0 holds the 'Small' row, which the recorded output
# of the previous cell shows as Work Group Psychology, so the two labels look
# swapped. Verify before publishing the figure.
ax.set_yticklabels(['Clinical\nPsychology', 'Work Group\nPsychology'])
# plt.legend()\
plt.tight_layout()
# plt.savefig('bar_difference.png',dpi=600, transparent=True)
plt.show()


### Visualization

In [6]:
plot_histogram(df)

In [7]:
# Prepare the derived columns on `to_plot` and draw one set of series per
# study type.
# NOTE(review): `df` is sorted here but everything below works on `to_plot`,
# which is not assigned anywhere before this cell in the visible notebook.
# Confirm where it is created.
df = df.sort_values('Date')
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=2)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)

fig, ax = plt.subplots(figsize=(15, 5))
# Only the last `colors` assignment below takes effect; the earlier ones are
# overwritten before use.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] * 10
colors = ['#f44336','#607d8b','#4caf50','#00C4B4','#ffc107','#9c27b0']
colors = ['#03a9f4', '#29b6f6', '#4fc3f7', '#81d4fa', '#b3e5fc', '#e1f5fe']
colors.reverse()
colors = ['#29b6f6', '#29b6f6', '#29b6f6', '#29b6f6', '#29b6f6', '#29b6f6']

for i, study_type in enumerate(to_plot.Type.unique()):
# NOTE(review): the three lines below are only the tails of three plotting
# calls; their heads (function names and data arguments) are missing.
# `colors` holds six entries, so `colors[i]` fails for a seventh study type.
color=colors[i], alpha=.85, label=study_type, linewidth=1, edgecolor='black',
color=colors[i], alpha=0.5, label=study_type, linewidth=6)
'--', color=colors[i], alpha=0.5, label=study_type, linewidth=4)

plt.ylim(ymin=5.8, ymax=10.1)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# # Add names of education
# plt.text(date(2012,1,1), 10.2, 'Bachelor\nPsychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2014,1,18), 9.2, 'Master\n IO Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2015,1,25), 8.8, 'Pre-master\nClinical Psychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2016,2,10), 9.8, 'Master\nClinical Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2017,3,10), 10.2, 'Pre-master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2018,6,1), 9.2, 'Master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')

# One annotation per programme at a hand-picked (date, y) position.
# NOTE(review): every call below passes only x, y and styling keywords; the
# text argument that `plt.text` requires is missing from each call.
plt.text(date(2013,8,30), 6.67,
fontsize=10, color='#f44336', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2014,8,25), 8,
fontsize=10, color='#607d8b', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2015,8,20), 7.78,
fontsize=10, color='#4caf50', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2016,9,20), 8.2,
fontsize=10, color='#00C4B4', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2017,9,10), 8.95,
fontsize=10, color='#ffc107', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2019,3,15), 8.1,
fontsize=10, color='#9c27b0', horizontalalignment='center', alpha=0.8, fontweight='bold')

# Label five programmes with the mean of their 'Average' column, rounded to
# one decimal, at a hand-picked (date, y) position and in their own colour.
avg_label_specs = [
    ('IO', date(2014, 8, 1), 7.1, '#607d8b'),
    ('PM-CL', date(2015, 8, 15), 6.78, '#4caf50'),
    ('CL', date(2016, 8, 15), 6.9, '#00C4B4'),
    ('PM-DS', date(2017, 8, 15), 7.6, '#ffc107'),
    ('DS', date(2019, 2, 5), 6.85, '#9c27b0'),
]
for type_code, label_x, label_y, label_color in avg_label_specs:
    type_mean = to_plot.loc[to_plot.Type == type_code, 'Average'].mean()
    plt.text(label_x, label_y, str(round(type_mean, 1)) + '\navg',
             fontsize=10, color=label_color, horizontalalignment='center',
             alpha=0.8, fontweight='bold')

# color axes
# ax.spines['bottom'].set_color('#dddddd')
# ax.spines['left'].set_color('#dddddd')
# Tick styling: black x tick labels, no tick marks, a tick on 1 January of
# every second year from 2011 to 2019 and on every whole grade from 6 to 10.
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis='both', which='both', length=0)
ax.set_xticks([date(year, 1, 1) for year in range(2011, 2021, 2)])
ax.set_yticks(np.arange(6, 11))

# Handles for an optional custom legend; the legend call itself is disabled.
cmap = plt.cm.coolwarm
legend_grey = '#dddddd'
custom_lines = [
    Line2D([0], [0], color='black', marker='o', markersize=12,
           markerfacecolor=legend_grey, markeredgewidth=1.5),
    Line2D([0], [0], color=legend_grey, lw=4),
    Line2D([0], [0], color=legend_grey, lw=6, linestyle='--'),
]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

# Same label size for major and minor ticks on both axes.
for tick_kind in ('major', 'minor'):
    plt.tick_params(axis='both', which=tick_kind, labelsize=14)
plt.show()

In [8]:
# Prepare the derived columns on `to_plot` and draw one set of series per
# study type, this time with a six-colour pastel palette.
# NOTE(review): `df` is sorted here but everything below works on `to_plot`,
# which is not assigned anywhere before this cell in the visible notebook.
# Confirm where it is created.
df = df.sort_values('Date')
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=2)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)
# df.loc[:, 'Date'] = df['Date'].dt.strftime("%Y%m%d").astype(int)

fig, ax = plt.subplots(figsize=(15, 5))
colors = ['#63ace5', '#83d0c9', '#d0e1f9', '#e6d4a2', '#93a8b0', '#dacdbe']
# colors = ['#dacdbe', '#63ace5', '#d0e1f9', '#e6d4a2', '#93a8b0', '#83d0c9']

# colors = ['#63ace5', '#83d0c9', '#e6d4a2', '#d0e1f9', '#93a8b0', '#dacdbe']

# colors = ['#63ace5', '#83d0c9', '#e6d4a2', '#d0e1f9', '#f0908a', '#dacdbe']

#4a707a

for i, study_type in enumerate(to_plot.Type.unique()):
# NOTE(review): the three lines below are only the tails of three plotting
# calls; their heads (function names and data arguments) are missing.
# `colors` holds six entries, so `colors[i]` fails for a seventh study type.
color=colors[i], alpha=.9, label=study_type, linewidth=1, edgecolor='black',
color=colors[i], alpha=0.7, label=study_type, linewidth=6, zorder=0)
'--', color=colors[i], alpha=0.7, label=study_type, linewidth=4, zorder=0)

plt.ylim(ymin=5.8, ymax=10.1)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# # Add names of education
# plt.text(date(2012,1,1), 10.2, 'Bachelor\nPsychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2014,1,18), 9.2, 'Master\n IO Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2015,1,25), 8.8, 'Pre-master\nClinical Psychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2016,2,10), 9.8, 'Master\nClinical Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2017,3,10), 10.2, 'Pre-master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2018,6,1), 9.2, 'Master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')

# One grey annotation per programme at a hand-picked (date, y) position.
# NOTE(review): every call below passes only x, y and styling keywords; the
# text argument that `plt.text` requires is missing from each call.
plt.text(date(2013,8,30), 6.67,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2014,8,25), 8,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2015,8,20), 7.78,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2016,9,20), 8.2,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2017,9,10), 8.95,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
plt.text(date(2019,3,15), 8.1,
fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')

# Label five programmes with the mean of their 'Average' column, rounded to
# one decimal, in small grey text at a hand-picked (date, y) position.
avg_label_spots = [
    ('IO', date(2014, 8, 1), 7.1),
    ('PM-CL', date(2015, 8, 15), 6.78),
    ('CL', date(2016, 8, 15), 6.9),
    ('PM-DS', date(2017, 8, 15), 7.6),
    ('DS', date(2019, 2, 5), 6.85),
]
for type_code, label_x, label_y in avg_label_spots:
    type_mean = to_plot.loc[to_plot.Type == type_code, 'Average'].mean()
    plt.text(label_x, label_y, str(round(type_mean, 1)) + '\navg',
             fontsize=8, color='grey', horizontalalignment='center',
             alpha=0.8, fontweight='bold')

# color axes
# ax.spines['bottom'].set_color('#dddddd')
# ax.spines['left'].set_color('#dddddd')
# Tick styling: black x tick labels, no tick marks, a tick on 1 January of
# every second year from 2011 to 2019 and on every whole grade from 6 to 10.
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis='both', which='both', length=0)
ax.set_xticks([date(year, 1, 1) for year in range(2011, 2021, 2)])
ax.set_yticks(np.arange(6, 11))

# Handles for an optional custom legend; the legend call itself is disabled.
cmap = plt.cm.coolwarm
legend_grey = '#dddddd'
custom_lines = [
    Line2D([0], [0], color='black', marker='o', markersize=12,
           markerfacecolor=legend_grey, markeredgewidth=1.5),
    Line2D([0], [0], color=legend_grey, lw=4),
    Line2D([0], [0], color=legend_grey, lw=6, linestyle='--'),
]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

# Same label size for major and minor ticks on both axes.
for tick_kind in ('major', 'minor'):
    plt.tick_params(axis='both', which=tick_kind, labelsize=14)
plt.show()

In [9]:
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.cbook import get_sample_data
def imscatter(x, y, image, ax=None, zoom=1):
    """Place ``image`` on ``ax``, centred on every (x, y) data point.

    Parameters
    ----------
    x, y : scalar or array-like
        Data coordinates of the points; scalars are promoted to 1-D arrays.
    image : str or array-like
        File name to load with ``plt.imread``, or image data to use as is.
    ax : matplotlib Axes, optional
        Target axes; defaults to the current axes (``plt.gca()``).
    zoom : float, optional
        Scale factor handed to ``OffsetImage``.

    Returns
    -------
    list
        The artists added to ``ax``, one per point.
    """
    if ax is None:
        ax = plt.gca()
    try:
        # The caller in this notebook passes a file name, so load it first.
        image = plt.imread(image)
    except TypeError:
        # Presumably `image` is already image data rather than a path; use it
        # unchanged.
        pass
    im = OffsetImage(image, zoom=zoom)
    x, y = np.atleast_1d(x, y)
    artists = []
    for x0, y0 in zip(x, y):
        ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
        # Without add_artist the box is never drawn and `artists` stays empty.
        artists.append(ax.add_artist(ab))
    # Annotation boxes do not extend the data limits on their own, so feed
    # the points in explicitly before autoscaling.
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
    return artists

# Prepare the derived columns on `to_plot`, then draw one set of series per
# study type plus image markers placed with `imscatter`.
# NOTE(review): `df` is sorted here but everything below works on `to_plot`,
# which is not assigned anywhere before this cell in the visible notebook.
# Confirm where it is created.
df = df.sort_values('Date')
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=1)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)
# `date_int`: difference between consecutive `new_date` values converted to
# whole days, with the first row set to 0, then accumulated into a running
# day count.
to_plot['date_int'] = to_plot[['new_date']].diff()
to_plot['date_int'] = to_plot[['date_int']].dropna().apply(lambda row:
row.date_int.days, 1)
to_plot['date_int'] = to_plot.date_int.fillna(0).values
to_plot['date_int'] = to_plot['date_int'].cumsum()

fig, ax = plt.subplots(figsize=(15, 5))
colors = ['#63ace5', '#83d0c9', '#d0e1f9', '#e6d4a2', '#93a8b0', '#dacdbe']
# colors = ['#dacdbe', '#63ace5', '#d0e1f9', '#e6d4a2', '#93a8b0', '#83d0c9']
# colors = ['#63ace5', '#83d0c9', '#e6d4a2', '#d0e1f9', '#93a8b0', '#dacdbe']
# colors = ['#63ace5', '#83d0c9', '#e6d4a2', '#d0e1f9', '#f0908a', '#dacdbe']

for i, study_type in enumerate(to_plot.Type.unique()):
# NOTE(review): the three lines below are only the tails of three plotting
# calls; their heads (function names and data arguments) are missing.
# `colors` holds six entries, so `colors[i]` fails for a seventh study type.
color=colors[i], alpha=.9, label=study_type, linewidth=0, edgecolor='black',
color=colors[i], alpha=0.7, label=study_type, linewidth=6, zorder=0)
'--', color=colors[i], alpha=1, label=study_type, linewidth=4, zorder=0)
# Maps a `size` value to the image zoom factor; a value that is not one of
# these six keys raises KeyError in the loop below.
size_dict = {2:0.01, 5:0.011, 6:0.015, 12:0.02, 18:0.025, 24:0.03}
# NOTE(review): `x`, `y` and `sizes` are not assigned in the visible code;
# the lines that derive them appear to be missing.
for x_i, y_i, size in zip(x, y, sizes):
imscatter(x_i, y_i, 'circle_3.png', zoom=size_dict[size], ax=ax)

plt.ylim(ymin=5.8, ymax=10.1)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# # Add names of education
# plt.text(date(2012,1,1), 10.2, 'Bachelor\nPsychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2014,1,18), 9.2, 'Master\n IO Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2015,1,25), 8.8, 'Pre-master\nClinical Psychology', fontsize=12, color='black',
#          horizontalalignment='center', fontweight='bold')
# plt.text(date(2016,2,10), 9.8, 'Master\nClinical Psychology', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2017,3,10), 10.2, 'Pre-master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')
# plt.text(date(2018,6,1), 9.2, 'Master\nData Science', fontsize=12, color='black',
#         horizontalalignment='center', fontweight='bold')

# plt.text(date(2013,8,30), 6.67,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2014,8,25), 8,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2015,8,20), 7.78,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2016,9,20), 8.2,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2017,9,10), 8.95,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2019,3,15), 8.1,
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')

# # add averages of average
# plt.text(date(2014,8,1), 7.1,
#          str(round(to_plot.loc[to_plot.Type=='IO', 'Average'].mean(), 1))+'\navg',
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2015,8,15), 6.78,
#          str(round(to_plot.loc[to_plot.Type=='PM-CL', 'Average'].mean(), 1))+'\navg',
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2016,8,15), 6.9,
#          str(round(to_plot.loc[to_plot.Type=='CL', 'Average'].mean(), 1))+'\navg',
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2017,8,15), 7.6,
#          str(round(to_plot.loc[to_plot.Type=='PM-DS', 'Average'].mean(), 1))+'\navg',
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')
# plt.text(date(2019,2,5), 6.85,
#          str(round(to_plot.loc[to_plot.Type=='DS', 'Average'].mean(), 1))+'\navg',
#          fontsize=8, color='grey', horizontalalignment='center', alpha=0.8, fontweight='bold')

# color axes
# ax.spines['bottom'].set_color('#dddddd')
# ax.spines['left'].set_color('#dddddd')
# Tick styling: black x tick labels, no tick marks and a y tick on every
# whole grade from 6 to 10. The fixed x ticks are disabled for this figure.
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis='both', which='both', length=0)
# ax.set_xticks([date(2011+(i),1,1) for i in np.arange(0, 10, 2)])
ax.set_yticks(np.arange(6, 11))

# Handles for an optional custom legend; the legend call itself is disabled.
cmap = plt.cm.coolwarm
legend_grey = '#dddddd'
custom_lines = [
    Line2D([0], [0], color='black', marker='o', markersize=12,
           markerfacecolor=legend_grey, markeredgewidth=1.5),
    Line2D([0], [0], color=legend_grey, lw=4),
    Line2D([0], [0], color=legend_grey, lw=6, linestyle='--'),
]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

# Same label size for major and minor ticks on both axes.
for tick_kind in ('major', 'minor'):
    plt.tick_params(axis='both', which=tick_kind, labelsize=14)
plt.show()
# plt.savefig('circles_plot_full.png', dpi=600, transparent=True)

In [17]:
import matplotlib.dates as mdates
# One series per study type on a 15x5 inch figure with a date-formatted x-axis.
fig, ax = plt.subplots()
# The default colour cycle assigned first is immediately replaced by the
# explicit six-colour list on the next line.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] * 10
colors = ['#56b4e9', 'purple', 'red', 'green', 'yellow', 'orange']

for i, study_type in enumerate(df.Type.unique()):
# NOTE(review): only the middle of a plotting call survives on the next line;
# its head and its closing arguments are missing.
color=colors[i], alpha=0.8, label=study_type, linewidth=1,

plt.ylim(ymin=5.8, ymax=10.1)
fig.set_size_inches(15, 5)
# plt.legend()

# Set the spacing and the format of the x ticks
ax.xaxis.set_major_locator(mdates.DayLocator(interval=300))   # one major tick every 300 days
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

plt.show()
# plt.savefig('test.png', dpi=300)


Compared to all other students

15% of students achieved the same or a higher result.

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from collections import namedtuple

# Paired horizontal bars per course: my score (`My`) next to the students'
# average (`Avg`), ordered by `Avg` descending.
# NOTE(review): `avg_df` is not created anywhere in the visible notebook;
# confirm which cell loads it.
avg_df = avg_df.sort_values('Avg', ascending=False)

# to_plot = avg_df.loc[avg_df.Education=='MS-DS']
to_plot = avg_df

n_groups = len(to_plot)

my_scores = to_plot.My.values
# Missing averages are replaced by 0 so that every course still gets a bar.
avg_scores = to_plot.fillna(0).Avg.values

fig, ax = plt.subplots(figsize=(10, 10))

index = np.arange(n_groups)
bar_width = 0.4
opacity = .8

# NOTE(review): both `ax.barh` calls below end on a trailing comma and are
# never closed; their final argument lines appear to be missing.
rects1 = ax.barh(index, my_scores, bar_width,
alpha=opacity, color='#003f5c',

rects2 = ax.barh(index + bar_width, avg_scores, bar_width,
alpha=opacity, color='#ffa600',

ax.set_title('My grades during three different masters versus the average', fontsize=14, fontweight='bold')
plt.text(3.5,25.5,'[DS] = Data Science \n[CP]  = Clinical Psychology \n[IOP] = I/O Psychology')
ax.set_yticks(index + bar_width / 2)
ax.set_yticklabels(to_plot.Course.values)
ax.legend(loc='upper left',frameon=False)

# Write my score in white near the right end of each bar.
for i, v in enumerate(my_scores):
ax.text(v-0.5, i, str(v), color='white', fontweight='bold', va='center', fontsize=10)

# Write the average next to the neighbouring bar; a value of 0 (a filled-in
# missing average) is written in white, any other value in black.
for i, v in enumerate(avg_scores):
if v == 0:
ax.text(v + .05, i+0.4, str(v), color='white', fontweight='bold', va='center')
else:
ax.text(v - .6, i+0.4, str(v), color='black', fontweight='bold', va='center')

# Hide all four spines and the x ticks.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.get_xaxis().set_ticks([])

fig.tight_layout()
plt.show()
# plt.savefig('scores.png', dpi=300)