import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from datetime import timedelta
from datetime import date
import math
# get the number of days between min and max dates
# then calculate the number of periods by dividing the number of
# days by the number of datapoints and finally calculate
# new dates with equal distance between them
def get_new_dates(df):
    """Spread the rows of every study type evenly between its first and last date.

    For each value of ``df.Type`` (in order of first appearance) the number of
    days between the earliest and the latest ``Date`` is divided by the number
    of intervals (rows - 1) and rounded up.  One date per row is then generated,
    starting at the earliest date and stepping by that period.

    Parameters:
        df: DataFrame with a ``Type`` column and a ``Date`` column of datetimes.

    Returns:
        A flat list with one date per row, grouped by study type in order of
        first appearance.  NOTE: callers assign this list directly to a column,
        which only lines up when the rows of each type are contiguous in ``df``.
    """
    dates = []
    for study_type in df.Type.unique():
        type_dates = df.loc[df.Type == study_type, 'Date']
        n_rows = len(type_dates)
        first_date = type_dates.min()
        if n_rows > 1:
            days_between = (type_dates.max() - first_date).days
            period = math.ceil(days_between / (n_rows - 1))
        else:
            # A single course has no interval to divide; dividing by
            # (n_rows - 1) here used to raise ZeroDivisionError.
            period = 0
        dates.extend(first_date + timedelta(days=period * step)
                     for step in range(n_rows))
    return dates
def credits_cumsum(df):
    """Return the running total of ``Credits`` inside every study type.

    Study types are visited in order of first appearance in ``df.Type``; the
    per-type running totals are concatenated into one flat list.
    """
    return [
        running_total
        for kind in df.Type.unique()
        for running_total in df.loc[df.Type == kind, 'Credits'].cumsum().values
    ]
def moving_avg_grade(df, min_periods=1):
    """Return the expanding (cumulative) mean of ``Grade`` per study type.

    Study types are visited in order of first appearance in ``df.Type`` and
    their results are concatenated into one flat list.  Positions that have
    seen fewer than ``min_periods`` grades within their type are NaN.
    """
    averages = []
    for kind in df.Type.unique():
        grades = df.loc[df.Type == kind, 'Grade']
        running_mean = grades.expanding(min_periods=min_periods).mean()
        averages += list(running_mean.values)
    return averages
def moving_avg_avg(df, min_periods=1):
    """Return the expanding (cumulative) mean of ``Average`` per study type.

    Study types are visited in order of first appearance in ``df.Type`` and
    their results are concatenated into one flat list.  Positions that have
    seen fewer than ``min_periods`` values within their type are NaN.
    """
    averages = []
    for kind in df.Type.unique():
        class_means = df.loc[df.Type == kind, 'Average']
        running_mean = class_means.expanding(min_periods=min_periods).mean()
        averages += list(running_mean.values)
    return averages
def print_general_stats(df):
    """Print the credit totals and unweighted grade averages per programme.

    Parameters:
        df: DataFrame with ``Type`` (programme code), ``Credits`` and ``Grade``
            columns.  A ``Grade`` equal to the string ``'P'`` marks a pass
            without a numeric grade.

    The first section prints the sum of ``Credits`` for each programme and for
    the whole frame.  The second section prints the mean ``Grade`` (rounded to
    two decimals) for each programme, for all rows, and for all rows except
    the bachelor (``'BA'``).  ``'P'`` rows are left out of every average, so a
    pass/fail course in any programme no longer breaks the mean.
    """
    # (label, programme code); the labels carry their own tab alignment.
    programmes = [
        ("Psychology Bachelor: \t\t", 'BA'),
        ("I/O Psychology Master: \t\t", 'IO'),
        ("Clinical Psychology Premaster:\t", 'PM-CL'),
        ("Clinical Psychology Master: \t", 'CL'),
        ("Data Science Premaster: \t", 'PM-DS'),
        ("Data Science Master: \t\t", 'DS'),
    ]

    print('European Credits:\n')
    for label, study_type in programmes:
        print("{}{}".format(label, sum(df[df.Type == study_type].Credits)))
    print("Total number of credits: \t{}".format(sum(df.Credits)))

    # Only rows with a numeric grade can take part in an average.
    graded = df[df.Grade != 'P']

    def mean_grade(rows):
        return round(rows.Grade.astype(float).mean(), 2)

    print('Unweighted average:\n')
    for label, study_type in programmes:
        print("{}{}".format(label, mean_grade(graded[graded.Type == study_type])))
    # These two lines used to be labelled "Total number of credits", although
    # the value printed is an average grade.
    print("Overall average (including bachelor): \t{}".format(mean_grade(graded)))
    print("Overall average (excluding bachelor): \t{}".format(
        mean_grade(graded[graded.Type != 'BA'])))
def plot_histogram(df):
    """Show a bar chart of the numeric grades, one colour per study type.

    Rows are sorted by ``Date`` and rows whose ``Grade`` is the string ``'P'``
    are dropped.  Each remaining course is drawn as a bar at the evenly spaced
    date produced by ``get_new_dates``, with its grade as the bar height.
    Colours come from the default matplotlib colour cycle, indexed by the
    position of the study type in ``df.Type.unique()``.
    """
    ordered = df.sort_values('Date')
    graded = ordered.loc[ordered.Grade != 'P', :].copy()
    graded['new_date'] = get_new_dates(graded)
    graded['mov_grade'] = moving_avg_grade(graded)
    graded['mov_avg'] = moving_avg_avg(graded)

    fig, ax = plt.subplots()
    palette = plt.rcParams['axes.prop_cycle'].by_key()['color'] * 10
    for position, kind in enumerate(ordered.Type.unique()):
        subset = graded[(graded.Grade != 'P') & (graded.Type == kind)]
        plt.bar(subset.new_date.values,
                subset.Grade.values.astype(float), 40,
                color=palette[position], label=kind)

    plt.ylim(ymin=5.8, ymax=10.1)
    fig.set_size_inches(15, 7)
    plt.legend()
    plt.show()
    # plt.savefig('test.png', dpi=300)
# Load the grade sheet, parse the dates and order the courses chronologically.
df = pd.read_excel('grades_date_inaccurate.xlsx')
df['Date'] = pd.to_datetime(df['Date'])
# Keep only the part of each course name that precedes the first "(".
df['Course'] = df['Course'].map(lambda title: title.split('(')[0])
df = df.sort_values('Date')
print_general_stats(df)
# Recorded output of print_general_stats(df):
#
# European Credits:
#
# Psychology Bachelor:            180
# I/O Psychology Master:          60
# Clinical Psychology Premaster:  36
# Clinical Psychology Master:     60
# Data Science Premaster:         30
# Data Science Master:            78
# Total number of credits:        444
# Unweighted average:
#
# Psychology Bachelor:            6.91
# I/O Psychology Master:          8.29
# Clinical Psychology Premaster:  8.08
# Clinical Psychology Master:     8.5
# Data Science Premaster:         9.2
# Data Science Master:            8.38
# Total number of credits (including bachelor):   7.77
# Total number of credits (excluding bachelor):   8.45
# Signed gap between my grade and the students' average.  Rows whose grade is
# a string (the pass marker) get no value.
df['difference'] = df.apply(
    lambda row: None if type(row.Grade) == str else row.Grade - row.Average,
    axis=1,
)

largest_gap = df[df.difference == df.difference.max()]
smallest_gap = df[df.difference == df.difference.min()]
print('Largest difference:')
print(largest_gap)
print('Smallest difference:')
print(smallest_gap)

# Two-row frame for the comparison chart: the smallest gap first, then the
# largest.  Rows are appended one by one so the columns keep the raw values.
to_plot_diff = pd.DataFrame(columns=["Course", "Grade", "Average", "Difference"])
plot_columns = ['Course', 'Grade', 'Average']
small = list(smallest_gap[plot_columns].values[0])
big = list(largest_gap[plot_columns].values[0])
for record in (small + ['Small'], big + ['Big']):
    to_plot_diff.loc[len(to_plot_diff)] = record
# Recorded output:
#
# Largest difference:
#                  Course       Date Grade  Credits Type  Average  difference
# 37  Clinical Psychology 2015-10-01     9        6   CL      6.5         2.5
# Smallest difference:
#                    Course       Date Grade  Credits Type  Average  difference
# 38  Work Group Psychology 2013-12-16   8.5        6   IO     8.28        0.22
import numpy as np
import matplotlib.pyplot as plt
# Horizontal bar chart comparing my grade with the students' average for the
# courses stored in to_plot_diff (smallest difference first, largest second).
n_groups = len(to_plot_diff)
grades = to_plot_diff.Grade.values
average = to_plot_diff.Average.values

# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.35
opacity = 1

rects1 = plt.barh(index, grades, bar_width, alpha=opacity, color='#1976d2',
                  label='My Grade', edgecolor="black", linewidth=4)
rects2 = plt.barh(index + bar_width, average, bar_width, alpha=opacity, color='#00bcd4',
                  label="Students' Average", edgecolor="black", linewidth=4)

# Strip the frame and the tick marks; only the bars and their labels remain.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis=u'both', which=u'both', length=0)
plt.xticks([])

# Write each value in white near the right-hand end of its bar.
for i, v in enumerate(grades):
    ax.text(v-1.5, i-0.08, str(v), color='white', fontweight='bold', fontsize=22)
for i, v in enumerate(average):
    ax.text(v-1.5, i+0.28, str(round(v, 1)), color='white', fontweight='bold', fontsize=22)

# Derive the tick labels from the plotted rows so that they always match the
# bars.  The previous hard-coded list named the courses in the opposite order
# to the rows of to_plot_diff.  The line break goes before the last word.
course_labels = ['\n'.join(str(course).strip().rsplit(' ', 1))
                 for course in to_plot_diff.Course.values]
plt.yticks(index + bar_width/2, course_labels)

plt.tight_layout()
# plt.savefig('bar_difference.png',dpi=600, transparent=True)
plt.show()
plot_histogram(df)

# Scatter plot of every numeric grade (marker area scales with the credits),
# with the expanding mean of my grades (solid) and of the students' averages
# (dashed) drawn per study type.
df = df.sort_values('Date')
to_plot = df.loc[df.Grade != 'P', :].copy()
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_grade'] = moving_avg_grade(to_plot, min_periods=2)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=2)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)

fig, ax = plt.subplots(figsize=(15, 5))

# Every study type is drawn in the same blue.
colors = ['#29b6f6'] * 6

for i, study_type in enumerate(to_plot.Type.unique()):
    subset = to_plot[(to_plot.Grade != 'P') & (to_plot.Type == study_type)]
    x_dates = subset.new_date.values
    plt.scatter(x_dates, subset.Grade.values.astype(float),
                color=colors[i], alpha=.85, label=study_type, linewidth=1, edgecolor='black',
                s=subset.Credits.values*20)
    plt.plot(x_dates, subset.mov_grade.values.astype(float),
             color=colors[i], alpha=0.5, label=study_type, linewidth=6)
    plt.plot(x_dates, subset.mov_avg.values.astype(float),
             '--', color=colors[i], alpha=0.5, label=study_type, linewidth=4)

plt.ylim(ymin=5.8, ymax=10.1)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Hand-placed "<mean>\navg" annotations: (study type, x date, y position, colour).
grade_notes = [
    ('BA', date(2013, 8, 30), 6.67, '#f44336'),
    ('IO', date(2014, 8, 25), 8, '#607d8b'),
    ('PM-CL', date(2015, 8, 20), 7.78, '#4caf50'),
    ('CL', date(2016, 9, 20), 8.2, '#00C4B4'),
    ('PM-DS', date(2017, 9, 10), 8.95, '#ffc107'),
    ('DS', date(2019, 3, 15), 8.1, '#9c27b0'),
]
average_notes = [
    ('IO', date(2014, 8, 1), 7.1, '#607d8b'),
    ('PM-CL', date(2015, 8, 15), 6.78, '#4caf50'),
    ('CL', date(2016, 8, 15), 6.9, '#00C4B4'),
    ('PM-DS', date(2017, 8, 15), 7.6, '#ffc107'),
    ('DS', date(2019, 2, 5), 6.85, '#9c27b0'),
]
# First the means of my own grades, then the means of the students' averages.
for column, notes in (('Grade', grade_notes), ('Average', average_notes)):
    for kind, when, height, shade in notes:
        plt.text(when, height,
                 str(round(to_plot.loc[to_plot.Type == kind, column].mean(), 1))+'\navg',
                 fontsize=10, color=shade, horizontalalignment='center', alpha=0.8,
                 fontweight='bold')

# Axis styling: no tick marks, a tick every second year, whole grades on y.
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis=u'both', which=u'both', length=0)
ax.set_xticks([date(year, 1, 1) for year in range(2011, 2021, 2)])
ax.set_yticks(np.arange(6, 11, 1))

# Handles for a custom legend (the legend call itself is currently disabled).
cmap = plt.cm.coolwarm
custom_lines = [Line2D([0], [0], color='black', markersize=12, marker='o',
                       markerfacecolor='#dddddd', markeredgewidth=1.5),
                Line2D([0], [0], color='#dddddd', lw=4),
                Line2D([0], [0], color='#dddddd', lw=6, linestyle='--')]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

plt.tick_params(axis='both', which='major', labelsize=14)
plt.tick_params(axis='both', which='minor', labelsize=14)
plt.show()
# plt.savefig('grades_viz_sizes_v2.png', dpi=600, transparent=True)
# Same scatter plot as before, but with one pastel colour per study type, the
# mean lines drawn behind the markers (zorder=0) and small grey annotations.
df = df.sort_values('Date')
to_plot = df.loc[df.Grade != 'P', :].copy()
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_grade'] = moving_avg_grade(to_plot, min_periods=2)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=2)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)

fig, ax = plt.subplots(figsize=(15, 5))
colors = ['#63ace5', '#83d0c9', '#d0e1f9', '#e6d4a2', '#93a8b0', '#dacdbe']

for i, study_type in enumerate(to_plot.Type.unique()):
    subset = to_plot[(to_plot.Grade != 'P') & (to_plot.Type == study_type)]
    x_dates = subset.new_date.values
    plt.scatter(x_dates, subset.Grade.values.astype(float),
                color=colors[i], alpha=.9, label=study_type, linewidth=1, edgecolor='black',
                s=subset.Credits.values*20)
    plt.plot(x_dates, subset.mov_grade.values.astype(float),
             color=colors[i], alpha=0.7, label=study_type, linewidth=6, zorder=0)
    plt.plot(x_dates, subset.mov_avg.values.astype(float),
             '--', color=colors[i], alpha=0.7, label=study_type, linewidth=4, zorder=0)

plt.ylim(ymin=5.8, ymax=10.1)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Hand-placed "<mean>\navg" annotations: (study type, x date, y position).
grade_notes = [
    ('BA', date(2013, 8, 30), 6.67),
    ('IO', date(2014, 8, 25), 8),
    ('PM-CL', date(2015, 8, 20), 7.78),
    ('CL', date(2016, 9, 20), 8.2),
    ('PM-DS', date(2017, 9, 10), 8.95),
    ('DS', date(2019, 3, 15), 8.1),
]
average_notes = [
    ('IO', date(2014, 8, 1), 7.1),
    ('PM-CL', date(2015, 8, 15), 6.78),
    ('CL', date(2016, 8, 15), 6.9),
    ('PM-DS', date(2017, 8, 15), 7.6),
    ('DS', date(2019, 2, 5), 6.85),
]
# First the means of my own grades, then the means of the students' averages.
for column, notes in (('Grade', grade_notes), ('Average', average_notes)):
    for kind, when, height in notes:
        plt.text(when, height,
                 str(round(to_plot.loc[to_plot.Type == kind, column].mean(), 1))+'\navg',
                 fontsize=8, color='grey', horizontalalignment='center', alpha=0.8,
                 fontweight='bold')

# Axis styling: no tick marks, a tick every second year, whole grades on y.
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis=u'both', which=u'both', length=0)
ax.set_xticks([date(year, 1, 1) for year in range(2011, 2021, 2)])
ax.set_yticks(np.arange(6, 11, 1))

# Handles for a custom legend (the legend call itself is currently disabled).
cmap = plt.cm.coolwarm
custom_lines = [Line2D([0], [0], color='black', markersize=12, marker='o',
                       markerfacecolor='#dddddd', markeredgewidth=1.5),
                Line2D([0], [0], color='#dddddd', lw=4),
                Line2D([0], [0], color='#dddddd', lw=6, linestyle='--')]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

plt.tick_params(axis='both', which='major', labelsize=14)
plt.tick_params(axis='both', which='minor', labelsize=14)
plt.show()
# plt.savefig('grades_viz_sizes_temp.png', dpi=600, transparent=True)
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.cbook import get_sample_data
def imscatter(x, y, image, ax=None, zoom=1):
    """Draw ``image`` at every (x, y) data position of an axes.

    Parameters:
        x, y: scalar or sequence coordinates in data units.
        image: passed to ``plt.imread``; if that raises ``TypeError`` the
            value is used as is (presumably it is already an image array).
        ax: axes to draw on; defaults to the current axes.
        zoom: scaling factor handed to ``OffsetImage``.

    Returns:
        The list of artists added to the axes, one per point.
    """
    target = plt.gca() if ax is None else ax
    try:
        image = plt.imread(image)
    except TypeError:
        # Likely already an array...
        pass
    marker = OffsetImage(image, zoom=zoom)
    x, y = np.atleast_1d(x, y)
    artists = [
        target.add_artist(AnnotationBbox(marker, (px, py), xycoords='data', frameon=False))
        for px, py in zip(x, y)
    ]
    # The added artists do not extend the data limits themselves, so register
    # the points explicitly and rescale.
    target.update_datalim(np.column_stack([x, y]))
    target.autoscale()
    return artists
# Variant of the scatter plot that uses a numeric x axis (cumulative day
# offsets) and stamps an image marker on every course via imscatter.
df = df.sort_values('Date')
to_plot = df.loc[df.Grade != 'P', :].copy()
to_plot['new_date'] = get_new_dates(to_plot)
to_plot['mov_grade'] = moving_avg_grade(to_plot, min_periods=1)
to_plot['mov_avg'] = moving_avg_avg(to_plot, min_periods=1)
to_plot['cumsum_credits'] = credits_cumsum(to_plot)

# Days between consecutive rows (0 for the first row), accumulated into a
# running x position.
to_plot['date_int'] = to_plot['new_date'].diff().dt.days.fillna(0).cumsum()

fig, ax = plt.subplots(figsize=(15, 5))
colors = ['#63ace5', '#83d0c9', '#d0e1f9', '#e6d4a2', '#93a8b0', '#dacdbe']
# Image zoom per number of credits.
size_dict = {2: 0.01, 5: 0.011, 6: 0.015, 12: 0.02, 18: 0.025, 24: 0.03}

for i, study_type in enumerate(to_plot.Type.unique()):
    subset = to_plot[(to_plot.Grade != 'P') & (to_plot.Type == study_type)]
    x = subset.date_int.values
    y = subset.Grade.values.astype(float)
    sizes = subset.Credits.values

    plt.scatter(x, y,
                color=colors[i], alpha=.9, label=study_type, linewidth=0, edgecolor='black',
                s=sizes*20)
    plt.plot(x, subset.mov_grade.values.astype(float),
             color=colors[i], alpha=0.7, label=study_type, linewidth=6, zorder=0)
    plt.plot(x, subset.mov_avg.values.astype(float),
             '--', color=colors[i], alpha=1, label=study_type, linewidth=4, zorder=0)

    for x_i, y_i, size in zip(x, y, sizes):
        imscatter(x_i, y_i, 'circle_3.png', zoom=size_dict[size], ax=ax)

plt.ylim(ymin=5.8, ymax=10.1)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Axis styling: no tick marks, whole grades on y (x ticks are left automatic).
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis=u'both', which=u'both', length=0)
ax.set_yticks(np.arange(6, 11, 1))

# Handles for a custom legend (the legend call itself is currently disabled).
cmap = plt.cm.coolwarm
custom_lines = [Line2D([0], [0], color='black', markersize=12, marker='o',
                       markerfacecolor='#dddddd', markeredgewidth=1.5),
                Line2D([0], [0], color='#dddddd', lw=4),
                Line2D([0], [0], color='#dddddd', lw=6, linestyle='--')]
# ax.legend(custom_lines, ['My Grade', 'My Average', "Students' Average"], fontsize=16)

plt.tick_params(axis='both', which='major', labelsize=14)
plt.tick_params(axis='both', which='minor', labelsize=14)
plt.show()
# plt.savefig('circles_plot_full.png', dpi=600, transparent=True)
import matplotlib.dates as mdates
# Scatter plot of the numeric grades against their original dates, one colour
# per study type; marker area grows with credits ** 2.5.
fig, ax = plt.subplots()
colors = ['#56b4e9', 'purple', 'red', 'green', 'yellow', 'orange']

for i, study_type in enumerate(df.Type.unique()):
    graded = df[(df.Grade != 'P') & (df.Type == study_type)]
    plt.scatter(graded.Date.values,
                graded.Grade.values.astype(float),
                color=colors[i], alpha=0.8, label=study_type, linewidth=1,
                s=graded.Credits.values**2.5, edgecolor='black')

plt.ylim(ymin=5.8, ymax=10.1)
fig.set_size_inches(15, 5)
# plt.legend()

# One x tick every 300 days, labelled as year-month.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=300))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.show()
# plt.savefig('test.png', dpi=300)
# **Compared to all other students**
# 15% of students achieved the same or a higher result.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from collections import namedtuple
# Grouped horizontal bar chart: my grade next to the average grade for every
# course in avg_df, ordered by the average.
avg_df = avg_df.sort_values('Avg', ascending=False)
# to_plot = avg_df.loc[avg_df.Education=='MS-DS']
to_plot = avg_df

n_groups = len(to_plot)
my_scores = to_plot.My.values
avg_scores = to_plot.fillna(0).Avg.values  # a missing average is drawn as 0

fig, ax = plt.subplots(figsize=(10, 10))
index = np.arange(n_groups)
bar_width = 0.4
opacity = .8

rects1 = ax.barh(index, my_scores, bar_width,
                 alpha=opacity, color='#003f5c',
                 label='My Grade')
rects2 = ax.barh(index + bar_width, avg_scores, bar_width,
                 alpha=opacity, color='#ffa600',
                 label='The Average Grade')

ax.set_title('My grades during three different masters versus the average',
             fontsize=14, fontweight='bold')
plt.text(3.5,25.5,'[DS] = Data Science \n[CP] = Clinical Psychology \n[IOP] = I/O Psychology')
ax.set_yticks(index + bar_width / 2)
ax.set_yticklabels(to_plot.Course.values)
ax.legend(loc='upper left',frameon=False)

# Value labels inside the bars.
for i, v in enumerate(my_scores):
    ax.text(v-0.5, i, str(v), color='white', fontweight='bold', va='center', fontsize=10)
for i, v in enumerate(avg_scores):
    # A zero (missing) average has no bar to sit in: nudge the label right
    # and draw it in white; otherwise write it in black inside the bar.
    offset, shade = (.05, 'white') if v == 0 else (-.6, 'black')
    ax.text(v + offset, i+0.4, str(v), color=shade, fontweight='bold', va='center')

for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.get_xaxis().set_ticks([])

fig.tight_layout()
plt.show()
# plt.savefig('scores.png', dpi=300)