For some distributions, I have zoomed in on the higher scores to see how the top scores are distributed.
Star this if you find it interesting!
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
# CSV file passed as the first argument to the analyse_year calls below.
csv_filename = "score_distribution.csv"
def analyse_year(csv_filename,
                 column="",
                 lower=0,
                 upper=900*10**5,
                 bin_size=10000,
                 ranks=(1, 10, 40, 100, 500, 1000, 2000, 3000, 4000, 5000),
                 height_start=75,
                 height_space=75,
                 title=""):
    """Plot a histogram of one score column and mark selected ranks on it.

    The CSV is read with pandas, the values of ``column`` that are at least
    ``lower`` are cast to int and sorted from highest to lowest, and a red
    histogram of them is drawn. For every entry of ``ranks`` a blue vertical
    line is drawn at the score holding that rank, with a text label
    " Rank <rank>: <score>" at the top of the line.

    Args:
        csv_filename: Path of the CSV file to read.
        column: Name of the CSV column holding the scores.
        lower: Smallest score kept; also the left edge of the first bin.
        upper: Score the histogram must reach; the bin edges run from
            ``lower`` up to the first edge that is >= ``upper``.
        bin_size: Width of each histogram bin.
        ranks: 1-based ranks to mark. Ranks that are not in
            ``1..len(scores)`` are skipped instead of raising.
        height_start: Height (y value) of the first rank marker.
        height_space: Extra height added for each following entry of
            ``ranks``, so the labels are staggered.
        title: Title of the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv(csv_filename)
    column_values = df[column]
    scores = sorted(column_values[column_values >= lower].astype(int),
                    reverse=True)

    plt.figure(figsize=(14, 5), facecolor=(1, 1, 1))
    # range() excludes its stop value, so extend it by one bin; otherwise the
    # last edge falls short of `upper` and the top scores get no bar.
    bin_edges = range(lower, upper + bin_size, bin_size)
    plt.hist(scores, bins=bin_edges, color="red")

    for i, rank in enumerate(ranks):
        # A rank beyond the number of scores would raise IndexError, and a
        # rank below 1 would silently index from the low end of the list.
        if not 1 <= rank <= len(scores):
            continue
        # `i` still counts skipped ranks, so remaining labels keep their slot.
        ypos = (i * height_space) + height_start
        score = scores[rank - 1]
        plt.plot([score, score], [0, ypos], color="blue")
        plt.text(score, ypos, " Rank {}: {:,}".format(rank, score))

    # Show x tick labels as integers with thousands separators.
    plt.gca().xaxis.set_major_formatter(
        FuncFormatter(lambda x, p: format(int(x), ',')))
    plt.title(title)
    plt.ylabel("Count per {} point block".format(bin_size))
    plt.xlabel("Score Distribution")
    plt.show()
# One row per plot, in display order:
# (year, column, lower, upper, bin_size, ranks, height_start, height_space)
# `year` is only used in the plot title; `column` is the CSV column to read.
# NOTE(review): the rows labelled 2022b-2022f are also titled
# "Qualification Round" - confirm that wording is intended for them.
PLOT_SETTINGS = [
    ("2022q", "2022q-score", 15*10**5, 46*10**5, 20000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000], 20, 40),
    ("2022b", "2022b-score", 0, 12*10**5, 10000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000], 120, 120),
    ("2022c", "2022c-score", 0, 32*10**4, 2000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000], 120, 120),
    ("2022d", "2022d-score", 0, 75*10**4, 5000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000], 120, 120),
    ("2022e", "2022e-score", 120*10**4, 172*10**4, 5000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000], 120, 120),
    ("2022f", "2022f-score", 0, 135*10**4, 10000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000], 120, 120),
    (2021, "2021q-score", 75*10**5, 110*10**5, 10000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000, 5000], 75, 75),
    (2020, "2020q-score", 250*10**5, 278*10**5, 20000,
     [1, 10, 40, 100, 500, 1000, 2000], 25, 25),
    (2019, "2019q-score", 1*10**5, 15*10**5, 10000,
     [1, 10, 40, 100, 500, 1000, 2000, 3000], 75, 75),
    (2018, "2018q-score", 400*10**5, 510*10**5, 100000,
     [1, 10, 40, 100, 500, 1000], 25, 25),
    (2017, "2017q-score", 15*10**5, 30*10**5, 10000,
     [1, 10, 40, 100, 500, 1000], 25, 25),
    (2016, "2016q-score", 0, 4*10**5, 5000,
     [1, 10, 40, 100, 500], 50, 50),
    (2015, "2015q-score", 0, 500, 10,
     [1, 10, 40, 100], 10, 10),
]

# The loop targets deliberately reuse the module-level names the original
# per-year stanzas assigned, so those names hold the same final values.
for (year, column, lower, upper, bin_size,
     ranks, height_start, height_space) in PLOT_SETTINGS:
    title = "Hash Code Qualification Round {}".format(year)
    analyse_year(csv_filename, column, lower, upper, bin_size,
                 ranks, height_start, height_space, title)