Score distribution of Hash Code qualifiers

For some distributions, I have zoomed in on the higher scores to show how the top scores are distributed.

Star this if you find it interesting!

In [1]:

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# CSV file holding the score columns; it is passed to every analyse_year() call in this notebook.
csv_filename = "score_distribution.csv"

In [2]:

def analyse_year(csv_filename,
    column = "",
    lower = 0,
    upper = 900*10**5,
    bin_size = 10000,
    ranks = (1,10,40,100,500,1000,2000,3000,4000,5000),
    height_start = 75,
    height_space = 75,
    title = ""):
    """Plot a histogram of one score column and annotate selected ranks.

    The CSV is read with pandas, the values of ``column`` that are
    ``>= lower`` are kept, converted to int and sorted in descending order.
    A histogram of those scores is drawn with bins of width ``bin_size``
    covering ``lower`` .. ``upper``; for every requested rank a vertical
    line and a text label show the score held by that rank.

    Args:
        csv_filename: Path of the CSV file to read.
        column: Name of the column containing the scores.
        lower: Smallest score kept; also the left edge of the first bin.
        upper: Right end of the histogram range.
        bin_size: Width of each histogram bin.
        ranks: 1-based ranks (within the retained, descending scores) to
            annotate. Ranks outside ``1..len(scores)`` are skipped.
        height_start: Height (y value) of the first rank marker.
        height_space: Extra height added for each subsequent rank marker,
            so that the labels do not overlap.
        title: Title of the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv(csv_filename)
    scores = sorted(df[column][df[column] >= lower].astype(int), reverse=True)

    plt.figure(figsize=(14,5), facecolor=(1, 1, 1))
    # range() excludes its stop value, so stop one bin past ``upper``;
    # otherwise the top bin [upper - bin_size, upper] would be left out.
    bin_edges = range(lower, upper + bin_size, bin_size)
    plt.hist(scores, bins=bin_edges, color="red")
    for i, rank in enumerate(ranks):
        # Skip ranks that do not exist in the data instead of raising
        # IndexError (rank too large) or wrapping around (rank <= 0).
        if not 1 <= rank <= len(scores):
            continue
        # Keep the height tied to the position in ``ranks`` so the layout
        # stays the same even when some ranks are skipped.
        ypos = (i*height_space) + height_start
        score = scores[rank-1]
        plt.plot([score,score],[0,ypos], color="blue")
        plt.text(score, ypos, " Rank {}: {:,}".format(rank, score))
    # Show x tick labels with thousands separators.
    plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, p: format(int(x), ',')))
    plt.title(title)
    plt.ylabel("Count per {} point block".format(bin_size))
    plt.xlabel("Score Distribution")
    plt.show()

In [3]:

# Plot the "2022q-score" column: x-range 1,500,000 to 4,600,000
# in 20,000-point bins.
year = "2022q"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 15*10**5, 46*10**5
bin_size = 20000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000]
height_start, height_space = 20, 40

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [4]:

# Plot the "2022b-score" column: x-range 0 to 1,200,000
# in 10,000-point bins.
year = "2022b"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 12*10**5
bin_size = 10000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000]
height_start, height_space = 120, 120

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [5]:

# Plot the "2022c-score" column: x-range 0 to 320,000
# in 2,000-point bins.
year = "2022c"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 32*10**4
bin_size = 2000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000]
height_start, height_space = 120, 120

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [6]:

# Plot the "2022d-score" column: x-range 0 to 750,000
# in 5,000-point bins.
year = "2022d"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 75*10**4
bin_size = 5000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000]
height_start, height_space = 120, 120

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [7]:

# Plot the "2022e-score" column: x-range 1,200,000 to 1,720,000
# in 5,000-point bins.
year = "2022e"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 120*10**4, 172*10**4
bin_size = 5000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000]
height_start, height_space = 120, 120

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [8]:

# Plot the "2022f-score" column: x-range 0 to 1,350,000
# in 10,000-point bins.
year = "2022f"
column = "{}-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 135*10**4
bin_size = 10000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000]
height_start, height_space = 120, 120

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [9]:

# Plot the "2021q-score" column: x-range 7,500,000 to 11,000,000
# in 10,000-point bins.
year = 2021
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 75*10**5, 110*10**5
bin_size = 10000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000, 4000, 5000]
height_start, height_space = 75, 75

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [10]:

# Plot the "2020q-score" column: x-range 25,000,000 to 27,800,000
# in 20,000-point bins.
year = 2020
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 250*10**5, 278*10**5
bin_size = 20000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000]
height_start, height_space = 25, 25

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [11]:

# Plot the "2019q-score" column: x-range 100,000 to 1,500,000
# in 10,000-point bins.
year = 2019
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 1*10**5, 15*10**5
bin_size = 10000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000, 2000, 3000]
height_start, height_space = 75, 75

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [12]:

# Plot the "2018q-score" column: x-range 40,000,000 to 51,000,000
# in 100,000-point bins.
year = 2018
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 400*10**5, 510*10**5
bin_size = 100000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000]
height_start, height_space = 25, 25

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [13]:

# Plot the "2017q-score" column: x-range 1,500,000 to 3,000,000
# in 10,000-point bins.
year = 2017
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 15*10**5, 30*10**5
bin_size = 10000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500, 1000]
height_start, height_space = 25, 25

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [14]:

# Plot the "2016q-score" column: x-range 0 to 400,000
# in 5,000-point bins.
year = 2016
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 4*10**5
bin_size = 5000

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100, 500]
height_start, height_space = 50, 50

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [15]:

# Plot the "2015q-score" column: x-range 0 to 500 in 10-point bins.
year = 2015
column = "{}q-score".format(year)
title = "Hash Code Qualification Round {}".format(year)

# Histogram window and bin width.
lower, upper = 0, 500
bin_size = 10

# Ranks to annotate and the vertical layout of their labels.
ranks = [1, 10, 40, 100]
height_start, height_space = 10, 10

analyse_year(
    csv_filename,
    column=column,
    lower=lower,
    upper=upper,
    bin_size=bin_size,
    ranks=ranks,
    height_start=height_start,
    height_space=height_space,
    title=title,
)

In [ ]: