%load_ext nb_black
import pandas as pd
import numpy as np
from os import listdir, path
output_path = "./output"
players_df = (
pd.concat(
pd.read_excel(path.join(output_path, f))
for f in listdir("./output")
if f.startswith("season_")
)
.drop(columns=["Unnamed: 0"])
.dropna()
).sort_values(["season", "rank"])
players_df
rank | name | country | matches | mmr | season | |
---|---|---|---|---|---|---|
0 | 1 | kolemoen | DE | 431 | 10484 | M2_01 Wolf 2020 |
1 | 2 | kams134 | PL | 923 | 10477 | M2_01 Wolf 2020 |
2 | 3 | TailBot | PL | 538 | 10472 | M2_01 Wolf 2020 |
3 | 4 | Pajabol | PL | 820 | 10471 | M2_01 Wolf 2020 |
4 | 5 | Adzikov | PL | 1105 | 10442 | M2_01 Wolf 2020 |
... | ... | ... | ... | ... | ... | ... |
15 | 14790 | MagCCG | RU | 4 | 389 | M4_02 Love 2022 |
41 | 14791 | 可乐加冰. | CN | 4 | 386 | M4_02 Love 2022 |
109 | 14792 | RomanTrooper | RU | 4 | 386 | M4_02 Love 2022 |
20 | 14793 | Naveless | DE | 4 | 384 | M4_02 Love 2022 |
55 | 14794 | Pargali | PL | 1 | 96 | M4_02 Love 2022 |
182799 rows × 6 columns
players_df[players_df["name"] == "sepro"]
rank | name | country | matches | mmr | season | |
---|---|---|---|---|---|---|
2084 | 3259 | sepro | BE | 142 | 9617 | M2_05 Viper 2020 |
1137 | 1138 | sepro | BE | 254 | 9746 | M2_06 Magic 2020 |
1166 | 1167 | sepro | BE | 260 | 9820 | M2_07 Griffin 2020 |
1254 | 1255 | sepro | BE | 214 | 9779 | M2_08 Draconid 2020 |
2237 | 12816 | sepro | BE | 97 | 3360 | M2_09 Dryad 2020 |
1056 | 1057 | sepro | BE | 351 | 9846 | M2_10 Cat 2020 |
1638 | 1639 | sepro | BE | 154 | 9785 | M2_11 Mahakam 2020 |
2515 | 12856 | sepro | BE | 80 | 7407 | M2_12 Wild Hunt 2020 |
1466 | 1467 | sepro | BE | 175 | 9724 | M3_01 Wolf 2021 |
679 | 680 | sepro | BE | 406 | 9903 | M3_02 Love 2021 |
1784 | 9353 | sepro | BE | 63 | 5384 | M3_03 Bear 2021 |
2553 | 14501 | sepro | BE | 40 | 2836 | M3_04 Elf 2021 |
def sdi(data) -> float:
"""
Given a list of counts, calculate the Shannon Diversity Index
:param data: list of integers
:return: Shannon Diversity Index
"""
def p(n, N):
return n / N * np.log(n / N)
x = np.array([d for d in data if d != 0])
N = np.sum(x.astype("float64"))
if N == 0:
raise ZeroDivisionError(
"Cannot calculate Shannon Diversity Index when the sum of all observations is zero"
)
return 0 - np.sum(np.array([p(n, N) for n in x]))
def dominance(data) -> float:
N = np.sum(np.array(data).astype("float64"))
if N > 0:
proportions = data / N
return np.sum(np.square(proportions))
else:
return np.nan
def simpson(data) -> float:
d = dominance(data)
if np.isnan(d):
return np.nan
else:
return 1 - d
season_country_df = (
players_df[players_df["rank"] <= 2000]
.groupby(["season", "country"])
.agg(count=pd.NamedAgg("name", "count"))
.reset_index()
)
season_country_df
season | country | count | |
---|---|---|---|
0 | M2_01 Wolf 2020 | AE | 1 |
1 | M2_01 Wolf 2020 | AM | 1 |
2 | M2_01 Wolf 2020 | AR | 4 |
3 | M2_01 Wolf 2020 | AT | 11 |
4 | M2_01 Wolf 2020 | AU | 20 |
... | ... | ... | ... |
2116 | M4_02 Love 2022 | US | 101 |
2117 | M4_02 Love 2022 | UZ | 3 |
2118 | M4_02 Love 2022 | VN | 6 |
2119 | M4_02 Love 2022 | ZA | 7 |
2120 | M4_02 Love 2022 | ZW | 1 |
2121 rows × 3 columns
season_diversity_df = (
season_country_df.groupby(["season"])
.agg(
richness=pd.NamedAgg("count", "count"),
shannon_diversity=pd.NamedAgg("count", sdi),
dominance=pd.NamedAgg("count", dominance),
simpson=pd.NamedAgg("count", simpson),
)
.reset_index()
)
season_diversity_df.set_index("season").to_excel(
"./output/seasonal_diversity_of_nations.xlsx"
)
season_diversity_df
season | richness | shannon_diversity | dominance | simpson | |
---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | 79 | 2.911451 | 0.109716 | 0.890284 |
1 | M2_02 Love 2020 | 73 | 2.784907 | 0.124585 | 0.875415 |
2 | M2_03 Bear 2020 | 74 | 2.770397 | 0.123829 | 0.876171 |
3 | M2_04 Elf 2020 | 78 | 2.921609 | 0.105208 | 0.894792 |
4 | M2_05 Viper 2020 | 82 | 2.975021 | 0.106955 | 0.893045 |
5 | M2_06 Magic 2020 | 79 | 3.028216 | 0.100404 | 0.899596 |
6 | M2_07 Griffin 2020 | 80 | 3.085621 | 0.092351 | 0.907649 |
7 | M2_08 Draconid 2020 | 83 | 3.043551 | 0.097257 | 0.902743 |
8 | M2_09 Dryad 2020 | 83 | 3.071002 | 0.093658 | 0.906342 |
9 | M2_10 Cat 2020 | 82 | 3.028427 | 0.101374 | 0.898626 |
10 | M2_11 Mahakam 2020 | 83 | 2.981676 | 0.110916 | 0.889084 |
11 | M2_12 Wild Hunt 2020 | 76 | 3.040627 | 0.099488 | 0.900512 |
12 | M3_01 Wolf 2021 | 86 | 3.088299 | 0.098081 | 0.901919 |
13 | M3_02 Love 2021 | 84 | 3.045043 | 0.101261 | 0.898739 |
14 | M3_03 Bear 2021 | 86 | 3.086869 | 0.090533 | 0.909467 |
15 | M3_04 Elf 2021 | 83 | 3.124564 | 0.091177 | 0.908823 |
16 | M3_05 Viper 2021 | 82 | 3.147269 | 0.086137 | 0.913863 |
17 | M3_06 Magic 2021 | 82 | 3.162725 | 0.084816 | 0.915184 |
18 | M3_07 Griffin 2021 | 84 | 3.083937 | 0.098090 | 0.901910 |
19 | M3_08 Draconid 2021 | 82 | 3.023176 | 0.103806 | 0.896194 |
20 | M3_09 Dryad 2021 | 86 | 3.009823 | 0.108551 | 0.891449 |
21 | M3_10 Cat 2021 | 89 | 3.132842 | 0.091885 | 0.908115 |
22 | M3_11 Mahakam 2021 | 82 | 2.989644 | 0.110278 | 0.889722 |
23 | M3_12 Wild Hunt 2021 | 80 | 3.101439 | 0.092863 | 0.907137 |
24 | M4_01 Wolf 2022 | 84 | 3.012610 | 0.112893 | 0.887107 |
25 | M4_02 Love 2022 | 79 | 3.030534 | 0.104149 | 0.895851 |
summary_df = (
players_df.groupby(["season"])
.agg(
players_found=pd.NamedAgg("name", "count"),
players_total=pd.NamedAgg("rank", "max"),
)
.reset_index()
)
summary_df["players_captured"] = (
summary_df["players_found"] * 100 / summary_df["players_total"]
)
summary_df["extra_players_captured"] = (
(summary_df["players_found"] - 2860) * 100 / (summary_df["players_total"] - 2860)
)
summary_df.set_index("season").to_excel("./output/seasonal_player_counts.xlsx")
summary_df
season | players_found | players_total | players_captured | extra_players_captured | |
---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | 2905 | 2997 | 96.930264 | 32.846715 |
1 | M2_02 Love 2020 | 3984 | 4883 | 81.589187 | 55.561048 |
2 | M2_03 Bear 2020 | 4772 | 6632 | 71.954162 | 50.689290 |
3 | M2_04 Elf 2020 | 6070 | 10209 | 59.457342 | 43.679412 |
4 | M2_05 Viper 2020 | 5977 | 10079 | 59.301518 | 43.177725 |
5 | M2_06 Magic 2020 | 6104 | 9919 | 61.538462 | 45.955518 |
6 | M2_07 Griffin 2020 | 7372 | 14791 | 49.841120 | 37.817450 |
7 | M2_08 Draconid 2020 | 7037 | 13800 | 50.992754 | 38.180987 |
8 | M2_09 Dryad 2020 | 7056 | 14554 | 48.481517 | 35.881649 |
9 | M2_10 Cat 2020 | 6987 | 16011 | 43.638748 | 31.381644 |
10 | M2_11 Mahakam 2020 | 6644 | 16752 | 39.660936 | 27.238699 |
11 | M2_12 Wild Hunt 2020 | 7806 | 22464 | 34.748932 | 25.229545 |
12 | M3_01 Wolf 2021 | 6197 | 13428 | 46.149836 | 31.576457 |
13 | M3_02 Love 2021 | 7119 | 16016 | 44.449301 | 32.373062 |
14 | M3_03 Bear 2021 | 7012 | 13636 | 51.422705 | 38.530067 |
15 | M3_04 Elf 2021 | 10107 | 16049 | 62.975886 | 54.947305 |
16 | M3_05 Viper 2021 | 7771 | 17623 | 44.095784 | 33.265596 |
17 | M3_06 Magic 2021 | 10899 | 17442 | 62.487100 | 55.129612 |
18 | M3_07 Griffin 2021 | 7771 | 16772 | 46.333174 | 35.300460 |
19 | M3_08 Draconid 2021 | 8333 | 17523 | 47.554642 | 37.325240 |
20 | M3_09 Dryad 2021 | 7561 | 17486 | 43.240307 | 32.141392 |
21 | M3_10 Cat 2021 | 8023 | 14048 | 57.111333 | 46.147658 |
22 | M3_11 Mahakam 2021 | 9147 | 19902 | 45.960205 | 36.891210 |
23 | M3_12 Wild Hunt 2021 | 9566 | 21651 | 44.182717 | 35.687297 |
24 | M4_01 Wolf 2022 | 7607 | 15520 | 49.014175 | 37.496051 |
25 | M4_02 Love 2022 | 2972 | 14794 | 20.089225 | 0.938495 |
season_df = pd.read_excel("./output/seasonal_stats.xlsx").drop(
columns=["Unnamed: 0", "min_mmr", "max_mmr", "num_matches"]
)
estimates_df = pd.read_excel("./output/player_estimates.xlsx").drop(
columns=["Unnamed: 0"]
)
merged_df = pd.merge(summary_df, season_diversity_df, how="left", on="season")
merged_df = pd.merge(merged_df, season_df, how="left", on="season")
merged_df = pd.merge(merged_df, estimates_df, how="left", on="season")
merged_df
season | players_found | players_total | players_captured | extra_players_captured | richness | shannon_diversity | dominance | simpson | top500_cutoff | top200_cutoff | top64_cutoff | low_estimate | high_estimate | mean_estimate | std_err | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | 2905 | 2997 | 96.930264 | 32.846715 | 79 | 2.911451 | 0.109716 | 0.890284 | 9749 | 9872 | 10061 | 2900.000000 | 3600.0 | 3117.636364 | 124.944153 |
1 | M2_02 Love 2020 | 3984 | 4883 | 81.589187 | 55.561048 | 73 | 2.784907 | 0.124585 | 0.875415 | 9832 | 9952 | 10117 | 4566.666667 | 7100.0 | 5620.242424 | 441.315936 |
2 | M2_03 Bear 2020 | 4772 | 6632 | 71.954162 | 50.689290 | 74 | 2.770397 | 0.123829 | 0.876171 | 9867 | 9995 | 10204 | 6036.363636 | 10300.0 | 7329.272727 | 760.229978 |
3 | M2_04 Elf 2020 | 6070 | 10209 | 59.457342 | 43.679412 | 78 | 2.921609 | 0.105208 | 0.894792 | 9952 | 10087 | 10293 | 9927.272727 | 18000.0 | 12319.454545 | 1494.370140 |
4 | M2_05 Viper 2020 | 5977 | 10079 | 59.301518 | 43.177725 | 82 | 2.975021 | 0.106955 | 0.893045 | 9910 | 10028 | 10255 | 7766.666667 | 11400.0 | 9372.060606 | 727.332197 |
5 | M2_06 Magic 2020 | 6104 | 9919 | 61.538462 | 45.955518 | 79 | 3.028216 | 0.100404 | 0.899596 | 9896 | 10002 | 10191 | 6800.000000 | 9800.0 | 8320.181818 | 618.201230 |
6 | M2_07 Griffin 2020 | 7372 | 14791 | 49.841120 | 37.817450 | 80 | 3.085621 | 0.092351 | 0.907649 | 9978 | 10100 | 10289 | 12836.363636 | 19900.0 | 14683.272727 | 1331.618216 |
7 | M2_08 Draconid 2020 | 7037 | 13800 | 50.992754 | 38.180987 | 83 | 3.043551 | 0.097257 | 0.902743 | 9946 | 10061 | 10246 | 9566.666667 | 13300.0 | 11186.242424 | 696.853229 |
8 | M2_09 Dryad 2020 | 7056 | 14554 | 48.481517 | 35.881649 | 83 | 3.071002 | 0.093658 | 0.906342 | 9946 | 10046 | 10183 | 9733.333333 | 12580.0 | 11218.666667 | 458.757501 |
9 | M2_10 Cat 2020 | 6987 | 16011 | 43.638748 | 31.381644 | 82 | 3.028427 | 0.101374 | 0.898626 | 9977 | 10067 | 10176 | 12800.000000 | 14620.0 | 13774.181818 | 369.275995 |
10 | M2_11 Mahakam 2020 | 6644 | 16752 | 39.660936 | 27.238699 | 83 | 2.981676 | 0.110916 | 0.889084 | 10000 | 10090 | 10216 | 12995.454545 | 18900.0 | 16041.757576 | 976.287680 |
11 | M2_12 Wild Hunt 2020 | 7806 | 22464 | 34.748932 | 25.229545 | 76 | 3.040627 | 0.099488 | 0.900512 | 10070 | 10172 | 10313 | 12995.454545 | 35900.0 | 23051.757576 | 3678.743988 |
12 | M3_01 Wolf 2021 | 6197 | 13428 | 46.149836 | 31.576457 | 86 | 3.088299 | 0.098081 | 0.901919 | 9916 | 10044 | 10295 | 7968.181818 | 13700.0 | 9929.636364 | 1046.105394 |
13 | M3_02 Love 2021 | 7119 | 16016 | 44.449301 | 32.373062 | 84 | 3.045043 | 0.101261 | 0.898739 | 9975 | 10097 | 10325 | 11645.454545 | 19500.0 | 13977.757576 | 1430.087791 |
14 | M3_03 Bear 2021 | 7012 | 13636 | 51.422705 | 38.530067 | 86 | 3.086869 | 0.090533 | 0.909467 | 9914 | 10026 | 10230 | 7466.666667 | 10900.0 | 9195.333333 | 715.137594 |
15 | M3_04 Elf 2021 | 10107 | 16049 | 62.975886 | 54.947305 | 83 | 3.124564 | 0.091177 | 0.908823 | 9992 | 10102 | 10323 | 11586.363636 | 20300.0 | 14814.606061 | 1527.809485 |
16 | M3_05 Viper 2021 | 7771 | 17623 | 44.095784 | 33.265596 | 82 | 3.147269 | 0.086137 | 0.913863 | 9998 | 10106 | 10300 | 12990.909091 | 20900.0 | 16159.515152 | 1322.547676 |
17 | M3_06 Magic 2021 | 10899 | 17442 | 62.487100 | 55.129612 | 82 | 3.162725 | 0.084816 | 0.915184 | 9974 | 10082 | 10278 | 11263.636364 | 17200.0 | 13526.060606 | 1034.558786 |
18 | M3_07 Griffin 2021 | 7771 | 16772 | 46.333174 | 35.300460 | 84 | 3.083937 | 0.098090 | 0.901910 | 9958 | 10067 | 10287 | 10140.909091 | 15800.0 | 12241.515152 | 1040.267969 |
19 | M3_08 Draconid 2021 | 8333 | 17523 | 47.554642 | 37.325240 | 82 | 3.023176 | 0.103806 | 0.896194 | 9954 | 10079 | 10281 | 11200.000000 | 16900.0 | 12854.545455 | 1051.176279 |
20 | M3_09 Dryad 2021 | 7561 | 17486 | 43.240307 | 32.141392 | 86 | 3.009823 | 0.108551 | 0.891449 | 9968 | 10078 | 10250 | 11868.181818 | 15900.0 | 13442.969697 | 746.264455 |
21 | M3_10 Cat 2021 | 8023 | 14048 | 57.111333 | 46.147658 | 89 | 3.132842 | 0.091885 | 0.908115 | 9879 | 9974 | 10083 | 5100.000000 | 7770.0 | 6559.939394 | 575.186402 |
22 | M3_11 Mahakam 2021 | 9147 | 19902 | 45.960205 | 36.891210 | 82 | 2.989644 | 0.110278 | 0.889722 | 10012 | 10122 | 10252 | 12981.818182 | 23300.0 | 17920.363636 | 1647.708090 |
23 | M3_12 Wild Hunt 2021 | 9566 | 21651 | 44.182717 | 35.687297 | 80 | 3.101439 | 0.092863 | 0.907137 | 10032 | 10140 | 10271 | 12981.818182 | 27500.0 | 19567.696970 | 2331.781053 |
24 | M4_01 Wolf 2022 | 7607 | 15520 | 49.014175 | 37.496051 | 84 | 3.012610 | 0.112893 | 0.887107 | 9941 | 10093 | 10340 | 8690.909091 | 18900.0 | 12063.515152 | 1789.893240 |
25 | M4_02 Love 2022 | 2972 | 14794 | 20.089225 | 0.938495 | 79 | 3.030534 | 0.104149 | 0.895851 | 9932 | 10068 | 10312 | NaN | NaN | NaN | NaN |
merged_df["series"] = merged_df.season.apply(lambda x: x.split("_")[0])
import seaborn as sns
import matplotlib.pyplot as plt
sns.regplot(
data=merged_df, x="players_total", y="mean_estimate", scatter=False, color=".25"
)
sns.scatterplot(data=merged_df, x="players_total", y="mean_estimate", hue="series")
plt.title("Actual Players vs Estimated")
plt.xlabel("Number of Players")
plt.ylabel("Estimated Players")
plt.xlim(2500, 23000)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
sns.regplot(
data=merged_df, x="players_total", y="shannon_diversity", scatter=False, color=".25"
)
sns.scatterplot(data=merged_df, x="players_total", y="shannon_diversity", hue="series")
plt.title("Actual Players vs Diversity")
plt.xlabel("Number of Players")
plt.ylabel("Shannon Diversity")
plt.xlim(2500, 23000)
plt.show()