First we'll need to get the data from the playgwent website. Here requests and BeautifulSoup are used to get the data and parse the html the comes back. Nothing to difficult here. There is a little clean up in the code and there are two lines of code to check if a player was in the top 500 last season. In case the data already exists it is loaded from disk.
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd
import numpy as np
# Note that seasons should be in order (for the top 500 spots to be picked up correctly in the next step)
seasons = [
('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2020.xlsx'),
('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/{page}', './output/season_of_love_2020.xlsx'),
('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/{page}', './output/season_of_the_bear_2020.xlsx'),
('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/{page}', './output/season_of_the_elf_2020.xlsx'),
('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/{page}', './output/season_of_the_viper_2020.xlsx'),
('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/{page}', './output/season_of_magic_2020.xlsx'),
('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2020.xlsx'),
('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2020.xlsx'),
('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2020.xlsx'),
('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/{page}', './output/season_of_the_cat_2020.xlsx'),
('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/{page}', './output/season_of_the_mahakam_2020.xlsx'),
('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2020.xlsx'),
('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2021.xlsx'),
('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/{page}', './output/season_of_love_2021.xlsx'),
('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/{page}', './output/season_of_the_bear_2021.xlsx'),
('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/{page}', './output/season_of_the_elf_2021.xlsx'),
('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/{page}', './output/season_of_the_viper_2021.xlsx'),
('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/{page}', './output/season_of_the_magic_2021.xlsx'),
('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2021.xlsx'),
('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2021.xlsx'),
('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2021.xlsx'),
('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/{page}', './output/season_of_the_cat_2021.xlsx'),
('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/{page}', './output/season_of_the_mahakam_2021.xlsx'),
('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2021.xlsx'),
('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2022.xlsx'),
('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/{page}', './output/season_of_love_2022.xlsx'),
]
os.makedirs('./output', exist_ok=True)
dataframes = []
top500 = []
for season, url_template, output_path in seasons:
if os.path.exists(output_path):
df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)
else:
output = []
for i in tqdm(range(1, 150)):
url = url_template.replace('{page}', str(i))
try:
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
rows = soup.findAll("div", {"class": "c-ranking-table__tr"})
for row in rows:
flag = row.find("i", {"class": "flag-icon"})["class"][1]
new_record = {
'rank': int(row.find("div", {"class": "td-number"}).text.strip()),
'name': row.find("div", {"class": "td-nick"}).text.strip(),
'country': flag.replace('flag-icon-', '').upper(),
'matches': int(row.find("div", {"class": "td-matches"}).text.strip().replace(' matches', '')),
'mmr': int(row.find("div", {"class": "td-mmr"}).text.strip().replace(',', '')),
'season': season
}
output.append(new_record)
except:
pass
df = pd.DataFrame(output).drop_duplicates()
df.to_excel(output_path)
# We'll add a column here if the player was in the top 500 last season
df['previous_top500'] = ['yes' if name in top500 else 'no' for name in df['name']]
top500 = list(df[df['rank'] <= 500]['name'])
dataframes.append(df)
# Concatenate dataframes and drop any records with missing values
full_df = pd.concat(dataframes).dropna()
100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [03:16<00:00, 1.32s/it]
We have the 2 letter code for the country of each participant, let's use the pycountry library to convert that to the full name.
import pycountry
def convert_to_full(two_letter_code):
try:
out = pycountry.countries.get(alpha_2=two_letter_code).name
except:
print(f'could not convert {two_letter_code}')
out = two_letter_code
return out
full_df['country'] = full_df['country'].apply(lambda x: convert_to_full(x))
full_df.head()
rank | name | country | matches | mmr | season | previous_top500 | |
---|---|---|---|---|---|---|---|
0 | 1 | kolemoen | Germany | 431 | 10484 | M2_01 Wolf 2020 | no |
1 | 2 | kams134 | Poland | 923 | 10477 | M2_01 Wolf 2020 | no |
2 | 3 | TailBot | Poland | 538 | 10472 | M2_01 Wolf 2020 | no |
3 | 4 | Pajabol | Poland | 820 | 10471 | M2_01 Wolf 2020 | no |
4 | 5 | Adzikov | Poland | 1105 | 10442 | M2_01 Wolf 2020 | no |
Apart from the global rank, we'll add a participant's national rank as well.
full_df['national_rank'] = full_df.groupby(['country','season'])["mmr"].rank("first", ascending=False)
While getting to a higher MMR is the main goal of ladder. There is an argument to be made that the number of games a participant plays to reach a certain point should not be overlooked. Someone that played fewer games to get to a certain MMR climbed the ladder in a more efficient way. There are two metrics that can be used, we take the MMR of a player, substract the minimal MMR, which is 9600, and divide that by the number of matches played. Though, as the higher up you are in ladder, the more fierce the competition becomes, to take this into account Lerio2 from Team Legacy proposed to divide by the square-root of the number of matches instead.
One additional consideration is that players that were in the top 500 the previous season didn't have to climb back up to pro rank. Both metrics actually give them a major advantage. So when comparing efficiency between players the field previous_top500 could be used to allow a fair comparison.
full_df['efficiency'] = ((full_df['mmr']-9600))/full_df['matches']
full_df['lei'] = ((full_df['mmr']-9600))/np.sqrt(full_df['matches'])
full_df.head()
rank | name | country | matches | mmr | season | previous_top500 | national_rank | efficiency | lei | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | kolemoen | Germany | 431 | 10484 | M2_01 Wolf 2020 | no | 1.0 | 2.051044 | 42.580782 |
1 | 2 | kams134 | Poland | 923 | 10477 | M2_01 Wolf 2020 | no | 1.0 | 0.950163 | 28.866807 |
2 | 3 | TailBot | Poland | 538 | 10472 | M2_01 Wolf 2020 | no | 2.0 | 1.620818 | 37.594590 |
3 | 4 | Pajabol | Poland | 820 | 10471 | M2_01 Wolf 2020 | no | 3.0 | 1.062195 | 30.416639 |
4 | 5 | Adzikov | Poland | 1105 | 10442 | M2_01 Wolf 2020 | no | 4.0 | 0.761991 | 25.329753 |
per_season_df = full_df.groupby(['season']).agg(
min_mmr = pd.NamedAgg('mmr', 'min'),
max_mmr = pd.NamedAgg('mmr', 'max'),
num_matches = pd.NamedAgg('matches', 'sum')
).reset_index()
top500_cutoffs = full_df[full_df['rank'] == 500][['season', 'mmr']].rename(columns={'mmr': 'top500_cutoff'})
top200_cutoffs = full_df[full_df['rank'] == 200][['season', 'mmr']].rename(columns={'mmr': 'top200_cutoff'})
top64_cutoffs = full_df[full_df['rank'] == 64][['season', 'mmr']].rename(columns={'mmr': 'top64_cutoff'})
per_season_df = pd.merge(per_season_df, top500_cutoffs, on='season')
per_season_df = pd.merge(per_season_df, top200_cutoffs, on='season')
per_season_df = pd.merge(per_season_df, top64_cutoffs, on='season')
per_season_df
season | min_mmr | max_mmr | num_matches | top500_cutoff | top200_cutoff | top64_cutoff | |
---|---|---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | 2407 | 10484 | 699496 | 9749 | 9872 | 10061 |
1 | M2_02 Love 2020 | 7776 | 10537 | 769172 | 9832 | 9952 | 10117 |
2 | M2_03 Bear 2020 | 9427 | 10669 | 862283 | 9867 | 9995 | 10204 |
3 | M2_04 Elf 2020 | 9666 | 10751 | 1004603 | 9952 | 10087 | 10293 |
4 | M2_05 Viper 2020 | 9635 | 10622 | 859640 | 9910 | 10028 | 10255 |
5 | M2_06 Magic 2020 | 9624 | 10597 | 793013 | 9896 | 10002 | 10191 |
6 | M2_07 Griffin 2020 | 9698 | 10667 | 996516 | 9978 | 10100 | 10289 |
7 | M2_08 Draconid 2020 | 9666 | 10546 | 837545 | 9946 | 10061 | 10246 |
8 | M2_09 Dryad 2020 | 9678 | 10725 | 854593 | 9946 | 10046 | 10183 |
9 | M2_10 Cat 2020 | 9703 | 10804 | 928845 | 9977 | 10067 | 10176 |
10 | M2_11 Mahakam 2020 | 9706 | 10783 | 983150 | 10000 | 10090 | 10216 |
11 | M2_12 Wild Hunt 2020 | 9756 | 10724 | 1182353 | 10070 | 10172 | 10313 |
12 | M3_01 Wolf 2021 | 9637 | 10653 | 808651 | 9916 | 10044 | 10295 |
13 | M3_02 Love 2021 | 9684 | 10714 | 917027 | 9975 | 10097 | 10325 |
14 | M3_03 Bear 2021 | 9637 | 10576 | 766502 | 9914 | 10026 | 10230 |
15 | M3_04 Elf 2021 | 9686 | 10678 | 944323 | 9992 | 10102 | 10323 |
16 | M3_05 Viper 2021 | 9701 | 10753 | 956484 | 9998 | 10106 | 10300 |
17 | M3_06 Magic 2021 | 9681 | 10632 | 869262 | 9974 | 10082 | 10278 |
18 | M3_07 Griffin 2021 | 9669 | 10633 | 856103 | 9958 | 10067 | 10287 |
19 | M3_08 Draconid 2021 | 9681 | 10767 | 911273 | 9954 | 10079 | 10281 |
20 | M3_09 Dryad 2021 | 9688 | 10809 | 940655 | 9968 | 10078 | 10250 |
21 | M3_10 Cat 2021 | 9614 | 10366 | 719696 | 9879 | 9974 | 10083 |
22 | M3_11 Mahakam 2021 | 9725 | 10580 | 1017256 | 10012 | 10122 | 10252 |
23 | M3_12 Wild Hunt 2021 | 9735 | 10714 | 1044941 | 10032 | 10140 | 10271 |
24 | M4_01 Wolf 2022 | 9646 | 10684 | 883881 | 9941 | 10093 | 10340 |
25 | M4_02 Love 2022 | 9638 | 10652 | 852867 | 9932 | 10068 | 10312 |
We can summerize player stats over all seasons easily using the code below. We'll store the number of times they were on pro ladder, the worst, best and average MMR scores, ...
player_summaries = full_df.groupby(['name', 'country']).agg(
appearances = pd.NamedAgg('matches', 'count'),
min_mmr = pd.NamedAgg('mmr', 'min'),
mean_mmr = pd.NamedAgg('mmr', 'min'),
max_mmr = pd.NamedAgg('mmr', 'max'),
mean_matches = pd.NamedAgg('matches', 'mean'),
num_matches = pd.NamedAgg('matches', 'sum'),
best_rank = pd.NamedAgg('rank', 'min'),
best_national_rank = pd.NamedAgg('national_rank', 'min'),
).reset_index()
player_summaries.head()
name | country | appearances | min_mmr | mean_mmr | max_mmr | mean_matches | num_matches | best_rank | best_national_rank | |
---|---|---|---|---|---|---|---|---|---|---|
0 | $Lancer$ | China | 2 | 9710 | 9710 | 9792 | 545.50 | 1091 | 2011 | 272.0 |
1 | $rBuRn$ | Spain | 2 | 9767 | 9767 | 9831 | 225.00 | 450 | 1176 | 29.0 |
2 | $r_BuRn$ | Spain | 4 | 9701 | 9701 | 9848 | 123.75 | 495 | 968 | 25.0 |
3 | ++++Gwynbleidd++++ | Poland | 1 | 9803 | 9803 | 9803 | 231.00 | 231 | 1482 | 122.0 |
4 | +Nightshadow+ | Ukraine | 1 | 9677 | 9677 | 9677 | 365.00 | 365 | 2261 | 187.0 |
player_summaries[player_summaries['name'] == 'sepro']
name | country | appearances | min_mmr | mean_mmr | max_mmr | mean_matches | num_matches | best_rank | best_national_rank | |
---|---|---|---|---|---|---|---|---|---|---|
21691 | sepro | Belgium | 7 | 9724 | 9724 | 9903 | 259.142857 | 1814 | 680 | 2.0 |
To calculate the number of pro players per capita we'll need the number of inhabitants per country. On Kaggle a dataset was available for 2019. As this doesn't differ much from one year to another (and those will only result into changes several digits behind the decimal point it) the 2019 list is enough for our purpose. The code below loads the data, and generates country names from the two character ISO code using the same package as used before to avoid mismatches.
# data from kaggle https://www.kaggle.com/erikbruin/countries-of-the-world-iso-codes-and-population/data#
# the number are indicated very strangely in the csv with a decimal point and need to be multiplied with 1000
population_df = pd.read_csv('./data/countries_by_population_2019.csv')[['name', 'pop2019']]
country_codes = pd.read_csv('./data/country_codes_2020.csv')[['name', 'cca2']]
population_df = pd.merge(population_df, country_codes, how='left', left_on='name', right_on='name')
population_df['country'] = population_df['cca2'].apply(lambda x: convert_to_full(x))
population_df['pop2019'] = (population_df['pop2019'] * 1000).astype(int)
could not convert nan
# Lets do some counts per season and country
national_stats = full_df.groupby(['season','country']).agg(
total_matches = pd.NamedAgg('matches', 'sum'),
num_players = pd.NamedAgg('name', 'count')
).reset_index()
# Add the population (from 2019) to each country and calculate the number of pro players (top 2860) per million inhabitants
national_stats = pd.merge(national_stats, population_df.drop(['name', 'cca2'], axis=1), on='country', how='left')
national_stats['pro_players_per_million'] = (national_stats['num_players']*1000000)/national_stats['pop2019']
national_stats['matches_per_player'] = national_stats['total_matches']/national_stats['num_players']
national_stats
season | country | total_matches | num_players | pop2019 | pro_players_per_million | matches_per_player | |
---|---|---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | Algeria | 250 | 1 | 43053054.0 | 0.023227 | 250.000000 |
1 | M2_01 Wolf 2020 | Argentina | 1480 | 4 | 44780677.0 | 0.089324 | 370.000000 |
2 | M2_01 Wolf 2020 | Armenia | 546 | 1 | 2957731.0 | 0.338097 | 546.000000 |
3 | M2_01 Wolf 2020 | Australia | 6193 | 27 | 25203198.0 | 1.071293 | 229.370370 |
4 | M2_01 Wolf 2020 | Austria | 3981 | 16 | 8955102.0 | 1.786691 | 248.812500 |
... | ... | ... | ... | ... | ... | ... | ... |
2313 | M4_02 Love 2022 | United Kingdom | 20656 | 71 | 67530172.0 | 1.051382 | 290.929577 |
2314 | M4_02 Love 2022 | United States | 47946 | 161 | 329064917.0 | 0.489265 | 297.801242 |
2315 | M4_02 Love 2022 | Uzbekistan | 1116 | 5 | 32981716.0 | 0.151599 | 223.200000 |
2316 | M4_02 Love 2022 | Viet Nam | 2277 | 10 | 96462106.0 | 0.103668 | 227.700000 |
2317 | M4_02 Love 2022 | Zimbabwe | 199 | 1 | 14645468.0 | 0.068281 | 199.000000 |
2318 rows × 7 columns
We can now calculate the number of pro players per million inhabitants, this could be a proxy for Gwent's popularity in different parts of the world
top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('pro_players_per_million', ascending=False).drop(columns=['pop2019'])
top_popularity.head(10)
season | country | total_matches | num_players | pro_players_per_million | matches_per_player | |
---|---|---|---|---|---|---|
657 | M2_08 Draconid 2020 | Poland | 72225 | 267 | 7.047129 | 270.505618 |
617 | M2_08 Draconid 2020 | Estonia | 1726 | 7 | 5.280436 | 246.571429 |
660 | M2_08 Draconid 2020 | Russian Federation | 195594 | 672 | 4.606770 | 291.062500 |
599 | M2_08 Draconid 2020 | Belarus | 10261 | 39 | 4.125931 | 263.102564 |
677 | M2_08 Draconid 2020 | Ukraine | 52333 | 162 | 3.682351 | 323.043210 |
626 | M2_08 Draconid 2020 | Iceland | 307 | 1 | 2.949583 | 307.000000 |
618 | M2_08 Draconid 2020 | Finland | 4053 | 14 | 2.530659 | 289.500000 |
624 | M2_08 Draconid 2020 | Hong Kong | 5154 | 18 | 2.420606 | 286.333333 |
644 | M2_08 Draconid 2020 | Malta | 229 | 1 | 2.270807 | 229.000000 |
610 | M2_08 Draconid 2020 | Croatia | 2247 | 9 | 2.179016 | 249.666667 |
An alternative way is to find places where the die-hard players are located, those that play the most games per season on average.
top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('matches_per_player', ascending=False).drop(columns=['pop2019'])
top_popularity.head(10)
season | country | total_matches | num_players | pro_players_per_million | matches_per_player | |
---|---|---|---|---|---|---|
611 | M2_08 Draconid 2020 | Cyprus | 1217 | 2 | 1.668648 | 608.500000 |
608 | M2_08 Draconid 2020 | Congo, The Democratic Republic of the | 570 | 1 | 0.011522 | 570.000000 |
593 | M2_08 Draconid 2020 | Armenia | 1002 | 2 | 0.676194 | 501.000000 |
631 | M2_08 Draconid 2020 | Ireland | 2988 | 6 | 1.228880 | 498.000000 |
596 | M2_08 Draconid 2020 | Azerbaijan | 1447 | 3 | 0.298575 | 482.333333 |
654 | M2_08 Draconid 2020 | Panama | 453 | 1 | 0.235491 | 453.000000 |
669 | M2_08 Draconid 2020 | Switzerland | 2580 | 6 | 0.698376 | 430.000000 |
603 | M2_08 Draconid 2020 | Bulgaria | 5516 | 13 | 1.857111 | 424.307692 |
662 | M2_08 Draconid 2020 | Serbia | 4074 | 10 | 1.139960 | 407.400000 |
656 | M2_08 Draconid 2020 | Philippines | 5347 | 14 | 0.129490 | 381.928571 |
We'll select countries with three or more playing in the pro rank, select the top 3 players and calculate the average MMR and total MMR for the top 3 players of the country.
num_player_included = 3 # Here we define the number of players in Pro Ladder to be considered
players_per_country = full_df.groupby(['season','country']).agg(
num_players = pd.NamedAgg('name', 'count')
).reset_index()
players_per_country = players_per_country[players_per_country['num_players'] >= num_player_included]
top_per_country = pd.merge(players_per_country[['season', 'country']], full_df, how='inner', on=['season', 'country'])
top_per_country = top_per_country[top_per_country['national_rank'] <= num_player_included]
top_per_country
season | country | rank | name | matches | mmr | previous_top500 | national_rank | efficiency | lei | |
---|---|---|---|---|---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | Argentina | 174 | Srchenko | 393 | 9887 | no | 1.0 | 0.730280 | 14.477235 |
1 | M2_01 Wolf 2020 | Argentina | 471 | eltroco | 339 | 9758 | no | 2.0 | 0.466077 | 8.581382 |
2 | M2_01 Wolf 2020 | Argentina | 846 | MorgannFreemann | 208 | 9660 | no | 3.0 | 0.288462 | 4.160251 |
4 | M2_01 Wolf 2020 | Australia | 135 | whitecool | 404 | 9914 | no | 1.0 | 0.777228 | 15.622084 |
5 | M2_01 Wolf 2020 | Australia | 211 | Pacifier3750 | 453 | 9863 | no | 2.0 | 0.580574 | 12.356818 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
73410 | M4_02 Love 2022 | Uzbekistan | 1045 | Frag. | 156 | 9781 | no | 2.0 | 1.160256 | 14.491598 |
73411 | M4_02 Love 2022 | Uzbekistan | 1119 | SMEAGOOL | 180 | 9770 | no | 3.0 | 0.944444 | 12.671052 |
73414 | M4_02 Love 2022 | Viet Nam | 655 | HaoThien | 325 | 9868 | no | 1.0 | 0.824615 | 14.865965 |
73415 | M4_02 Love 2022 | Viet Nam | 940 | captainstef | 401 | 9800 | no | 2.0 | 0.498753 | 9.987523 |
73416 | M4_02 Love 2022 | Viet Nam | 1381 | __cAnh | 292 | 9734 | no | 3.0 | 0.458904 | 7.841757 |
4956 rows × 10 columns
top_stats = top_per_country.groupby(['season','country']).agg(
mean_mmr = pd.NamedAgg('mmr', 'mean'),
total_mmr = pd.NamedAgg('mmr', 'sum'),
mean_matches_per_player = pd.NamedAgg('matches', 'mean'),
total_matches = pd.NamedAgg('matches', 'sum')
).reset_index()
# Here we add the same ranking and effeciency statistics for national teams as for individual players
top_stats['nation_rank'] = top_stats.groupby(['season'])["mean_mmr"].rank("first", ascending=False)
top_stats['efficiency'] = ((top_stats['mean_mmr']-9600))/top_stats['mean_matches_per_player']
top_stats['lei'] = ((top_stats['mean_mmr']-9600))/np.sqrt(top_stats['mean_matches_per_player'])
top_stats
season | country | mean_mmr | total_mmr | mean_matches_per_player | total_matches | nation_rank | efficiency | lei | |
---|---|---|---|---|---|---|---|---|---|
0 | M2_01 Wolf 2020 | Argentina | 9768.333333 | 29305 | 313.333333 | 940 | 34.0 | 0.537234 | 9.509700 |
1 | M2_01 Wolf 2020 | Australia | 9869.666667 | 29609 | 336.333333 | 1009 | 22.0 | 0.801784 | 14.704231 |
2 | M2_01 Wolf 2020 | Austria | 9905.666667 | 29717 | 392.333333 | 1177 | 19.0 | 0.779099 | 15.431938 |
3 | M2_01 Wolf 2020 | Belarus | 10064.333333 | 30193 | 390.000000 | 1170 | 11.0 | 1.190598 | 23.512432 |
4 | M2_01 Wolf 2020 | Belgium | 9901.000000 | 29703 | 415.666667 | 1247 | 21.0 | 0.724138 | 14.763655 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1647 | M4_02 Love 2022 | Ukraine | 10458.000000 | 31374 | 748.666667 | 2246 | 3.0 | 1.146037 | 31.357616 |
1648 | M4_02 Love 2022 | United Kingdom | 10223.666667 | 30671 | 692.000000 | 2076 | 12.0 | 0.901252 | 23.708249 |
1649 | M4_02 Love 2022 | United States | 10201.333333 | 30604 | 562.666667 | 1688 | 14.0 | 1.068720 | 25.350684 |
1650 | M4_02 Love 2022 | Uzbekistan | 9810.333333 | 29431 | 220.333333 | 661 | 53.0 | 0.954614 | 14.169940 |
1651 | M4_02 Love 2022 | Viet Nam | 9800.666667 | 29402 | 339.333333 | 1018 | 56.0 | 0.591356 | 10.893363 |
1652 rows × 9 columns
top_stats[top_stats['season'] == 'M2_08 Draconid 2020'].sort_values('nation_rank')
season | country | mean_mmr | total_mmr | mean_matches_per_player | total_matches | nation_rank | efficiency | lei | |
---|---|---|---|---|---|---|---|---|---|
432 | M2_08 Draconid 2020 | China | 10489.333333 | 31468 | 409.000000 | 1227 | 1.0 | 2.174409 | 43.974703 |
466 | M2_08 Draconid 2020 | Russian Federation | 10479.666667 | 31439 | 636.000000 | 1908 | 2.0 | 1.383124 | 34.881052 |
463 | M2_08 Draconid 2020 | Poland | 10439.333333 | 31318 | 657.000000 | 1971 | 3.0 | 1.277524 | 32.745512 |
440 | M2_08 Draconid 2020 | Germany | 10399.666667 | 31199 | 440.000000 | 1320 | 4.0 | 1.817424 | 38.122613 |
450 | M2_08 Draconid 2020 | Japan | 10345.000000 | 31035 | 708.666667 | 2126 | 5.0 | 1.051270 | 27.985642 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
446 | M2_08 Draconid 2020 | Iran, Islamic Republic of | 9787.000000 | 29361 | 264.000000 | 792 | 60.0 | 0.708333 | 11.509054 |
431 | M2_08 Draconid 2020 | Chile | 9782.666667 | 29348 | 310.000000 | 930 | 61.0 | 0.589247 | 10.374770 |
469 | M2_08 Draconid 2020 | Singapore | 9772.666667 | 29318 | 325.000000 | 975 | 62.0 | 0.531282 | 9.577823 |
483 | M2_08 Draconid 2020 | Venezuela, Bolivarian Republic of | 9766.000000 | 29298 | 271.333333 | 814 | 63.0 | 0.611794 | 10.077586 |
471 | M2_08 Draconid 2020 | Slovenia | 9743.666667 | 29231 | 273.333333 | 820 | 64.0 | 0.525610 | 8.689799 |
64 rows × 9 columns
Lets exports all data to Excel files so they can be further analysed without pandas/python.
full_df.to_excel('./output/player_stats.xlsx')
national_stats.to_excel('./output/national_stats.xlsx')
per_season_df.to_excel('./output/seasonal_stats.xlsx')
player_summaries.to_excel('./output/player_summaries.xlsx')
bracket = 500
returning = full_df[full_df['rank'] <= bracket].groupby(['season', 'previous_top500']).agg(
count = pd.NamedAgg('name', 'count')
).reset_index()
returning = returning[returning.previous_top500 == 'yes']
returning
season | previous_top500 | count | |
---|---|---|---|
2 | M2_02 Love 2020 | yes | 304 |
4 | M2_03 Bear 2020 | yes | 313 |
6 | M2_04 Elf 2020 | yes | 294 |
8 | M2_05 Viper 2020 | yes | 317 |
10 | M2_06 Magic 2020 | yes | 314 |
12 | M2_07 Griffin 2020 | yes | 277 |
14 | M2_08 Draconid 2020 | yes | 299 |
16 | M2_09 Dryad 2020 | yes | 264 |
18 | M2_10 Cat 2020 | yes | 277 |
20 | M2_11 Mahakam 2020 | yes | 266 |
22 | M2_12 Wild Hunt 2020 | yes | 242 |
24 | M3_01 Wolf 2021 | yes | 267 |
26 | M3_02 Love 2021 | yes | 286 |
28 | M3_03 Bear 2021 | yes | 286 |
30 | M3_04 Elf 2021 | yes | 266 |
32 | M3_05 Viper 2021 | yes | 284 |
34 | M3_06 Magic 2021 | yes | 270 |
36 | M3_07 Griffin 2021 | yes | 257 |
38 | M3_08 Draconid 2021 | yes | 230 |
40 | M3_09 Dryad 2021 | yes | 258 |
42 | M3_10 Cat 2021 | yes | 210 |
44 | M3_11 Mahakam 2021 | yes | 233 |
46 | M3_12 Wild Hunt 2021 | yes | 249 |
48 | M4_01 Wolf 2022 | yes | 299 |
50 | M4_02 Love 2022 | yes | 275 |
returning['%'] = returning['count'] * 100/bracket
np.mean(returning['%'])
54.695999999999984