This is a quick version: for each season it only looks up the top 500 players from the previous season, in order to estimate the total number of pro players.
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd
import numpy as np
# Load the per-season player statistics. The sheet was saved together with
# its index, which comes back as an 'Unnamed: 0' column and is discarded.
players_df = pd.read_excel('./output/player_stats.xlsx')
players_df = players_df.drop('Unnamed: 0', axis=1)
players_df.head()
rank | name | country | matches | mmr | season | previous_top500 | national_rank | efficiency | lei | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | kolemoen | Germany | 431 | 10484 | M2_01 Wolf 2020 | no | 1 | 2.051044 | 42.580782 |
1 | 2 | kams134 | Poland | 923 | 10477 | M2_01 Wolf 2020 | no | 1 | 0.950163 | 28.866807 |
2 | 3 | TailBot | Poland | 538 | 10472 | M2_01 Wolf 2020 | no | 2 | 1.620818 | 37.594590 |
3 | 4 | Pajabol | Poland | 820 | 10471 | M2_01 Wolf 2020 | no | 3 | 1.062195 | 30.416639 |
4 | 5 | Adzikov | Poland | 1105 | 10442 | M2_01 Wolf 2020 | no | 4 | 0.761991 | 25.329753 |
# Candidate player names to look up in the season currently being processed.
# The loop below overwrites this with each season's top-500 names, so it
# starts empty: the first season has no previous season to draw from.
all_players = []
# One entry per season, in chronological order:
#   (season label as stored in players_df.season,
#    ranking-page URL template with a '{user}' placeholder for the player name,
#    path of the spreadsheet that caches the extra records for that season)
# NOTE(review): the URL slugs and file names are not uniform, e.g.
# 'season-of-love' (M2_02) vs 'season-of-the-love' (M3_02, M4_02),
# 'season-of-the-mahakam' (M2_11) vs 'season-of-mahakam' (M3_11), and
# 'season_of_griffin_2021' vs 'season_of_the_griffin_2020'. Confirm each URL
# against the site before normalising anything; a wrong slug would presumably
# just yield no rows, which the download loop treats as "player not found".
seasons = [
('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2020_extra.xlsx'),
('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/1/{user}', './output/season_of_love_2020_extra.xlsx'),
('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2020_extra.xlsx'),
('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2020_extra.xlsx'),
('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2020_extra.xlsx'),
('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/1/{user}', './output/season_of_magic_2020_extra.xlsx'),
('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/1/{user}', './output/season_of_the_griffin_2020_extra.xlsx'),
('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2020_extra.xlsx'),
('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2020_extra.xlsx'),
('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2020_extra.xlsx'),
('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/1/{user}', './output/season_of_the_mahakam_2020_extra.xlsx'),
('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2020_extra.xlsx'),
('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2021_extra.xlsx'),
('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/1/{user}', './output/season_of_love_2021_extra.xlsx'),
('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2021_extra.xlsx'),
('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2021_extra.xlsx'),
('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2021_extra.xlsx'),
('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/1/{user}', './output/season_of_magic_2021_extra.xlsx'),
('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/1/{user}', './output/season_of_griffin_2021_extra.xlsx'),
('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2021_extra.xlsx'),
('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2021_extra.xlsx'),
('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2021_extra.xlsx'),
('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/1/{user}', './output/season_of_the_mahakam_2021_extra.xlsx'),
('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2021_extra.xlsx'),
('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2022_extra.xlsx'),
('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/1/{user}', './output/season_of_love_2022_extra.xlsx'),
]
# Upper bound, in seconds, for a single ranking-page request. Without it a
# stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _parse_ranking_row(row, season):
    """Turn one ranking-page result row into a player record dict.

    Raises TypeError/AttributeError/KeyError/IndexError/ValueError when the
    row does not have the expected markup; the caller treats those as
    "this player could not be read" and moves on.
    """
    # The second CSS class of the flag icon carries the country code,
    # e.g. 'flag-icon-pl'.
    flag = row.find("i", {"class": "flag-icon"})["class"][1]
    return {
        'rank': int(row.find("div", {"class": "td-number"}).text.strip()),
        'name': row.find("div", {"class": "td-nick"}).text.strip(),
        'country': flag.replace('flag-icon-', '').upper(),
        'matches': int(row.find("div", {"class": "td-matches"}).text.strip().replace(' matches', '')),
        'mmr': int(row.find("div", {"class": "td-mmr"}).text.strip().replace(',', '')),
        'season': season,
    }


# For every season, look up the previous season's top-500 players that are not
# already present in players_df for this season, and cache whatever is found
# in a per-season spreadsheet. Seasons whose spreadsheet already exists are
# loaded from disk instead of being downloaded again.
for season, url_template, output_path in seasons:
    if os.path.exists(output_path):
        print(f"{output_path} exists, loading file instead of downloading ...")
        df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)
    else:
        output = []
        # A set keeps the membership test below cheap and independent of how
        # NumPy broadcasts `in` over an object array.
        known_players = set(players_df[players_df.season == season].name.values)
        unknown_players = [n for n in all_players if n not in known_players]
        for player in tqdm(unknown_players):
            url = url_template.replace('{user}', str(player))
            try:
                r = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
                soup = BeautifulSoup(r.text, 'html.parser')
                rows = soup.find_all("div", {"class": "c-ranking__inner-frame-found"})
                # Only the first highlighted row is the searched player.
                for row in rows[:1]:
                    new_record = _parse_ranking_row(row, season)
                    # Players without any match this season are not counted.
                    if 0 < new_record['matches']:
                        output.append(new_record)
            except (requests.RequestException, AttributeError, TypeError,
                    ValueError, KeyError, IndexError) as exc:
                # Best effort: one unreadable player must not abort the
                # season, but the failure is reported instead of silently
                # dropped. Ctrl-C is no longer swallowed here.
                tqdm.write(f"Skipping {player!r} for {season}: {exc!r}")
        df = pd.DataFrame(output).drop_duplicates()
        df.to_excel(output_path)
    # This season's top 500 become the candidates for the next season.
    all_players = players_df[(players_df.season == season) & (players_df['rank'] <= 500)].name.unique()
./output/season_of_the_wolf_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_love_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_bear_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_elf_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_viper_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_magic_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_griffin_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_draconid_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_dryad_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_cat_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_mahakam_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_wild_hunt_2020_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_wolf_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_love_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_bear_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_elf_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_viper_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_magic_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_griffin_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_draconid_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_dryad_2021_extra.xlsx exists, loading file instead of downloading ... 
./output/season_of_the_cat_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_mahakam_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_wild_hunt_2021_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_the_wolf_2022_extra.xlsx exists, loading file instead of downloading ... ./output/season_of_love_2022_extra.xlsx exists, loading file instead of downloading ...