#!/usr/bin/env python
# coding: utf-8

# Notebook export: joins the SPADL actions of every FIFA World Cup game with
# the stored scoring/conceding predictions, computes action values with
# socceraction's VAEP module, and (optionally) plots Belgium's ten
# highest-valued non-shot actions together with a few surrounding actions.

# In[4]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
import os
import sys

# Make the parent directory importable (kept from the original notebook).
sys.path.append('../')
import pandas as pd
import tqdm
import warnings

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import socceraction.vaep as vaep


# In[5]:

## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder, "predictions.h5")


# In[6]:

games = pd.read_hdf(spadl_h5, "games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))


# In[7]:

# Lookup tables that are merged into every game's action table below.
players = pd.read_hdf(spadl_h5, "players")
teams = pd.read_hdf(spadl_h5, "teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

# One frame per game: actions + predictions + values, concatenated column-wise.
A = []
for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players, "left", on="player_id")
        .merge(teams, "left", on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    preds = pd.read_hdf(predictions_h5, f"game_{game.game_id}")
    values = vaep.value(actions, preds.scores, preds.concedes)
    A.append(pd.concat([actions, preds, values], axis=1))

# After reset_index(drop=True) the index labels of A equal row positions;
# the inspection cell below relies on that.
A = (
    pd.concat(A)
    .sort_values(["game_id", "period_id", "time_seconds", "timestamp"])
    .reset_index(drop=True)
)
A.columns


# ### (optional) inspect Belgium's top 10 most valuable non-shot actions

# In[8]:

import matplotsoccer

sorted_A = A.sort_values("vaep_value", ascending=False)
sorted_A = sorted_A[sorted_A.team_name == "Belgium"]  # view only actions from Belgium
sorted_A = sorted_A[~sorted_A.type_name.str.contains("shot")]  # eliminate shots


def get_time(period_id, time_seconds):
    """Format a match clock as ``"<minutes>m<seconds>s"``.

    Minutes are ``(period_id - 1) * 45`` plus the whole minutes elapsed in
    the period; seconds are the remainder and are printed without a decimal
    part when they are a whole number.

    NOTE(review): the fixed 45-minute offset per period presumably only
    matches regular-time halves; confirm before using with extra-time periods.
    """
    m = int((period_id - 1) * 45 + time_seconds // 60)
    s = time_seconds % 60
    if s == int(s):
        s = int(s)
    return f"{m}m{s}s"


# head(10) copes with fewer than ten matching rows, whereas indexing
# sorted_A[j:j+1] for j in range(10) raised IndexError in that case.
for row in sorted_A.head(10).itertuples():
    i = row.Index

    # Context window: up to three actions before and one after the highlighted
    # action. The start is clamped at 0 because a negative slice start would
    # count from the end of A and yield an empty window.
    a = A.iloc[max(i - 3, 0) : i + 2]
    # Keep only actions of the highlighted action's own game so the plot never
    # mixes in the tail of the previous match.
    a = a[a.game_id == row.game_id].copy()

    # Prefer the nickname; fall back to the full name when the nickname is
    # missing (None/NaN) or empty. Done column-wise to avoid positional
    # integer indexing on a label-indexed row Series.
    nickname = a.player_nickname
    a["player"] = nickname.where(nickname.fillna("").ne(""), a.player_name)

    # Look the game up from the highlighted action itself rather than from
    # the first row of the window.
    g = next(games[games.game_id == row.game_id].itertuples())
    game_info = f"{g.match_date} {g.home_team_name} {g.home_score}-{g.away_score} {g.away_team_name}"
    minute = int((row.period_id - 1) * 45 + row.time_seconds // 60)
    print(f"{game_info} {minute}' {row.type_name} {row.player_name}")

    # Pre-format the numeric columns shown in the plot's label table.
    a["scores"] = a.scores.map("{:.3f}".format)
    a["vaep_value"] = a.vaep_value.map("{:.3f}".format)
    a["time"] = [get_time(p, t) for p, t in zip(a.period_id, a.time_seconds)]

    cols = ["time", "type_name", "player", "team_name", "scores", "vaep_value"]
    matplotsoccer.actions(
        a[["start_x", "start_y", "end_x", "end_y"]],
        a.type_name,
        team=a.team_name,
        result=a.result_name == "success",
        label=a[cols],
        labeltitle=cols,
        zoom=False,
    )