#!/usr/bin/env python
# coding: utf-8
"""Notebook export: load the games table and the per-game feature tables.

Reads the ``games`` table from ``spadl.h5`` and one feature table per game
from ``features.h5`` (both under ``<cwd>/data/wy_scout/``), then stacks the
per-game feature tables into a single DataFrame tagged with ``game_id``.
"""

# In[23]:

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# `get_ipython` is only injected inside IPython/Jupyter sessions. Guard the
# lookup so the exported script does not die with NameError under plain
# `python`, where the inline-plot magic is meaningless anyway.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')

# In[24]:

from xgboost import XGBClassifier
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.calibration import CalibratedClassifierCV
from scikitplot.metrics import plot_calibration_curve

# In[25]:

import warnings

# Silence the PerformanceWarning exposed by pandas' PyTables module.
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

# In[26]:

import os

# Kept as a plain string with a trailing slash: file names are appended to it
# by string concatenation below.
data_dir = os.getcwd() + '/data/wy_scout/'

# In[27]:

df_games = pd.read_hdf(data_dir + 'spadl.h5', 'games')
df_games.tail(10)  # notebook preview; a no-op when run as a script

# In[28]:

# Read one feature table per game (stored under the key ``game_<game_id>``),
# tag its rows with the game id, and collect the tables for concatenation.
dfs_features = []
for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
    game_id = game['game_id']
    df_features = pd.read_hdf(data_dir + 'features.h5', key=f'game_{game_id}')
    df_features['game_id'] = game_id
    dfs_features.append(df_features)

# From here on `df_features` is the stacked table for all games, re-indexed
# 0..n-1 (the per-game indices are dropped).
df_features = pd.concat(dfs_features).reset_index(drop=True)
df_features.tail(10)  # notebook preview; a no-op when run as a script

# In[29]:

dfs_labels = []
# TODO: label loading is unfinished -- the `pd.read_hdf()` call below still
# has no path/key arguments. Kept commented out until it is completed.
#
# for _, game in tqdm(df_games.iterrows(), total=len(df_games)):
#     game_id = game['game_id']
#     df_labels = pd.read_hdf()