# Python ≥3.5 (ideally)
import platform
import sys, getopt
assert sys.version_info >= (3, 5)
import csv
# Import Dependencies
%matplotlib inline
# Math Operations
import numpy as np
from math import pi
# Datetime
import datetime
from datetime import date
import time
# Data Preprocessing
import pandas as pd # version 1.0.3
import os # used to read the csv filenames
import re
import random
from io import BytesIO
from pathlib import Path
# Reading directories
import glob
import os
# Working with JSON
import json
from pandas.io.json import json_normalize
from ast import literal_eval
# Data Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-whitegrid')
import missingno as msno # visually display missing data
# Display in Jupyter
from IPython.display import Image, YouTubeVideo
from IPython.core.display import HTML
# Ignore Warnings
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")
print('Setup Complete')
Setup Complete
# Python / module versions used here, printed for reproducibility
_versions = [
    ('Python', platform.python_version()),
    ('NumPy', np.__version__),
    ('pandas', pd.__version__),
    ('matplotlib', mpl.__version__),
    ('Seaborn', sns.__version__),
]
for _name, _ver in _versions:
    print('{}: {}'.format(_name, _ver))
Python: 3.7.6 NumPy: 1.18.1 pandas: 1.0.1 matplotlib: 3.1.3 Seaborn: 0.10.0
# Set up initial paths to the project's subfolders, relative to this notebook
base_dir = os.path.join('..', '..', '..')
data_dir = os.path.join(base_dir, 'data')
data_dir_fbref = os.path.join(data_dir, 'fbref')
data_dir_stratabet = os.path.join(data_dir, 'stratabet')
img_dir = os.path.join(base_dir, 'img')
fig_dir = os.path.join(img_dir, 'fig')
video_dir = os.path.join(base_dir, 'video')

# Show every column when displaying wide DataFrames
pd.set_option('display.max_columns', None)
# Combine individual csv files to form one DataFrame, df_chances, using glob.
# sorted() makes the file order deterministic — glob.glob returns files in
# arbitrary OS order, which would make the concatenated row order irreproducible.
all_files = sorted(glob.glob(data_dir_stratabet + '/downloaded/collection/Chances' + "/*.csv"))

# pd.concat takes a list of DataFrames; build it in one pass
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# ignore_index=True as we don't want pandas to try to align the per-file row indexes
df_chances = pd.concat(li, axis=0, ignore_index=True)

# Order chronologically within each competition
df_chances = df_chances.sort_values(['competition', 'kickoffDate', 'kickoffTime'], ascending=[True, True, True])
df_chances
index | competition | gsm_id | kickoffDate | kickoffTime | hometeam_team1 | awayteam_team2 | icon | chanceRating | team | type | time | player | location_x | location_y | bodyPart | shotQuality | defPressure | numDefPlayers | numAttPlayers | outcome | primaryPlayer | primaryType | primaryLocation_x | primaryLocation_y | secondaryPlayer | secondaryType | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7116 | 2444 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | verygoodchance | verygoodchance | Melbourne Victory | Open Play | 15:57 | Baro | 3.0 | 28.0 | Head | 2 | 4 | 2 | 0 | Missed | J. Troisi | Free Kick | -120.0 | 37.0 | - | - |
7117 | 2445 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | poorchance | poorchance | Brisbane Roar | Open Play | 00:52 | B. Borello | 1.0 | 79.0 | Left | 1 | 1 | 1 | 0 | Missed | T. Broich | Open Play Pass | 65.0 | 86.0 | - | - |
7118 | 2446 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | poorchance | poorchance | Melbourne Victory | Open Play | 06:16 | F. Ben Khalfallah | -43.0 | 86.0 | Right | 1 | 3 | 3 | 0 | Defended | - | - | - | - | - | - |
7119 | 2447 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | goodchance | goodchance | Melbourne Victory | Open Play | 09:22 | F. Ben Khalfallah | -26.0 | 41.0 | Left | 2 | 3 | 3 | 0 | Defended | M. Austin | Cross High | 103.0 | 32.0 | - | - |
7120 | 2448 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | poorchance | poorchance | Melbourne Victory | Open Play | 09:32 | J. Troisi | -6.0 | 99.0 | Right | 2 | 2 | 3 | 0 | Defended | - | - | - | - | - | - |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
65405 | 139 | TurSL | 2291443 | 2017-06-03 | 14:00:00 | Galatasaray | Konyaspor | poorchance | poorchance | Galatasaray | Open Play | 29:38 | S. Inan | -41.0 | 124.0 | Right | 2 | 0 | 2 | 1 | Missed | - | - | - | - | - | - |
65406 | 140 | TurSL | 2291443 | 2017-06-03 | 14:00:00 | Galatasaray | Konyaspor | fairlygoodchance | fairlygoodchance | Galatasaray | Dangerous Moment | 03:50 | W. Sneijder | -3.0 | 29.0 | Right | 0 | 4 | 1 | 0 | Defended | S. Gumus | Cross Low | -38.0 | 0.0 | - | - |
65407 | 141 | TurSL | 2291443 | 2017-06-03 | 14:00:00 | Galatasaray | Konyaspor | fairlygoodchance | fairlygoodchance | Galatasaray | Open Play | 37:33 | T. Cigerci | 13.0 | 36.0 | Other | 2 | 4 | 3 | 1 | Defended | W. Sneijder | Corner | 136.0 | 0.0 | L. Podolski | Corner Won |
65408 | 142 | TurSL | 2291443 | 2017-06-03 | 14:00:00 | Galatasaray | Konyaspor | poorchance | poorchance | Galatasaray | Open Play | 43:20 | M. Linnes | 47.0 | 93.0 | Right | 2 | 0 | 3 | 0 | Missed | Bruma | Open Play Pass | 92.0 | 98.0 | S. Inan | Open Play Pass |
65409 | 143 | TurSL | 2291443 | 2017-06-03 | 14:00:00 | Galatasaray | Konyaspor | poorchance | poorchance | Konyaspor | Open Play | 44:43 | M. Mbamba | 17.0 | 77.0 | Right | 2 | 0 | 4 | 1 | Defended | - | - | - | - | - | - |
95927 rows × 27 columns
# Export one csv per competition.
# Fix: the original passed index_label=True — pandas expects a string/sequence
# (or False/None) there, so a bare True is written out literally as the column
# header "True". The default (None) uses the index's own name / a blank header.
lst_competitions = list(df_chances['competition'].unique())
for i, g in df_chances.groupby('competition'):
    g.to_csv(data_dir_stratabet + '/raw/chances/individual_competitions/stratabet_events_chances_{}.csv'.format(i),
             header=True)
# NOTE: the combined all-competitions export is disabled here; df_chances is
# rebuilt from a different source folder in a later cell.
#df_chances.to_csv(data_dir_stratabet + '/raw/chances/' + 'stratabet_chances_all.csv', index=None, header=True)
# Combine individual csv files to form one DataFrame, df_chances, using glob
all_files = glob.glob(data_dir_stratabet + '/downloaded/danclarke93' + "/*.csv")  # list of all csv files
frames = []  # pd.concat takes a list of DataFrames as an argument
for csv_path in all_files:
    frames.append(pd.read_csv(csv_path, index_col=None, header=0))
# ignore_index=True as we don't want pandas to try to align the per-file row indexes
df_chances = pd.concat(frames, axis=0, ignore_index=True)
# Order chronologically within each competition (scalar True applies to every key)
df_chances = df_chances.sort_values(by=['competition', 'kickoffDate', 'kickoffTime'], ascending=True)
df_chances
Unnamed: 0 | competition | gsm_id | kickoffDate | kickoffTime | hometeam_team1 | awayteam_team2 | icon | chanceRating | team | type | time | player | location_x | location_y | bodyPart | shotQuality | defPressure | numDefPlayers | numAttPlayers | outcome | primaryPlayer | primaryType | primaryLocation_x | primaryLocation_y | secondaryPlayer | secondaryType | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
21344 | 11404 | EngCh | 2467215 | 2017-08-04 | 18:45:00 | Nottingham Forest | Millwall | goodchance | goodchance | Nottingham Forest | Open Play | 74:07 | Z. Clough | -16.0 | 56.0 | Left | 3 | 5 | 2 | 0 | Defended | K. Dowell | Open Play Pass | -58.0 | 32.0 | - | - |
21345 | 11401 | EngCh | 2467215 | 2017-08-04 | 18:45:00 | Nottingham Forest | Millwall | poorchance | poorchance | Nottingham Forest | Open Play | 80:18 | B. McKay | 12.0 | 102.0 | Right | 3 | 4 | 2 | 0 | Defended | A. Traore | Open Play Pass | 102.0 | 171.0 | - | - |
21346 | 11402 | EngCh | 2467215 | 2017-08-04 | 18:45:00 | Nottingham Forest | Millwall | poorchance | poorchance | Millwall | Open Play | 77:58 | F. Onyedinma | -62.0 | 16.0 | Right | 3 | 5 | 2 | 0 | Saved | - | - | - | - | - | - |
21347 | 11403 | EngCh | 2467215 | 2017-08-04 | 18:45:00 | Nottingham Forest | Millwall | verygoodchance | verygoodchance | Millwall | Dangerous Moment | 75:18 | S. Morison | 32.0 | 12.0 | Left | 0 | 0 | 3 | 1 | Defended | J. Wallace | Cross High | -70.0 | 22.0 | - | - |
21348 | 11405 | EngCh | 2467215 | 2017-08-04 | 18:45:00 | Nottingham Forest | Millwall | fairlygoodchance | fairlygoodchance | Millwall | Open Play | 68:04 | L. Gregory | 11.0 | 45.0 | Left | 1 | 5 | 3 | 0 | Missed | S. Morison | Open Play Pass | 29.0 | 35.0 | J. Wallace | Cross Low |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
21422 | 80263 | SpaPr | 2529514 | 2018-03-12 | 20:00:00 | Deportivo Alaves | Real Betis | fairlygoodchance | fairlygoodchance | Deportivo Alaves | Open Play | 88:24 | Bojan | 10.0 | 60.0 | Right | 2 | 1 | 4 | 0 | Missed | H. Perez | Cross High | -115.0 | 56.0 | Munir | Open Play Pass |
21423 | 10892 | SpaPr | 2529514 | 2018-03-12 | 20:00:00 | Deportivo Alaves | Real Betis | goal | Very Good | Real Betis | Open Play | 77:08 | Loren Moron | -47.0 | 30.0 | Right | 3 | 2 | 2 | 0 | - | R. Boudebouz | Open Play Pass | -94.0 | 160.0 | Fabian Ruiz | Open Play Pass |
21424 | 10891 | SpaPr | 2529514 | 2018-03-12 | 20:00:00 | Deportivo Alaves | Real Betis | goal | Very Good | Deportivo Alaves | Open Play | 66:06 | Ruben Sobrino | -35.0 | 47.0 | Right | 3 | 3 | 1 | 0 | - | Martin | Open Play Pass | -9.0 | 196.0 | Tomas Pina | Open Play Pass |
21425 | 10890 | SpaPr | 2529514 | 2018-03-12 | 20:00:00 | Deportivo Alaves | Real Betis | goal | Great | Real Betis | Open Play | 43:20 | Javi Garcia | 30.0 | 12.0 | Right | 4 | 1 | 1 | 0 | - | A. Mandi | Shot (Opposition Rebound) | 34.0 | 33.0 | R. Boudebouz | Corner |
21426 | 10889 | SpaPr | 2529514 | 2018-03-12 | 20:00:00 | Deportivo Alaves | Real Betis | goal | Great | Real Betis | Open Play | 22:39 | Loren Moron | -38.0 | 44.0 | Right | 3 | 1 | 1 | 0 | - | Victor Camarasa | Open Play Pass | -56.0 | 112.0 | Francis Guerrero | Throw in |
27254 rows × 27 columns
# Export one csv per competition.
# Fix: dropped index_label=True — pandas expects a string/sequence (or
# False/None), so a bare True would be written literally as the header "True".
lst_competitions = list(df_chances['competition'].unique())
for i, g in df_chances.groupby('competition'):
    g.to_csv(data_dir_stratabet + '/raw/chances/individual_competitions/stratabet_events_chances_{}.csv'.format(i),
             header=True)
# Combine individual csv files to form one DataFrame, df_mins, using glob
all_files = glob.glob(data_dir_stratabet + '/downloaded/collection/Mins Played' + "/*.csv")  # list of all csv files
frames = []  # pd.concat takes a list of DataFrames as an argument
for csv_path in all_files:
    frames.append(pd.read_csv(csv_path, index_col=None, header=0))
# ignore_index=True as we don't want pandas to try to align the per-file row indexes
df_mins = pd.concat(frames, axis=0, ignore_index=True)
# Order chronologically within each competition (scalar True applies to both keys)
df_mins = df_mins.sort_values(by=['competition', 'kick_off_dt'], ascending=True)
df_mins
index | comp_id | competition | end_added | end_min | fixture_id | fixture_week | kick_off_dt | player | player_id | season | season_id | start_added | start_min | start_status | team | team_id | location | mins_played | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
15018 | 0 | 283 | AusAL | 0 | 90 | 2238877 | 1.0 | 2016-10-07 | M. Theo | 17526 | 2016/2017 | 12512 | 0 | 0 | Start | Brisbane Roar | 4321 | home | 90 |
15019 | 1 | 283 | AusAL | 0 | 90 | 2238877 | 1.0 | 2016-10-07 | J. Hingert | 88322 | 2016/2017 | 12512 | 0 | 0 | Start | Brisbane Roar | 4321 | home | 90 |
15020 | 2 | 283 | AusAL | 0 | 90 | 2238877 | 1.0 | 2016-10-07 | C. Brown | 193345 | 2016/2017 | 12512 | 0 | 0 | Start | Brisbane Roar | 4321 | home | 90 |
15021 | 3 | 283 | AusAL | 0 | 90 | 2238877 | 1.0 | 2016-10-07 | J. North | 17558 | 2016/2017 | 12512 | 0 | 0 | Start | Brisbane Roar | 4321 | home | 90 |
15022 | 4 | 283 | AusAL | 0 | 90 | 2238877 | 1.0 | 2016-10-07 | L. DeVere | 56394 | 2016/2017 | 12512 | 0 | 0 | Start | Brisbane Roar | 4321 | home | 90 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3535 | 372 | 19 | TurSL | 0 | 90 | 2506732 | 5.0 | 2017-09-18 | J. Jonsson | 196079 | 2017/2018 | 14277 | 0 | -1 | Unused Sub | Konyaspor | 2223 | away | 0 |
3536 | 373 | 19 | TurSL | 0 | 90 | 2506732 | 5.0 | 2017-09-18 | D. Milosevic | 215347 | 2017/2018 | 14277 | 0 | 71 | Sub | Konyaspor | 2223 | away | 19 |
3537 | 374 | 19 | TurSL | 0 | 90 | 2506732 | 5.0 | 2017-09-18 | V. Bora | 150431 | 2017/2018 | 14277 | 0 | -1 | Unused Sub | Konyaspor | 2223 | away | 0 |
3538 | 375 | 19 | TurSL | 0 | 90 | 2506732 | 5.0 | 2017-09-18 | M. Araz | 154283 | 2017/2018 | 14277 | 0 | 63 | Sub | Konyaspor | 2223 | away | 27 |
3539 | 376 | 19 | TurSL | 0 | 90 | 2506732 | 5.0 | 2017-09-18 | P. Eze | 316804 | 2017/2018 | 14277 | 0 | -1 | Unused Sub | Konyaspor | 2223 | away | 0 |
155124 rows × 19 columns
# Export one csv per competition, plus one combined file.
# Fixes: dropped index_label=True (pandas expects a string/sequence or
# False/None; a bare True is written literally as the header "True"), and
# replaced index=None with the explicit index=False it relied on (None is
# merely falsy there — same behavior, clearer intent).
lst_competitions = list(df_mins['competition'].unique())
for i, g in df_mins.groupby('competition'):
    g.to_csv(data_dir_stratabet + '/raw/mins_played/individual_competitions/stratabet_events_mins_played_{}.csv'.format(i),
             header=True)
# index=False: the row index is meaningless after the earlier concat
df_mins.to_csv(data_dir_stratabet + '/raw/mins_played/' + 'stratabet_mins_played_all.csv', index=False, header=True)
# Combine individual csv files to form one DataFrame, df_key_entries, using glob
all_files = glob.glob(data_dir_stratabet + '/downloaded/collection/Key Entries' + "/*.csv")  # list of all csv files
frames = []  # pd.concat takes a list of DataFrames as an argument
for csv_path in all_files:
    frames.append(pd.read_csv(csv_path, index_col=None, header=0))
# ignore_index=True as we don't want pandas to try to align the per-file row indexes
df_key_entries = pd.concat(frames, axis=0, ignore_index=True)
df_key_entries
index | competition | gsm_id | kickoffDate | kickoffTime | hometeam_team1 | awayteam_team2 | team | keyentryArea | keyentryType | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | AusAL | 2239006 | 2017-04-09 | 07:00:00 | Central Coast Mariners | Newcastle Jets | Central Coast Mariners | Right | Pass |
1 | 1 | AusAL | 2239006 | 2017-04-09 | 07:00:00 | Central Coast Mariners | Newcastle Jets | Central Coast Mariners | Left | Run |
2 | 2 | AusAL | 2239006 | 2017-04-09 | 07:00:00 | Central Coast Mariners | Newcastle Jets | Central Coast Mariners | Left | Run |
3 | 3 | AusAL | 2239006 | 2017-04-09 | 07:00:00 | Central Coast Mariners | Newcastle Jets | Newcastle Jets | Box | Run |
4 | 4 | AusAL | 2239006 | 2017-04-09 | 07:00:00 | Central Coast Mariners | Newcastle Jets | Central Coast Mariners | Box | Pass |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
195682 | 2178 | GerBL2 | 2255467 | 2017-04-16 | 11:30:00 | Bochum | Greuther Furth | Bochum | Right | Pass |
195683 | 2179 | GerBL2 | 2255467 | 2017-04-16 | 11:30:00 | Bochum | Greuther Furth | Bochum | Box | Pass |
195684 | 2180 | GerBL2 | 2255467 | 2017-04-16 | 11:30:00 | Bochum | Greuther Furth | Greuther Furth | Right | Run |
195685 | 2181 | GerBL2 | 2255467 | 2017-04-16 | 11:30:00 | Bochum | Greuther Furth | Greuther Furth | Box | Pass |
195686 | 2182 | GerBL2 | 2255467 | 2017-04-16 | 11:30:00 | Bochum | Greuther Furth | Bochum | Right | Pass |
195687 rows × 10 columns
# Order chronologically within each competition (scalar True applies to every key)
df_key_entries = df_key_entries.sort_values(by=['competition', 'kickoffDate', 'kickoffTime'], ascending=True)
df_key_entries
index | competition | gsm_id | kickoffDate | kickoffTime | hometeam_team1 | awayteam_team2 | team | keyentryArea | keyentryType | |
---|---|---|---|---|---|---|---|---|---|---|
6550 | 5788 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | Melbourne Victory | Left | Run |
6551 | 5789 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | Melbourne Victory | Box | Pass |
6552 | 5790 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | Brisbane Roar | Left | Pass |
6553 | 5791 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | Brisbane Roar | Box | Run |
6554 | 5792 | AusAL | 2238877 | 2016-10-07 | 08:50:00 | Brisbane Roar | Melbourne Victory | Melbourne Victory | Left | Pass |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
78488 | 210 | TurSL | 2291441 | 2017-06-03 | 14:00:00 | Trabzonspor | Bursaspor | Bursaspor | Left | Pass |
78489 | 211 | TurSL | 2291441 | 2017-06-03 | 14:00:00 | Trabzonspor | Bursaspor | Trabzonspor | Right | Pass |
78490 | 212 | TurSL | 2291441 | 2017-06-03 | 14:00:00 | Trabzonspor | Bursaspor | Trabzonspor | Right | Pass |
78491 | 213 | TurSL | 2291441 | 2017-06-03 | 14:00:00 | Trabzonspor | Bursaspor | Trabzonspor | Right | Pass |
78492 | 214 | TurSL | 2291441 | 2017-06-03 | 14:00:00 | Trabzonspor | Bursaspor | Trabzonspor | Left | Pass |
195687 rows × 10 columns
# Export one csv per competition, plus one combined file.
# Fixes: dropped index_label=True (pandas expects a string/sequence or
# False/None; a bare True is written literally as the header "True"), and
# replaced index=None with the explicit index=False it relied on.
lst_competitions = list(df_key_entries['competition'].unique())
for i, g in df_key_entries.groupby('competition'):
    g.to_csv(data_dir_stratabet + '/raw/key_entries/individual_competitions/stratabet_events_key_entries_{}.csv'.format(i),
             header=True)
# index=False: the row index is meaningless after the earlier concat
df_key_entries.to_csv(data_dir_stratabet + '/raw/key_entries/' + 'stratabet_key_entries_all.csv', index=False, header=True)
# Combine individual csv files to form one DataFrame, df_match_info, using glob
all_files = glob.glob(data_dir_stratabet + '/downloaded/collection/Match Info' + "/*.csv")  # list of all csv files
frames = []  # pd.concat takes a list of DataFrames as an argument
for csv_path in all_files:
    frames.append(pd.read_csv(csv_path, index_col=None, header=0))
# ignore_index=True as we don't want pandas to try to align the per-file row indexes
df_match_info = pd.concat(frames, axis=0, ignore_index=True)
df_match_info
index | competition | gsm_id | awayteam_team2 | hometeam_team1 | kickoffDate | kickoffTime | pitchType | weatherType | tempCelcius | tempType | formation_team1 | formation_team2 | formation_time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | AusAL | 2239006 | Newcastle Jets | Central Coast Mariners | 2017-04-09 | 07:00:00 | Average | Cloud, Sun | 23 | - | 4_1_4_1 | 4_2_3_1 | 83:35 |
1 | 1 | AusAL | 2239005 | Brisbane Roar | Perth Glory | 2017-04-08 | 12:00:00 | Average | Clear | 18 | - | 4_4_2 | 4_2_3_1 | 79:52 |
2 | 2 | AusAL | 2239004 | Melbourne Victory | Western Sydney Wanderers | 2017-04-08 | 09:50:00 | Average | Cloud, Sun | 22 | Warm | 4_2_3_1 | 4_2_3_1 | 79:18 |
3 | 3 | AusAL | 2239003 | Sydney | Wellington Phoenix | 2017-04-08 | 07:35:00 | Average | Cloud | 13 | - | 3_1_3_1_2 | 4_2_3_1 | 84:35 |
4 | 4 | AusAL | 2239003 | Sydney | Wellington Phoenix | 2017-04-08 | 07:35:00 | Average | Cloud | 13 | - | 4_1_2_1_2 | 4_2_3_1 | 65:16 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7223 | 428 | TurSL | 2291147 | Kasimpasa | Trabzonspor | 2016-08-20 | 16:30:00 | Average, Wet | Cloud, Light Rain | 25 | Warm | 4_1_4_1 | 4_2_3_1 | 81:11 |
7224 | 429 | TurSL | 2291147 | Kasimpasa | Trabzonspor | 2016-08-20 | 16:30:00 | Average, Wet | Cloud | 25 | Warm | 4_1_4_1 | 4_2_3_1 | 81:11 |
7225 | 430 | TurSL | 2291151 | Konyaspor | Rizespor | 2016-08-20 | 16:30:00 | Good | Clear | 25 | Warm | 4_1_4_1 | 4_4_2 | 84:29 |
7226 | 431 | TurSL | 2291151 | Konyaspor | Rizespor | 2016-08-20 | 16:30:00 | Good | Clear | 25 | Warm | 4_2_3_1 | 4_4_2 | 68:11 |
7227 | 432 | TurSL | 2291149 | Bursaspor | Adanaspor | 2016-08-19 | 18:45:00 | Average, Good, Patchy | Clear | 26 | - | 4_2_3_1 | 4_2_3_1 | 83:04 |
7228 rows × 14 columns
# Order chronologically within each competition (scalar True applies to every key)
df_match_info = df_match_info.sort_values(by=['competition', 'kickoffDate', 'kickoffTime'], ascending=True)
df_match_info
index | competition | gsm_id | awayteam_team2 | hometeam_team1 | kickoffDate | kickoffTime | pitchType | weatherType | tempCelcius | tempType | formation_team1 | formation_team2 | formation_time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
192 | 173 | AusAL | 2238877 | Melbourne Victory | Brisbane Roar | 2016-10-07 | 08:50:00 | Excellent | Clear | 21 | Warm | 4_3_3 | 4_3_3 | 88:06 |
190 | 171 | AusAL | 2238878 | Melbourne City | Wellington Phoenix | 2016-10-08 | 06:35:00 | Good | Cloud | 14 | - | 4_2_3_1 | 4_4_2 | 69:06 |
191 | 172 | AusAL | 2238878 | Melbourne City | Wellington Phoenix | 2016-10-08 | 06:35:00 | Good | Cloud | 14 | - | 4_2_3_1 | 3_5_2 | 61:19 |
188 | 169 | AusAL | 2238879 | Sydney | Western Sydney Wanderers | 2016-10-08 | 08:50:00 | Excellent | Clear | 17 | Warm | 4_3_3 | 4_2_3_1 | 83:12 |
189 | 170 | AusAL | 2238879 | Sydney | Western Sydney Wanderers | 2016-10-08 | 08:50:00 | Excellent | Clear | 17 | Warm | 4_3_3 | 5_4_1 | 91:47 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
6795 | 0 | TurSL | 2291442 | Alanyaspor | Rizespor | 2017-06-03 | 14:00:00 | Good | Clear, Sun | 19 | Warm | 3_5_2 | 4_1_4_1 | 77:00 |
6796 | 1 | TurSL | 2291449 | Osmanlispor | Besiktas | 2017-06-03 | 14:00:00 | Excellent | Cloud, Sun | 28 | - | 4_2_3_1 | 4_2_3_1 | 74:10 |
6797 | 2 | TurSL | 2291444 | Fenerbahce | Adanaspor | 2017-06-03 | 14:00:00 | Good | Sun | 28 | Hot | 4_5_1 | 4_2_3_1 | 80:52 |
6798 | 3 | TurSL | 2291443 | Konyaspor | Galatasaray | 2017-06-03 | 14:00:00 | Average, Soft | Sun | 27 | Hot | 4_2_3_1 | 4_2_2_2 | 80:14 |
6799 | 4 | TurSL | 2291441 | Bursaspor | Trabzonspor | 2017-06-03 | 14:00:00 | Excellent | Cloud, Sun | 22 | Warm | 4_2_3_1 | 4_2_3_1 | 73:59 |
7228 rows × 14 columns
# Export one csv per competition, plus one combined file.
# Fixes: dropped index_label=True (pandas expects a string/sequence or
# False/None; a bare True is written literally as the header "True"), and
# replaced index=None with the explicit index=False it relied on.
lst_competitions = list(df_match_info['competition'].unique())
for i, g in df_match_info.groupby('competition'):
    g.to_csv(data_dir_stratabet + '/raw/match_info/individual_competitions/stratabet_match_info_{}.csv'.format(i),
             header=True)
# index=False: the row index is meaningless after the earlier concat
df_match_info.to_csv(data_dir_stratabet + '/raw/match_info/' + 'stratabet_match_info_all.csv', index=False, header=True)