#import packages
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
#Read in the data from a CSV file. It comes from a Premier League game, Southampton vs Bournemouth, in 2019.
#I scraped this data from Understat. I would recommend checking out the tutorial on scraping Understat on my YouTube channel:
#https://www.youtube.com/c/McKayJohns
# Load the shot-by-shot data (one row per shot) into a DataFrame
df = pd.read_csv(filepath_or_buffer='xgtutorial.csv')
# Bare expression: in a notebook this renders the DataFrame as the cell output
df
Unnamed: 0 | minute | team | xG | player | result | |
---|---|---|---|---|---|---|
0 | 0 | 1 | Southampton | 0.105402 | Nathan Redmond | BlockedShot |
1 | 1 | 2 | Southampton | 0.136320 | Jan Bednarek | BlockedShot |
2 | 2 | 2 | Southampton | 0.037217 | Oriol Romeu | BlockedShot |
3 | 3 | 2 | Southampton | 0.021404 | James Ward-Prowse | BlockedShot |
4 | 4 | 3 | Southampton | 0.066363 | Pierre-Emile Højbjerg | BlockedShot |
5 | 5 | 9 | Southampton | 0.363826 | Nathan Redmond | SavedShot |
6 | 6 | 11 | Southampton | 0.354095 | Shane Long | Goal |
7 | 7 | 17 | Southampton | 0.036274 | Nathan Redmond | MissedShots |
8 | 8 | 21 | Southampton | 0.076804 | Danny Ings | MissedShots |
9 | 9 | 23 | Southampton | 0.380378 | Shane Long | SavedShot |
10 | 10 | 23 | Southampton | 0.608956 | Shane Long | ShotOnPost |
11 | 11 | 47 | Southampton | 0.027636 | Ryan Bertrand | MissedShots |
12 | 12 | 49 | Southampton | 0.026454 | Nathan Redmond | MissedShots |
13 | 13 | 54 | Southampton | 0.033908 | James Ward-Prowse | Goal |
14 | 14 | 57 | Southampton | 0.429492 | Danny Ings | SavedShot |
15 | 15 | 58 | Southampton | 0.021995 | Danny Ings | SavedShot |
16 | 16 | 60 | Southampton | 0.050620 | Matt Targett | BlockedShot |
17 | 17 | 60 | Southampton | 0.053497 | Nathan Redmond | MissedShots |
18 | 18 | 66 | Southampton | 0.086690 | Matt Targett | Goal |
19 | 19 | 71 | Southampton | 0.011735 | Pierre-Emile Højbjerg | MissedShots |
20 | 20 | 79 | Southampton | 0.023274 | Nathan Redmond | MissedShots |
21 | 21 | 81 | Southampton | 0.018172 | Pierre-Emile Højbjerg | MissedShots |
22 | 22 | 19 | Bournemouth | 0.516069 | Dan Gosling | Goal |
23 | 23 | 31 | Bournemouth | 0.016307 | David Brooks | SavedShot |
24 | 24 | 31 | Bournemouth | 0.515468 | Callum Wilson | Goal |
25 | 25 | 51 | Bournemouth | 0.025648 | Jack Simpson | MissedShots |
26 | 26 | 72 | Bournemouth | 0.034942 | Ryan Fraser | MissedShots |
27 | 27 | 82 | Bournemouth | 0.017407 | Joshua King | MissedShots |
28 | 28 | 85 | Bournemouth | 0.973102 | Callum Wilson | Goal |
29 | 29 | 87 | Bournemouth | 0.506683 | Callum Wilson | SavedShot |
30 | 30 | 94 | Bournemouth | 0.024924 | Joshua King | MissedShots |
#now that we have our dataframe set up, we are going to create some lists to plot the different xG values
#4 lists - home and away xg and minutes
#We start these with zero so our charts will start at 0
# Per-team lists of shot xG values and the minutes they occurred in.
# Each list is seeded with 0 so the step chart starts at the origin.
a_xG = [0]
h_xG = [0]
a_min = [0]
h_min = [0]
#this finds our team names from the dataframe. This will only work as long as both teams took a shot
# The first row is treated as the home team and the last row as the away team.
hteam = df['team'].iloc[0]
ateam = df['team'].iloc[-1]
# Walk the three columns in lockstep instead of indexing by position:
# df['col'][x] is a label lookup and breaks if the index is not 0..n-1.
for team, minute, xg in zip(df['team'], df['minute'], df['xG']):
    if team == ateam:
        a_xG.append(xg)
        a_min.append(minute)
    if team == hteam:
        h_xG.append(xg)
        h_min.append(minute)
#this is the function we use to make our xG values be cumulative rather than single shot values
#it goes through the list and adds the numbers together
def nums_cumulative_sum(nums_list):
    """Return the running totals of *nums_list* as a new list.

    Element ``i`` of the result is the sum of ``nums_list[0]`` through
    ``nums_list[i]``. An empty input yields an empty list.
    """
    # Local import keeps this block self-contained.
    from itertools import accumulate

    # accumulate makes a single pass, rather than re-summing a growing
    # slice for every index (which was quadratic in the list length).
    return list(accumulate(nums_list))
# Convert each team's per-shot xG list into a running total
h_cumulative = nums_cumulative_sum(h_xG)
a_cumulative = nums_cumulative_sum(a_xG)
# Total xG per team: the last running-total entry, rounded to two decimals
hlast = round(h_cumulative[-1], 2)
alast = round(a_cumulative[-1], 2)
# Create the figure and give both the figure and the axes the same dark background
fig, ax = plt.subplots(figsize=(10, 5))
fig.set_facecolor('#3d4849')
ax.patch.set_facecolor('#3d4849')
#set up our base layer
mpl.rcParams['xtick.color'] = 'white'
mpl.rcParams['ytick.color'] = 'white'
# The rcParams above are set after this axes was created, so also colour
# its ticks and tick labels explicitly.
ax.tick_params(colors='white')
ax.grid(ls='dotted', lw=.5, color='lightgrey', axis='y', zorder=1)
# Hide all four borders of the plot area
spines = ['top', 'bottom', 'left', 'right']
for side in spines:
    ax.spines[side].set_visible(False)
plt.xticks([0, 15, 30, 45, 60, 75, 90])
plt.xlabel('Minute', fontname='Andale Mono', color='white', fontsize=16)
plt.ylabel('xG', fontname='Andale Mono', color='white', fontsize=16)
#plot the step graphs
# where='post' holds each cumulative value until the next shot's minute
ax.step(x=a_min, y=a_cumulative, color='#d3d3d3', label=ateam, linewidth=5, where='post')
# The home line is labelled with the home team (it was previously labelled ateam)
ax.step(x=h_min, y=h_cumulative, color='#fd3607', label=hteam, linewidth=5, where='post')
[<matplotlib.lines.Line2D at 0x10013cb8>]