import pandas as pd
from mplsoccer.pitch import Pitch
#read in csv
df = pd.read_csv('valladolidA.csv')
#filter df to get only the team we want
df = df[df['teamId']=='Barcelona']
#now we want to find our passes and recipients and then filter for only passes
df['passer'] = df['playerId']
df['recipient'] = df['playerId'].shift(-1)
#find passes and then only look for the successful passes
passes = df[df['type']=='Pass']
successful = passes[passes['outcome']=='Successful']
successful
id | eventId | minute | second | teamId | x | y | period | type | outcome | playerId | endX | endY | passer | recipient | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 2248226929 | 3 | 0 | 1.0 | Barcelona | 50.0 | 50.0 | 1 | Pass | Successful | 9.0 | 43.9 | 46.9 | 9.0 | 8.0 |
3 | 2248226941 | 4 | 0 | 2.0 | Barcelona | 43.8 | 46.9 | 1 | Pass | Successful | 8.0 | 36.4 | 56.0 | 8.0 | 21.0 |
4 | 2248226951 | 5 | 0 | 4.0 | Barcelona | 36.6 | 56.6 | 1 | Pass | Successful | 21.0 | 28.5 | 73.8 | 21.0 | 15.0 |
5 | 2248226973 | 6 | 0 | 5.0 | Barcelona | 28.3 | 74.1 | 1 | Pass | Successful | 15.0 | 11.6 | 51.8 | 15.0 | 1.0 |
6 | 2248226975 | 7 | 0 | 8.0 | Barcelona | 11.2 | 53.7 | 1 | Pass | Successful | 1.0 | 11.6 | 81.4 | 1.0 | 15.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1651 | 2248256867 | 1018 | 93 | 14.0 | Barcelona | 76.7 | 31.8 | 2 | Pass | Successful | 8.0 | 76.7 | 43.0 | 8.0 | 10.0 |
1652 | 2248256869 | 1019 | 93 | 17.0 | Barcelona | 79.6 | 24.2 | 2 | Pass | Successful | 10.0 | 77.1 | 53.6 | 10.0 | 14.0 |
1653 | 2248256983 | 1020 | 93 | 18.0 | Barcelona | 77.1 | 56.4 | 2 | Pass | Successful | 14.0 | 69.0 | 56.9 | 14.0 | 44721.0 |
1654 | 2248256989 | 1021 | 93 | 20.0 | Barcelona | 68.3 | 55.2 | 2 | Pass | Successful | 44721.0 | 87.0 | 17.3 | 44721.0 | 2.0 |
1667 | 2248257081 | 1027 | 93 | 50.0 | Barcelona | 75.6 | 4.6 | 2 | Pass | Successful | 2.0 | 77.1 | 26.8 | 2.0 | 10.0 |
635 rows × 15 columns
#find the first subsititution and filter the successful dataframe to be less than that minute
subs = df[df['type']=='SubstitutionOff']
subs = subs['minute']
firstSub = subs.min()
successful = successful[successful['minute'] < firstSub]
successful
id | eventId | minute | second | teamId | x | y | period | type | outcome | playerId | endX | endY | passer | recipient | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 2248226929 | 3 | 0 | 1.0 | Barcelona | 50.0 | 50.0 | 1 | Pass | Successful | 9.0 | 43.9 | 46.9 | 9.0 | 8.0 |
3 | 2248226941 | 4 | 0 | 2.0 | Barcelona | 43.8 | 46.9 | 1 | Pass | Successful | 8.0 | 36.4 | 56.0 | 8.0 | 21.0 |
4 | 2248226951 | 5 | 0 | 4.0 | Barcelona | 36.6 | 56.6 | 1 | Pass | Successful | 21.0 | 28.5 | 73.8 | 21.0 | 15.0 |
5 | 2248226973 | 6 | 0 | 5.0 | Barcelona | 28.3 | 74.1 | 1 | Pass | Successful | 15.0 | 11.6 | 51.8 | 15.0 | 1.0 |
6 | 2248226975 | 7 | 0 | 8.0 | Barcelona | 11.2 | 53.7 | 1 | Pass | Successful | 1.0 | 11.6 | 81.4 | 1.0 | 15.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1230 | 2248252877 | 782 | 69 | 30.0 | Barcelona | 83.1 | 42.8 | 2 | Pass | Successful | 9.0 | 89.9 | 21.5 | 9.0 | 2.0 |
1231 | 2248252881 | 783 | 69 | 32.0 | Barcelona | 90.5 | 23.0 | 2 | Pass | Successful | 2.0 | 78.6 | 36.9 | 2.0 | 8.0 |
1234 | 2248252897 | 785 | 69 | 36.0 | Barcelona | 84.6 | 27.7 | 2 | Pass | Successful | 8.0 | 90.3 | 14.3 | 8.0 | 2.0 |
1235 | 2248252905 | 786 | 69 | 37.0 | Barcelona | 90.7 | 14.3 | 2 | Pass | Successful | 2.0 | 88.4 | 15.3 | 2.0 | 8.0 |
1236 | 2248252913 | 787 | 69 | 38.0 | Barcelona | 88.4 | 15.3 | 2 | Pass | Successful | 8.0 | 80.1 | 33.0 | 8.0 | 16.0 |
505 rows × 15 columns
#this makes it so our passer and recipients are float values
pas = pd.to_numeric(successful['passer'],downcast='integer')
rec = pd.to_numeric(successful['recipient'],downcast='integer')
successful['passer'] = pas
successful['recipient'] = rec
<ipython-input-68-7dc55db856cb>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy successful['passer'] = pas <ipython-input-68-7dc55db856cb>:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy successful['recipient'] = rec
#now we need to find the average locations and counts of the passes
average_locations = successful.groupby('passer').agg({'x':['mean'],'y':['mean','count']})
average_locations.columns = ['x','y','count']
average_locations
x | y | count | |
---|---|---|---|
passer | |||
1 | 7.124000 | 50.324000 | 25 |
2 | 68.274359 | 11.915385 | 39 |
4 | 32.895833 | 43.518750 | 48 |
8 | 53.332927 | 41.608537 | 82 |
9 | 69.916667 | 51.383333 | 6 |
10 | 63.201613 | 42.351613 | 62 |
15 | 38.409091 | 74.378182 | 55 |
16 | 69.594286 | 69.471429 | 35 |
18 | 55.242553 | 83.793617 | 47 |
21 | 55.663636 | 52.263636 | 55 |
28 | 49.156863 | 16.282353 | 51 |
successful
id | eventId | minute | second | teamId | x | y | period | type | outcome | playerId | endX | endY | passer | recipient | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 2248226929 | 3 | 0 | 1.0 | Barcelona | 50.0 | 50.0 | 1 | Pass | Successful | 9.0 | 43.9 | 46.9 | 9 | 8 |
3 | 2248226941 | 4 | 0 | 2.0 | Barcelona | 43.8 | 46.9 | 1 | Pass | Successful | 8.0 | 36.4 | 56.0 | 8 | 21 |
4 | 2248226951 | 5 | 0 | 4.0 | Barcelona | 36.6 | 56.6 | 1 | Pass | Successful | 21.0 | 28.5 | 73.8 | 21 | 15 |
5 | 2248226973 | 6 | 0 | 5.0 | Barcelona | 28.3 | 74.1 | 1 | Pass | Successful | 15.0 | 11.6 | 51.8 | 15 | 1 |
6 | 2248226975 | 7 | 0 | 8.0 | Barcelona | 11.2 | 53.7 | 1 | Pass | Successful | 1.0 | 11.6 | 81.4 | 1 | 15 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1230 | 2248252877 | 782 | 69 | 30.0 | Barcelona | 83.1 | 42.8 | 2 | Pass | Successful | 9.0 | 89.9 | 21.5 | 9 | 2 |
1231 | 2248252881 | 783 | 69 | 32.0 | Barcelona | 90.5 | 23.0 | 2 | Pass | Successful | 2.0 | 78.6 | 36.9 | 2 | 8 |
1234 | 2248252897 | 785 | 69 | 36.0 | Barcelona | 84.6 | 27.7 | 2 | Pass | Successful | 8.0 | 90.3 | 14.3 | 8 | 2 |
1235 | 2248252905 | 786 | 69 | 37.0 | Barcelona | 90.7 | 14.3 | 2 | Pass | Successful | 2.0 | 88.4 | 15.3 | 2 | 8 |
1236 | 2248252913 | 787 | 69 | 38.0 | Barcelona | 88.4 | 15.3 | 2 | Pass | Successful | 8.0 | 80.1 | 33.0 | 8 | 16 |
505 rows × 15 columns
#now we need to find the number of passes between each player
pass_between = successful.groupby(['passer','recipient']).id.count().reset_index()
pass_between.rename({'id':'pass_count'},axis='columns',inplace=True)
#merge the average location dataframe. We need to merge on the passer first then the recipient
pass_between = pass_between.merge(average_locations, left_on='passer',right_index=True)
pass_between = pass_between.merge(average_locations, left_on='recipient',right_index=True,suffixes=['', '_end'])
pass_between
passer | recipient | pass_count | x | y | count | x_end | y_end | count_end | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | 3 | 7.124000 | 50.324000 | 25 | 68.274359 | 11.915385 | 39 |
18 | 4 | 2 | 1 | 32.895833 | 43.518750 | 48 | 68.274359 | 11.915385 | 39 |
26 | 8 | 2 | 8 | 53.332927 | 41.608537 | 82 | 68.274359 | 11.915385 | 39 |
35 | 9 | 2 | 2 | 69.916667 | 51.383333 | 6 | 68.274359 | 11.915385 | 39 |
39 | 10 | 2 | 6 | 63.201613 | 42.351613 | 62 | 68.274359 | 11.915385 | 39 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
52 | 15 | 10 | 1 | 38.409091 | 74.378182 | 55 | 63.201613 | 42.351613 | 62 |
60 | 16 | 10 | 10 | 69.594286 | 69.471429 | 35 | 63.201613 | 42.351613 | 62 |
70 | 18 | 10 | 7 | 55.242553 | 83.793617 | 47 | 63.201613 | 42.351613 | 62 |
77 | 21 | 10 | 14 | 55.663636 | 52.263636 | 55 | 63.201613 | 42.351613 | 62 |
87 | 28 | 10 | 5 | 49.156863 | 16.282353 | 51 | 63.201613 | 42.351613 | 62 |
90 rows × 9 columns
#set minimum threshold of combinations.. I like 5 for high passing teams. 2 or 3 for low passing.
pass_between = pass_between[pass_between['pass_count']>5]
pass_between
passer | recipient | pass_count | x | y | count | x_end | y_end | count_end | |
---|---|---|---|---|---|---|---|---|---|
26 | 8 | 2 | 8 | 53.332927 | 41.608537 | 82 | 68.274359 | 11.915385 | 39 |
39 | 10 | 2 | 6 | 63.201613 | 42.351613 | 62 | 68.274359 | 11.915385 | 39 |
83 | 28 | 2 | 10 | 49.156863 | 16.282353 | 51 | 68.274359 | 11.915385 | 39 |
27 | 8 | 4 | 8 | 53.332927 | 41.608537 | 82 | 32.895833 | 43.518750 | 48 |
50 | 15 | 4 | 9 | 38.409091 | 74.378182 | 55 | 32.895833 | 43.518750 | 48 |
84 | 28 | 4 | 13 | 49.156863 | 16.282353 | 51 | 32.895833 | 43.518750 | 48 |
11 | 2 | 8 | 11 | 68.274359 | 11.915385 | 39 | 53.332927 | 41.608537 | 82 |
19 | 4 | 8 | 6 | 32.895833 | 43.518750 | 48 | 53.332927 | 41.608537 | 82 |
41 | 10 | 8 | 17 | 63.201613 | 42.351613 | 62 | 53.332927 | 41.608537 | 82 |
51 | 15 | 8 | 6 | 38.409091 | 74.378182 | 55 | 53.332927 | 41.608537 | 82 |
68 | 18 | 8 | 6 | 55.242553 | 83.793617 | 47 | 53.332927 | 41.608537 | 82 |
76 | 21 | 8 | 13 | 55.663636 | 52.263636 | 55 | 53.332927 | 41.608537 | 82 |
85 | 28 | 8 | 13 | 49.156863 | 16.282353 | 51 | 53.332927 | 41.608537 | 82 |
21 | 4 | 15 | 20 | 32.895833 | 43.518750 | 48 | 38.409091 | 74.378182 | 55 |
30 | 8 | 15 | 6 | 53.332927 | 41.608537 | 82 | 38.409091 | 74.378182 | 55 |
71 | 18 | 15 | 7 | 55.242553 | 83.793617 | 47 | 38.409091 | 74.378182 | 55 |
78 | 21 | 15 | 7 | 55.663636 | 52.263636 | 55 | 38.409091 | 74.378182 | 55 |
31 | 8 | 16 | 8 | 53.332927 | 41.608537 | 82 | 69.594286 | 69.471429 | 35 |
53 | 15 | 16 | 6 | 38.409091 | 74.378182 | 55 | 69.594286 | 69.471429 | 35 |
72 | 18 | 16 | 10 | 55.242553 | 83.793617 | 47 | 69.594286 | 69.471429 | 35 |
32 | 8 | 18 | 6 | 53.332927 | 41.608537 | 82 | 55.242553 | 83.793617 | 47 |
54 | 15 | 18 | 15 | 38.409091 | 74.378182 | 55 | 55.242553 | 83.793617 | 47 |
62 | 16 | 18 | 11 | 69.594286 | 69.471429 | 35 | 55.242553 | 83.793617 | 47 |
23 | 4 | 21 | 6 | 32.895833 | 43.518750 | 48 | 55.663636 | 52.263636 | 55 |
33 | 8 | 21 | 10 | 53.332927 | 41.608537 | 82 | 55.663636 | 52.263636 | 55 |
46 | 10 | 21 | 12 | 63.201613 | 42.351613 | 62 | 55.663636 | 52.263636 | 55 |
55 | 15 | 21 | 10 | 38.409091 | 74.378182 | 55 | 55.663636 | 52.263636 | 55 |
73 | 18 | 21 | 9 | 55.242553 | 83.793617 | 47 | 55.663636 | 52.263636 | 55 |
16 | 2 | 28 | 7 | 68.274359 | 11.915385 | 39 | 49.156863 | 16.282353 | 51 |
24 | 4 | 28 | 7 | 32.895833 | 43.518750 | 48 | 49.156863 | 16.282353 | 51 |
34 | 8 | 28 | 11 | 53.332927 | 41.608537 | 82 | 49.156863 | 16.282353 | 51 |
47 | 10 | 28 | 7 | 63.201613 | 42.351613 | 62 | 49.156863 | 16.282353 | 51 |
81 | 21 | 28 | 7 | 55.663636 | 52.263636 | 55 | 49.156863 | 16.282353 | 51 |
48 | 15 | 1 | 6 | 38.409091 | 74.378182 | 55 | 7.124000 | 50.324000 | 25 |
13 | 2 | 10 | 11 | 68.274359 | 11.915385 | 39 | 63.201613 | 42.351613 | 62 |
29 | 8 | 10 | 23 | 53.332927 | 41.608537 | 82 | 63.201613 | 42.351613 | 62 |
60 | 16 | 10 | 10 | 69.594286 | 69.471429 | 35 | 63.201613 | 42.351613 | 62 |
70 | 18 | 10 | 7 | 55.242553 | 83.793617 | 47 | 63.201613 | 42.351613 | 62 |
77 | 21 | 10 | 14 | 55.663636 | 52.263636 | 55 | 63.201613 | 42.351613 | 62 |
#plot the pitch
pitch = pitch = Pitch(pitch_type='statsbomb', orientation='horizontal',
pitch_color='#22312b', line_color='#c7d5cc', figsize=(13, 8),
constrained_layout=True, tight_layout=False)
fig, ax = pitch.draw()
#plot the arrows
arrows = pitch.arrows(1.2*pass_between.x,.8*pass_between.y,1.2*pass_between.x_end,.8*pass_between.y_end,
width = 5, headwidth = 5, color = 'w', ax = ax, zorder = 1, alpha = .5)
#plot the nodes
nodes = pitch.scatter(1.2*average_locations.x,.8*average_locations.y,
s = 300, color = '#d3d3d3', edgecolors = 'black', linewidth = 2.5, alpha = 1, zorder = 1, ax=ax)
#make annotations