#!/usr/bin/env python
# coding: utf-8

# In[192]:

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import highlight_text

# In[209]:

# the famous import font code to use Andale Mono
import matplotlib.font_manager
from IPython.core.display import HTML


def make_html(fontname):
    """Return the display snippet for one font name.

    The snippet is a newline, the name, a colon and a space, the name again,
    and a closing newline.

    NOTE(review): going by the function name this template presumably carried
    HTML markup at some point -- confirm against the original notebook.
    """
    return "\n{font}: {font}\n".format(font=fontname)


# One snippet per distinct font name known to matplotlib's font manager,
# in sorted order.
code = "\n".join(
    make_html(font)
    for font in sorted({f.name for f in matplotlib.font_manager.fontManager.ttflist})
)

# In[51]:

#import data
df = pd.read_csv('beeswarmTutorial.csv')

# In[202]:

#set default colors
text_color = 'white'
background = '#313332'

# In[53]:

#look at top of dataframe
df.head()

# In[54]:

#create a new column for progressive passes per 90
df['per90'] = df['Prog'] / df['90s']
df

# In[55]:

#filter the dataframe so it is only players who have played at least 6.5 90's which is about 585 minutes
df = df[df['90s'] >= 6.5].reset_index()
df

# In[56]:

df.describe()

# In[60]:

df = df.sort_values(by='per90', ascending=False)

# In[62]:

# drop the goalkeepers
df = df[df['Pos'] != 'GK']

# In[80]:

df.head(10)

# In[85]:

# Set the tick colours before the figure is created so the defaults are
# already in place when the axes are built.
mpl.rcParams['xtick.color'] = text_color
mpl.rcParams['ytick.color'] = text_color

fig, ax = plt.subplots(figsize=(10, 5))
fig.set_facecolor(background)
ax.patch.set_facecolor(background)

#set up our base layer
ax.grid(ls='dotted', lw=.5, color='lightgrey', axis='y', zorder=1)

# hide the frame around the plot
spines = ['top', 'bottom', 'left', 'right']
for x in spines:
    ax.spines[x].set_visible(False)

sns.swarmplot(x='per90', data=df, color='white', zorder=1, ax=ax)

#plot thiago
# the highlight positions are hard-coded per90 values, not looked up in df
ax.scatter(x=9.87, y=0, c='red', edgecolor='white', s=200, zorder=2)
ax.text(9.87, -.04, 'Thiago', color=text_color)

#plot de bruyne
ax.scatter(x=7.564, y=0, c='blue', edgecolor='white', s=200, zorder=2)

ax.set_title('Progressive Passes in the Premier League 2020/21', color=text_color, fontsize=14)
ax.set_xlabel('Progressive Passes per 90', color=text_color)

#plt.savefig('swarm.png',dpi=500,bbox_inches = 'tight',facecolor=background)

# In[184]:

#import our next dataframe
df2 = pd.read_csv('beeswarm2.csv')

# In[185]:

df2.head(10)

# In[186]:

#do some data preprocessing and cleaning
#split the player names: keep only the part before the first backslash
df2['Player'] = df2['Player'].str.split('\\', expand=True)[0]
df2 = df2[df2['Pos'] != 'GK']
df2 = df2[df2['90s'] > 6.5].reset_index()

# In[187]:

#make the per 90 stats
#metrics = ['Prog90','1/390','xA90','Cmp%','KP90','PPA90']
df2['Prog'] = df2['Prog'] / df2['90s'] df2['1/3'] = df2['1/3'] / df2['90s'] df2['xA'] = df2['xA'] / df2['90s'] df2['KP'] = df2['KP'] / df2['90s'] df2['PPA'] = df2['PPA'] / df2['90s'] # In[188]: print(df2.head()) df2.Player.unique() # In[189]: #create a list of 6 metrics to compare metrics = ['Prog','1/3','xA','Cmp%','KP','PPA'] # In[232]: fig,axes = plt.subplots(3,2,figsize=(14,10)) fig.set_facecolor(background) ax.patch.set_facecolor(background) #set up our base layer mpl.rcParams['xtick.color'] = text_color mpl.rcParams['ytick.color'] = text_color #create a list of comparisons counter=0 counter2=0 met_counter = 0 for i,ax in zip(df2['Player'],axes.flatten()): ax.set_facecolor(background) ax.grid(ls='dotted',lw=.5,color='lightgrey',axis='y',zorder=1) spines = ['top','bottom','left','right'] for x in spines: if x in spines: ax.spines[x].set_visible(False) sns.swarmplot(x=metrics[met_counter],data=df2,ax=axes[counter,counter2],zorder=1,color='#64645e') ax.set_xlabel(f'{metrics[met_counter]}',c='white') for x in range(len(df2['Player'])): #if df2['Player'][x] == 'Thiago Alcántara': #ax.scatter(x=df2[metrics[met_counter]][x],y=0,s=200,c='red',zorder=2) if df2['Player'][x] == 'İlkay Gündoğan': ax.scatter(x=df2[metrics[met_counter]][x],y=0,s=200,c='#6CABDD',zorder=2) met_counter+=1 if counter2 == 0: counter2 = 1 continue if counter2 == 1: counter2 = 0 counter+=1 s='