#Import the libraries and packages we need
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pandas as pd
import highlight_text
# the famous import font code to use Andale Mono
import matplotlib.font_manager
from IPython.core.display import HTML
def make_html(fontname):
    """Return an HTML paragraph that previews a font by rendering its own name.

    Args:
        fontname: Font family name. It is used both as the visible label and
            as the CSS ``font-family`` of the preview span.

    Returns:
        A ``<p>`` element of the form
        ``<p>NAME: <span style='font-family:NAME; font-size: 24px;'>NAME</span></p>``.
    """
    # The original template never closed the <span>, producing malformed HTML;
    # the closing tag is emitted explicitly here.
    return (
        "<p>{font}: <span style='font-family:{font}; font-size: 24px;'>"
        "{font}</span></p>"
    ).format(font=fontname)
# One HTML preview line per distinct font name known to matplotlib's font
# manager, in alphabetical order, joined with newlines.
code = "\n".join(
    make_html(name)
    for name in sorted(
        {entry.name for entry in matplotlib.font_manager.fontManager.ttflist}
    )
)
# Load the league-standings CSV; the relative path is resolved against the
# current working directory.
df = pd.read_csv('1995_96 - 2020 Premier league standings.csv')
# Bare expression so the frame is displayed when this runs as a notebook cell.
df
Rk | Squad | MP | W | D | L | GF | GA | Pts | Attendance | Top Team Scorer | Goalkeeper | Year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester Utd | 38 | 25 | 7 | 6 | 73 | 35 | 82 | 41001 | Eric Cantona - 14 | Peter Schmeichel | 1996 |
1 | 2 | Newcastle Utd | 38 | 24 | 6 | 8 | 66 | 37 | 78 | 36501 | Les Ferdinand - 25 | Shaka Hislop | 1996 |
2 | 3 | Liverpool | 38 | 20 | 11 | 7 | 70 | 34 | 71 | 39553 | Robbie Fowler - 28 | David James | 1996 |
3 | 4 | Aston Villa | 38 | 18 | 9 | 11 | 52 | 35 | 63 | 37492 | Dwight Yorke - 17 | Mark Bosnich | 1996 |
4 | 5 | Arsenal | 38 | 17 | 12 | 9 | 49 | 32 | 63 | 32614 | Ian Wright - 15 | David Seaman | 1996 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
495 | 16 | West Ham | 38 | 10 | 9 | 19 | 49 | 62 | 39 | 44155 | Michail Antonio - 10 | Łukasz Fabiański | 2020 |
496 | 17 | Aston Villa | 38 | 9 | 8 | 21 | 41 | 67 | 35 | 28505 | Jack Grealish - 8 | Tom Heaton | 2020 |
497 | 18 | Bournemouth | 38 | 9 | 7 | 22 | 40 | 65 | 34 | 7745 | Callum Wilson - 8 | Aaron Ramsdale | 2020 |
498 | 19 | Watford | 38 | 8 | 10 | 20 | 36 | 64 | 34 | 15353 | Troy Deeney - 10 | Ben Foster | 2020 |
499 | 20 | Norwich City | 38 | 5 | 6 | 27 | 26 | 75 | 21 | 19913 | Teemu Pukki - 11 | Tim Krul | 2020 |
500 rows × 13 columns
# Goal difference = goals for (GF) minus goals against (GA), stored as a new
# 'GD' column on the same frame.
df["GD"] = df["GF"].sub(df["GA"])
# Bare expression so the updated frame is displayed.
df
Rk | Squad | MP | W | D | L | GF | GA | Pts | Attendance | Top Team Scorer | Goalkeeper | Year | GD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester Utd | 38 | 25 | 7 | 6 | 73 | 35 | 82 | 41001 | Eric Cantona - 14 | Peter Schmeichel | 1996 | 38 |
1 | 2 | Newcastle Utd | 38 | 24 | 6 | 8 | 66 | 37 | 78 | 36501 | Les Ferdinand - 25 | Shaka Hislop | 1996 | 29 |
2 | 3 | Liverpool | 38 | 20 | 11 | 7 | 70 | 34 | 71 | 39553 | Robbie Fowler - 28 | David James | 1996 | 36 |
3 | 4 | Aston Villa | 38 | 18 | 9 | 11 | 52 | 35 | 63 | 37492 | Dwight Yorke - 17 | Mark Bosnich | 1996 | 17 |
4 | 5 | Arsenal | 38 | 17 | 12 | 9 | 49 | 32 | 63 | 32614 | Ian Wright - 15 | David Seaman | 1996 | 17 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
495 | 16 | West Ham | 38 | 10 | 9 | 19 | 49 | 62 | 39 | 44155 | Michail Antonio - 10 | Łukasz Fabiański | 2020 | -13 |
496 | 17 | Aston Villa | 38 | 9 | 8 | 21 | 41 | 67 | 35 | 28505 | Jack Grealish - 8 | Tom Heaton | 2020 | -26 |
497 | 18 | Bournemouth | 38 | 9 | 7 | 22 | 40 | 65 | 34 | 7745 | Callum Wilson - 8 | Aaron Ramsdale | 2020 | -25 |
498 | 19 | Watford | 38 | 8 | 10 | 20 | 36 | 64 | 34 | 15353 | Troy Deeney - 10 | Ben Foster | 2020 | -28 |
499 | 20 | Norwich City | 38 | 5 | 6 | 27 | 26 | 75 | 21 | 19913 | Teemu Pukki - 11 | Tim Krul | 2020 | -49 |
500 rows × 14 columns
# Target: the rank column, kept as a pandas Series.
y = df["Rk"]
# Feature: goal difference reshaped into a single-column 2-D array (n_rows, 1).
x = np.reshape(df["GD"].values, (-1, 1))
# Inspect the shape of the feature array.
x.shape
(500, 1)
# Inspect the shape of the target series.
y.shape
(500,)
# Fit an ordinary least-squares line predicting rank (y) from goal
# differential (x).
model = LinearRegression()
model.fit(x, y)

# Score of the fit on the same data it was trained on.
r_sq = model.score(x, y)

# Fitted parameters; `slope` is kept exactly as scikit-learn returns `coef_`.
slope = model.coef_
intercept = model.intercept_

# Predicted rank for every observed goal differential, computed by hand from
# the fitted parameters.
y_pred = slope * x + intercept
# Scatter plot of final league rank against goal differential with the fitted
# regression line overlaid, annotated, and saved to 'premline.png'.
fig, ax = plt.subplots(figsize=(10, 10))
fig.set_facecolor('#f3edd3')
ax.patch.set_facecolor('#f3edd3')
ax.grid(ls='dotted', lw=.5, color='lightgrey', axis='y', zorder=1)

# Hide the top and right borders of the axes.
spines = ['top', 'right']
for side in spines:
    ax.spines[side].set_visible(False)

#plot data
# All drawing goes through `ax` so the cell does not depend on pyplot's
# notion of the "current" axes.
ax.scatter(x, y, alpha=.7)
ax.plot(x, y_pred, c='red', linestyle='--', dashes=(5, 5))
ax.set_ylim(.5, 20.5)
ax.invert_yaxis()  # put rank 1 at the top of the chart
ax.set_yticks(list(range(1, 21)))
ax.set_xlabel('Goal Differential', fontsize=18, fontname='Andale Mono')
ax.set_ylabel('Final League Rank', fontsize=18, fontname='Andale Mono')
ax.set_title('Evaluating the Relationship Between Goal Differential and League Table Rank\nPremier League Seasons 1995/96 - 2019/2020', ha='center', fontsize=16, fontname='Andale Mono')

#annotate teams
ax.annotate(xy=(79, .7), text='Man City,2017/18')
ax.annotate(xy=(-75, 19.5), text='Derby County,2007/08')

#analysis
# `slope` may be a one-element array (scikit-learn's coef_); reduce it to a
# plain float so the equation is not rendered with brackets and full float
# precision. The sign is pulled out to avoid printing "+ -0.123".
slope_value = float(np.ravel(slope)[0])
intercept_value = float(intercept)
slope_sign = '-' if slope_value < 0 else '+'
ax.annotate(xy=(-65, 1.65), text=f'R-Squared = {round(r_sq,2)}\nThe regression equation: y = {intercept_value:.2f} {slope_sign} {abs(slope_value):.3f} * x ', fontname='Andale Mono')
#ax.annotate(xy=(10,9),text='No team that finished 9th underperformed the model',fontname='Andale Mono')
#ax.annotate(xy=(-5,17),text='No team that finished 17th overperformed the model',fontname='Andale Mono')
ax.annotate(xy=(-65, 3), text='The model fits the data very well.\nInterestingly enough, the spread of all the data points\nfor each rank is fairly small.', fontname='Andale Mono')

# Pass the face colour explicitly so the saved file keeps the background.
fig.savefig('premline.png', dpi=300, bbox_inches='tight', facecolor='#f3edd3')
# Show the 30 rows with the lowest 'Rk' values (ascending order is the
# default for sort_values).
df.sort_values(by="Rk").head(n=30)
Rk | Squad | MP | W | D | L | GF | GA | Pts | Attendance | Top Team Scorer | Goalkeeper | Year | GD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester Utd | 38 | 25 | 7 | 6 | 73 | 35 | 82 | 41001 | Eric Cantona - 14 | Peter Schmeichel | 1996 | 38 |
440 | 1 | Manchester City | 38 | 32 | 4 | 2 | 106 | 27 | 100 | 54070 | Sergio Agüero - 21 | Ederson | 2018 | 79 |
60 | 1 | Manchester Utd | 38 | 22 | 13 | 3 | 80 | 37 | 79 | 55188 | Dwight Yorke - 18 | Peter Schmeichel | 1999 | 43 |
420 | 1 | Chelsea | 38 | 30 | 3 | 5 | 85 | 33 | 93 | 41508 | Diego Costa - 20 | Thibaut Courtois | 2017 | 52 |
80 | 1 | Manchester Utd | 38 | 28 | 7 | 3 | 97 | 45 | 91 | 55107 | Dwight Yorke - 20 | Mark Bosnich | 2000 | 52 |
400 | 1 | Leicester City | 38 | 23 | 12 | 3 | 68 | 36 | 81 | 31998 | Jamie Vardy - 24 | Kasper Schmeichel | 2016 | 32 |
100 | 1 | Manchester Utd | 38 | 24 | 8 | 6 | 79 | 31 | 80 | 67489 | Teddy Sheringham - 15 | Fabien Barthez | 2001 | 48 |
380 | 1 | Chelsea | 38 | 26 | 9 | 3 | 73 | 32 | 87 | 41546 | Diego Costa - 20 | Thibaut Courtois | 2015 | 41 |
120 | 1 | Arsenal | 38 | 26 | 9 | 3 | 79 | 36 | 87 | 38055 | Thierry Henry - 24 | David Seaman | 2002 | 43 |
360 | 1 | Manchester City | 38 | 27 | 5 | 6 | 102 | 37 | 86 | 47075 | Yaya Touré - 20 | Joe Hart | 2014 | 65 |
140 | 1 | Manchester Utd | 38 | 25 | 8 | 5 | 74 | 34 | 83 | 67602 | Ruud van Nistelrooy - 25 | Fabien Barthez | 2003 | 40 |
340 | 1 | Manchester Utd | 38 | 28 | 5 | 5 | 86 | 43 | 89 | 75530 | Robin van Persie - 26 | David de Gea | 2013 | 43 |
160 | 1 | Arsenal | 38 | 26 | 12 | 0 | 73 | 26 | 90 | 38079 | Thierry Henry - 30 | Jens Lehmann | 2004 | 47 |
320 | 1 | Manchester City | 38 | 28 | 5 | 5 | 93 | 29 | 89 | 47045 | Sergio Agüero - 23 | Joe Hart | 2012 | 64 |
180 | 1 | Chelsea | 38 | 29 | 8 | 1 | 72 | 15 | 95 | 41870 | Frank Lampard - 13 | Petr Čech | 2005 | 57 |
300 | 1 | Manchester Utd | 38 | 23 | 11 | 4 | 78 | 37 | 80 | 75109 | Dimitar Berbatov - 20 | Edwin van der Sar | 2011 | 41 |
280 | 1 | Chelsea | 38 | 27 | 5 | 6 | 103 | 32 | 86 | 41423 | Didier Drogba - 29 | Petr Čech | 2010 | 71 |
200 | 1 | Chelsea | 38 | 29 | 4 | 5 | 72 | 22 | 91 | 41902 | Frank Lampard - 16 | Petr Čech | 2006 | 50 |
260 | 1 | Manchester Utd | 38 | 28 | 6 | 4 | 68 | 24 | 90 | 75304 | Cristiano Ronaldo - 18 | Edwin van der Sar | 2009 | 44 |
220 | 1 | Manchester Utd | 38 | 28 | 5 | 5 | 83 | 27 | 89 | 75826 | Cristiano Ronaldo - 17 | Edwin van der Sar | 2007 | 56 |
40 | 1 | Arsenal | 38 | 23 | 9 | 6 | 68 | 33 | 78 | 38053 | Dennis Bergkamp - 16 | David Seaman | 1998 | 35 |
460 | 1 | Manchester City | 38 | 32 | 2 | 4 | 95 | 23 | 98 | 54130 | Sergio Agüero - 21 | Ederson | 2019 | 72 |
240 | 1 | Manchester Utd | 38 | 27 | 6 | 5 | 80 | 22 | 87 | 75691 | Cristiano Ronaldo - 31 | Edwin van der Sar | 2008 | 58 |
480 | 1 | Liverpool | 38 | 32 | 3 | 3 | 85 | 33 | 99 | 41955 | Mohamed Salah - 19 | Alisson | 2020 | 52 |
20 | 1 | Manchester Utd | 38 | 21 | 12 | 5 | 76 | 44 | 75 | 55081 | Ole Gunnar Solskjær - 18 | Peter Schmeichel | 1997 | 32 |
481 | 2 | Manchester City | 38 | 26 | 3 | 9 | 102 | 35 | 81 | 37097 | Raheem Sterling - 20 | Ederson | 2020 | 67 |
161 | 2 | Chelsea | 38 | 24 | 7 | 7 | 67 | 30 | 79 | 41234 | Jimmy Floyd Hasselbaink - 13 | Carlo Cudicini | 2004 | 37 |
401 | 2 | Arsenal | 38 | 20 | 11 | 7 | 65 | 36 | 71 | 59944 | Olivier Giroud - 16 | Petr Čech | 2016 | 29 |
341 | 2 | Manchester City | 38 | 23 | 9 | 6 | 66 | 34 | 78 | 46974 | Edin Džeko - 14 | Joe Hart | 2013 | 32 |
441 | 2 | Manchester Utd | 38 | 25 | 6 | 7 | 68 | 28 | 81 | 74976 | Romelu Lukaku - 16 | David de Gea | 2018 | 40 |