import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale
# Load Messi's per-season, per-competition statistics at Barcelona.
# The file is read as ISO-8859-1 (Latin-1); the competition names contain
# accented characters such as "Supercopa de España".
messi_csv = pd.read_csv(
    'messi_barca.csv',
    sep=',',
    encoding="ISO-8859-1",
)
# Bare expression: the notebook renders the whole frame as the cell output.
messi_csv
 | Season | Age | Competition | Matches Played | Starts | Minutes played | Goals scored | Assists | PK | PKatt | CrdY | CrdR | Goal/90 | Ast/90 | G+A/90 | G-PK/90 | G+A-PK/90 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2004-2005 | 17 | Champions League | 1 | 1 | 90 | 0 | 0 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
1 | 2004-2005 | 17 | La Liga | 7 | 0 | 70 | 1 | 0 | 0 | 0 | 0 | 0 | 1.29 | 0.00 | 1.29 | 1.29 | 1.29 |
2 | 2005-2006 | 18 | Champions League | 6 | 4 | 322 | 1 | 1 | 0 | 0 | 0 | 0 | 0.28 | 0.28 | 0.56 | 0.28 | 0.56 |
3 | 2005-2006 | 18 | La Liga | 17 | 11 | 911 | 6 | 3 | 0 | 0 | 2 | 0 | 0.59 | 0.30 | 0.89 | 0.59 | 0.89 |
4 | 2006-2007 | 19 | Champions League | 5 | 4 | 385 | 1 | 0 | 0 | 0 | 1 | 0 | 0.23 | 0.00 | 0.23 | 0.23 | 0.23 |
5 | 2006-2007 | 19 | La Liga | 26 | 23 | 1983 | 14 | 3 | 0 | 0 | 2 | 0 | 0.64 | 0.14 | 0.77 | 0.64 | 0.77 |
6 | 2007-2008 | 20 | Champions League | 9 | 9 | 728 | 6 | 2 | 1 | 1 | 2 | 0 | 0.74 | 0.25 | 0.99 | 0.62 | 0.87 |
7 | 2007-2008 | 20 | La Liga | 27 | 23 | 1973 | 10 | 12 | 4 | 4 | 2 | 0 | 0.46 | 0.55 | 1.00 | 0.27 | 0.82 |
8 | 2008-2009 | 21 | Champions League | 12 | 10 | 927 | 9 | 5 | 0 | 0 | 1 | 0 | 0.87 | 0.49 | 1.36 | 0.87 | 1.36 |
9 | 2008-2009 | 21 | La Liga | 31 | 27 | 2516 | 23 | 11 | 3 | 4 | 2 | 0 | 0.82 | 0.39 | 1.22 | 0.72 | 1.11 |
10 | 2009-2010 | 22 | Champions League | 11 | 11 | 987 | 8 | 0 | 0 | 0 | 0 | 0 | 0.73 | 0.00 | 0.73 | 0.73 | 0.73 |
11 | 2009-2010 | 22 | La Liga | 35 | 30 | 2805 | 34 | 9 | 1 | 1 | 3 | 0 | 1.09 | 0.29 | 1.38 | 1.06 | 1.35 |
12 | 2010-2011 | 23 | Champions League | 13 | 11 | 1046 | 12 | 3 | 1 | 2 | 0 | 0 | 1.03 | 0.26 | 1.29 | 0.95 | 1.20 |
13 | 2010-2011 | 23 | La Liga | 33 | 31 | 2858 | 31 | 19 | 4 | 4 | 3 | 0 | 0.98 | 0.60 | 1.57 | 0.85 | 1.45 |
14 | 2011-2012 | 24 | Champions League | 11 | 11 | 990 | 14 | 5 | 4 | 5 | 2 | 0 | 1.27 | 0.45 | 1.73 | 0.91 | 1.36 |
15 | 2011-2012 | 24 | La Liga | 37 | 36 | 3270 | 50 | 16 | 10 | 11 | 6 | 0 | 1.38 | 0.44 | 1.82 | 1.10 | 1.54 |
16 | 2012-2013 | 25 | Champions League | 11 | 9 | 826 | 8 | 2 | 0 | 0 | 0 | 0 | 0.87 | 0.22 | 1.09 | 0.87 | 1.09 |
17 | 2012-2013 | 25 | La Liga | 32 | 28 | 2650 | 46 | 11 | 4 | 4 | 1 | 0 | 1.56 | 0.37 | 1.94 | 1.43 | 1.80 |
18 | 2013-2014 | 26 | Champions League | 7 | 7 | 630 | 8 | 0 | 2 | 2 | 0 | 0 | 1.14 | 0.00 | 1.14 | 0.86 | 0.86 |
19 | 2013-2014 | 26 | La Liga | 31 | 29 | 2501 | 28 | 11 | 7 | 8 | 2 | 0 | 1.01 | 0.40 | 1.40 | 0.76 | 1.15 |
20 | 2014-2015 | 27 | Champions League | 13 | 13 | 1147 | 10 | 6 | 0 | 1 | 1 | 0 | 0.78 | 0.47 | 1.26 | 0.78 | 1.26 |
21 | 2014-2015 | 27 | Copa del Rey | 6 | 6 | 540 | 5 | 3 | 1 | 2 | 1 | 0 | 0.83 | 0.50 | 1.33 | 0.67 | 1.17 |
22 | 2014-2015 | 27 | La Liga | 38 | 37 | 3375 | 43 | 18 | 5 | 6 | 4 | 0 | 1.15 | 0.48 | 1.63 | 1.01 | 1.49 |
23 | 2015-2016 | 28 | Champions League | 7 | 7 | 630 | 6 | 1 | 1 | 1 | 1 | 0 | 0.86 | 0.14 | 1.00 | 0.71 | 0.86 |
24 | 2015-2016 | 28 | Copa del Rey | 5 | 5 | 480 | 5 | 5 | 0 | 0 | 1 | 0 | 0.94 | 0.94 | 1.87 | 0.94 | 1.87 |
25 | 2015-2016 | 28 | La Liga | 33 | 31 | 2729 | 26 | 16 | 3 | 6 | 3 | 0 | 0.86 | 0.53 | 1.39 | 0.76 | 1.29 |
26 | 2015-2016 | 28 | Supercopa de España | 2 | 2 | 180 | 1 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 |
27 | 2016-2017 | 29 | Champions League | 9 | 9 | 810 | 11 | 2 | 2 | 2 | 0 | 0 | 1.22 | 0.22 | 1.44 | 1.00 | 1.22 |
28 | 2016-2017 | 29 | Copa del Rey | 7 | 7 | 630 | 5 | 3 | 1 | 1 | 3 | 0 | 0.71 | 0.43 | 1.14 | 0.57 | 1.00 |
29 | 2016-2017 | 29 | La Liga | 34 | 32 | 2828 | 37 | 9 | 6 | 7 | 6 | 0 | 1.18 | 0.29 | 1.46 | 0.99 | 1.27 |
30 | 2016-2017 | 29 | Supercopa de España | 2 | 2 | 180 | 1 | 2 | 0 | 0 | 0 | 0 | 0.50 | 1.00 | 1.50 | 0.50 | 1.50 |
31 | 2017-2018 | 30 | Champions League | 10 | 8 | 783 | 6 | 2 | 0 | 0 | 2 | 0 | 0.69 | 0.23 | 0.92 | 0.69 | 0.92 |
32 | 2017-2018 | 30 | Copa del Rey | 6 | 6 | 508 | 4 | 4 | 0 | 1 | 1 | 0 | 0.71 | 0.71 | 1.42 | 0.71 | 1.42 |
33 | 2017-2018 | 30 | La Liga | 36 | 32 | 3002 | 34 | 12 | 2 | 4 | 3 | 0 | 1.02 | 0.36 | 1.38 | 0.96 | 1.32 |
34 | 2017-2018 | 30 | Supercopa de España | 2 | 2 | 180 | 1 | 0 | 1 | 1 | 1 | 0 | 0.50 | 0.00 | 0.50 | 0.00 | 0.00 |
35 | 2018-2019 | 31 | Champions League | 10 | 9 | 838 | 12 | 3 | 1 | 1 | 0 | 0 | 1.29 | 0.32 | 1.61 | 1.18 | 1.50 |
36 | 2018-2019 | 31 | Copa del Rey | 5 | 4 | 388 | 3 | 2 | 0 | 0 | 0 | 0 | 0.70 | 0.46 | 1.16 | 0.70 | 1.16 |
37 | 2018-2019 | 31 | La Liga | 34 | 29 | 2713 | 36 | 13 | 4 | 5 | 3 | 0 | 1.19 | 0.43 | 1.63 | 1.06 | 1.49 |
38 | 2018-2019 | 31 | Supercopa de España | 1 | 1 | 90 | 0 | 1 | 0 | 0 | 0 | 0 | 0.00 | 1.00 | 1.00 | 0.00 | 1.00 |
39 | 2019-2020 | 32 | Champions League | 8 | 7 | 662 | 3 | 3 | 0 | 0 | 2 | 0 | 0.41 | 0.41 | 0.82 | 0.41 | 0.82 |
40 | 2019-2020 | 32 | Copa del Rey | 2 | 2 | 180 | 2 | 1 | 0 | 0 | 1 | 0 | 1.00 | 0.50 | 1.50 | 1.00 | 1.50 |
41 | 2019-2020 | 32 | La Liga | 33 | 32 | 2880 | 25 | 20 | 5 | 5 | 4 | 0 | 0.78 | 0.62 | 1.41 | 0.62 | 1.25 |
42 | 2019-2020 | 32 | Supercopa de España | 1 | 1 | 90 | 1 | 0 | 0 | 0 | 0 | 0 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 |
# Print a concise summary of the frame: index range, column names,
# non-null counts and dtypes.
messi_csv.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Season          43 non-null     object
 1   Age             43 non-null     int64
 2   Competition     43 non-null     object
 3   Matches Played  43 non-null     int64
 4   Starts          43 non-null     int64
 5   Minutes played  43 non-null     int64
 6   Goals scored    43 non-null     int64
 7   Assists         43 non-null     int64
 8   PK              43 non-null     int64
 9   PKatt           43 non-null     int64
 10  CrdY            43 non-null     int64
 11  CrdR            43 non-null     int64
 12  Goal/90         43 non-null     float64
 13  Ast/90          43 non-null     float64
 14  G+A/90          43 non-null     float64
 15  G-PK/90         43 non-null     float64
 16  G+A-PK/90       43 non-null     float64
dtypes: float64(5), int64(10), object(2)
memory usage: 5.8+ KB
# List the column labels of the frame (the names usable for feature selection).
messi_csv.columns
Index(['Season', 'Age', 'Competition', 'Matches Played', 'Starts', 'Minutes played', 'Goals scored', 'Assists', 'PK', 'PKatt', 'CrdY', 'CrdR', 'Goal/90', 'Ast/90', 'G+A/90', 'G-PK/90', 'G+A-PK/90'], dtype='object')
# Feature matrix: every numeric column of the data set, i.e. all columns
# except the two text columns 'Season' and 'Competition'.
X = np.array(messi_csv[['Age', 'Matches Played', 'Starts',
                        'Minutes played', 'Goals scored', 'Assists', 'PK', 'PKatt', 'CrdY',
                        'CrdR', 'Goal/90', 'Ast/90', 'G+A/90', 'G-PK/90', 'G+A-PK/90']])

# Target: the competition of each row, encoded as an integer class label.
competition_codes = {
    'Champions League': 0,
    'La Liga': 1,
    'Copa del Rey': 2,
    'Supercopa de España': 3,
}
competiciones = messi_csv['Competition']

# Fail loudly on a name that has no code. Silently skipping such rows would
# leave Y shorter than X and break the row-to-label correspondence.
unknown = sorted(set(competiciones) - set(competition_codes), key=str)
if unknown:
    raise ValueError("Unexpected competition name(s): {}".format(unknown))

comp = [competition_codes[c] for c in competiciones]
Y = np.array(comp)

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1234, shuffle=True)
# Bare expression: echo the encoded labels as the cell output.
Y
array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 1, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3])
# k-nearest-neighbours classifier that votes among the 5 closest training samples.
knn = KNeighborsClassifier(n_neighbors=5)
# Fit on the training split; as the last expression of the cell, the fitted
# estimator is echoed as the cell output.
knn.fit(x_train, y_train)
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
# Predict the competition class of each held-out test sample.
y_pred = knn.predict(x_test)
# Bare expression: echo the predicted labels as the cell output.
y_pred
array([0, 0, 1, 3, 0, 2, 1, 2, 0])
# Echo the true test labels so they can be compared with the predictions.
y_test
array([0, 0, 1, 3, 0, 0, 1, 2, 0])
# Confusion matrix on the held-out test set.
# The full list of class codes is passed explicitly. Otherwise the matrix only
# contains the classes that happen to occur in y_test / y_pred, and with so few
# test samples a missing class would shift the default 0..k-1 tick labels away
# from the real class codes.
class_codes = [0, 1, 2, 3]
class_names = ['Champions League', 'La Liga', 'Copa del Rey', 'Supercopa de España']
cm = confusion_matrix(y_test, y_pred, labels=class_codes)
cm_display = ConfusionMatrixDisplay(cm, display_labels=class_names).plot(xticks_rotation=45)

# Now predict on the training data itself, to compare against the test behaviour.
# Note that this overwrites y_pred with the training-set predictions.
y_pred = knn.predict(x_train)
# Bare expression: echo the training-set predictions as the cell output.
y_pred
array([0, 3, 1, 2, 0, 1, 1, 1, 3, 1, 1, 0, 0, 3, 0, 3, 3, 0, 1, 1, 1, 0, 1, 0, 2, 0, 3, 3, 0, 1, 2, 0, 3, 1])
# Echo the true training labels so they can be compared with the predictions.
y_train
array([0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 2, 3, 0, 1, 1, 1, 0, 1, 2, 2, 0, 3, 3, 0, 1, 2, 0, 3, 1])
# Confusion matrix on the training set (y_pred holds the training predictions here).
# The class codes are passed explicitly so that rows/columns and tick labels
# always refer to the same four competitions, even if a class is absent.
class_codes = [0, 1, 2, 3]
class_names = ['Champions League', 'La Liga', 'Copa del Rey', 'Supercopa de España']
cm = confusion_matrix(y_train, y_pred, labels=class_codes)
cm_display = ConfusionMatrixDisplay(cm, display_labels=class_names).plot(xticks_rotation=45)

# Restrict the features to two columns so that the samples and the decision
# regions can be drawn in a 2-D plot.
X = np.array(messi_csv[['Goals scored', 'Minutes played']])
# Same split parameters (and seed) as before, so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1234, shuffle=True)
# Refit the existing classifier on the two-feature training data.
knn.fit(x_train, y_train)
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
# Predictions of the two-feature model on the test split; y_pred is reused
# by the plots further down.
y_pred = knn.predict(x_test)
# Mean accuracy on the test split.
knn.score(x_test, y_test)
0.8888888888888888
# Mean accuracy on the training split, for comparison with the test score.
knn.score(x_train, y_train)
0.8235294117647058
# Scatter plot of the two selected features, one colour per competition.
# cdict maps each class code to a colour, catdict to its legend name.
cdict = {0: 'red', 1: 'green', 2: 'blue', 3: 'yellow'}
catdict = {0: 'Champions League', 1: 'La Liga', 2: 'Copa del Rey', 3: 'Supercopa de España'}
figure, ax = plt.subplots(figsize=(8, 8))
labels = Y
for g in np.unique(labels):
    # Boolean mask selecting the rows of class g; indexing with it yields
    # plain 1-D coordinate arrays.
    ix = labels == g
    ax.scatter(X[ix, 0], X[ix, 1], c=cdict[g], label=catdict[g], s=45, edgecolor='k')
# Column 0 of X is 'Goals scored', column 1 is 'Minutes played'.
ax.set_xlabel('Goals Scored')
ax.set_ylabel('Minutes Played')
ax.legend()
plt.show()
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
# The mesh covers the range of both features, padded by 0.5 on each side.
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
h = 1.0  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Classify every mesh point, then restore the grid shape for plotting.
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Use one colour scale for every layer. Without explicit limits each call
# normalises to the classes present in its own data, so the same class could
# get different colours in the training and test scatters.
c_lo, c_hi = Y.min(), Y.max()

# Put the result into a color plot
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto', vmin=c_lo, vmax=c_hi)
# Plot also the training points (coloured by their true class)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k", cmap="jet",
            vmin=c_lo, vmax=c_hi, label='Training Points')
# Plot also the testing points (coloured by their predicted class)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k", cmap="jet",
            vmin=c_lo, vmax=c_hi, marker="^", label='Test Points')
plt.legend(loc="upper left")
# Column 0 of X is 'Goals scored' and column 1 is 'Minutes played'.
plt.xlabel("Goals Scored")
plt.ylabel("Minutes Played")
plt.show()
# Split first, then learn the min-max scaling from the training part only.
# Scaling the whole data set before splitting would let the test rows
# influence the feature ranges the model is trained on.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1234, shuffle=True)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# The whole data set in the training-derived scale; the plotting code below
# uses it to choose the mesh range.
X_scaled = scaler.transform(X)
knn = KNeighborsClassifier(5)  # Play with this value and see how the resulting plot changes.
knn.fit(x_train, y_train)
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
# The mesh covers the range of both scaled features, padded by 0.5 on each side.
x_min, x_max = X_scaled[:, 0].min() - 0.5, X_scaled[:, 0].max() + 0.5
y_min, y_max = X_scaled[:, 1].min() - 0.5, X_scaled[:, 1].max() + 0.5
h = 0.009  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Classify every mesh point, then restore the grid shape for plotting.
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Recompute the test predictions with the model fitted on the scaled data.
# The previous y_pred came from the model trained on unscaled features.
y_pred = knn.predict(x_test)

# Use one colour scale for every layer so that a class has the same colour
# in the training and the test scatter.
c_lo, c_hi = Y.min(), Y.max()

# Put the result into a color plot
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto', vmin=c_lo, vmax=c_hi)
# Plot also the training points (coloured by their true class)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k", cmap="jet",
            vmin=c_lo, vmax=c_hi, label='Training Points')
# Plot also the testing points (coloured by their predicted class)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k", cmap="jet",
            vmin=c_lo, vmax=c_hi, marker="^", label='Test Points')
plt.legend(loc="upper left")
# Column 0 is the scaled 'Goals scored' and column 1 the scaled 'Minutes played'.
plt.xlabel("Goals Scored (scaled)")
plt.ylabel("Minutes Played (scaled)")
plt.show()