#!/usr/bin/env python
# coding: utf-8
"""KNN classification of the 'Competition' column of messi_barca.csv.

Exported from a Jupyter notebook; the ``# In[n]:`` markers are the original
cell boundaries. The script loads the CSV, encodes the competition name as an
integer class, trains a 5-nearest-neighbours classifier on the statistic
columns, and plots confusion matrices and 2D decision boundaries.
"""

# In[1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# In[2]:

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# In[3]:

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale

# In[4]:

messi_csv = pd.read_csv('messi_barca.csv', encoding="ISO-8859-1", delimiter=',')
# A bare expression only displays inside a notebook; print so the script
# shows the same information.
print(messi_csv)

# In[5]:

messi_csv.info()

# In[6]:

print(messi_csv.keys())

# # KNN

# In[7]:

# Feature matrix: every numeric statistic column used by the classifier.
FEATURE_COLUMNS = [
    'Age', 'Matches Played', 'Starts', 'Minutes played', 'Goals scored',
    'Assists', 'PK', 'PKatt', 'CrdY', 'CrdR', 'Goal/90', 'Ast/90',
    'G+A/90', 'G-PK/90', 'G+A-PK/90',
]
X = messi_csv[FEATURE_COLUMNS].to_numpy()

# In[8]:

# Integer class assigned to each competition name.
COMPETITION_CODES = {
    'Champions League': 0,
    'La Liga': 1,
    'Copa del Rey': 2,
    'Supercopa de España': 3,
}
competiciones = messi_csv['Competition']
encoded = competiciones.map(COMPETITION_CODES)
# Every row must yield exactly one label. A name that is not in the mapping
# would otherwise be dropped silently, leaving Y shorter than (and misaligned
# with) X, so fail loudly here instead.
if encoded.isna().any():
    unknown = sorted(set(competiciones[encoded.isna()].astype(str)))
    raise ValueError(
        f"Unexpected values in 'Competition' column: {unknown}"
    )
comp = encoded.astype(int).tolist()
Y = np.array(comp)

# Class ids and their display names, in class-id order, for the plots below.
CLASS_LABELS = sorted(COMPETITION_CODES.values())
CLASS_NAMES = sorted(COMPETITION_CODES, key=COMPETITION_CODES.get)

# In[9]:

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1234, shuffle=True
)

# In[10]:

print(Y)

# In[11]:

knn = KNeighborsClassifier(5)

# In[12]:

knn.fit(x_train, y_train)

# # Confusion matrix - validation

# In[13]:

y_pred = knn.predict(x_test)

# In[14]:

print(y_pred)

# In[15]:

print(y_test)

# In[16]:

# Pin the label set so the matrix is always 4x4, even when a class happens
# to be absent from this split; that keeps the validation and training
# matrices directly comparable.
cm = confusion_matrix(y_test, y_pred, labels=CLASS_LABELS)

# In[17]:

cm_display = ConfusionMatrixDisplay(cm, display_labels=CLASS_NAMES).plot(
    xticks_rotation=45
)
cm_display.ax_.set_title("Confusion matrix - validation")

# # Confusion matrix - training

# In[18]:

y_pred = knn.predict(x_train)

# In[19]:

print(y_pred)

# In[20]:

print(y_train)

# In[21]:

cm = confusion_matrix(y_train, y_pred, labels=CLASS_LABELS)

# In[22]:

cm_display = ConfusionMatrixDisplay(cm, display_labels=CLASS_NAMES).plot(
    xticks_rotation=45
)
cm_display.ax_.set_title("Confusion matrix - training")

# # Simplified 2D case

# In[23]:

# Keep only two features so the decision regions can be drawn on a plane:
# column 0 is 'Goals scored', column 1 is 'Minutes played'.
X = messi_csv[['Goals scored', 'Minutes played']].to_numpy()

# In[24]:

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1234, shuffle=True
)

# In[25]:

knn.fit(x_train, y_train)

# In[26]:

# Test-set predictions of the 2D model; reused later to colour the test
# points in the decision-boundary plot.
y_pred = knn.predict(x_test)

# In[27]:

# Accuracy of the 2D model. The value is printed because a bare expression
# is discarded when this file runs as a script.
print("Test accuracy:", knn.score(x_test, y_test))

# In[28]:

print("Train accuracy:", knn.score(x_train, y_train))

# In[29]:

# Scatter of the full 2D data set, one colour per competition.
cdict = {0: 'red', 1: 'green', 2: 'blue', 3: 'yellow'}
catdict = {
    0: 'Champions League',
    1: 'La Liga',
    2: 'Copa del Rey',
    3: 'Supercopa de España',
}
figure, ax = plt.subplots(figsize=(8, 8))
labels = Y
for g in np.unique(labels):
    mask = labels == g  # rows belonging to class g
    ax.scatter(X[mask, 0], X[mask, 1], c=cdict[g], label=catdict[g],
               s=45, edgecolor='k')
plt.xlabel('Goals Scored')
plt.ylabel('Minutes Played')
ax.legend()
plt.show()

# In[30]:

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
h = 1.0  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)

# Share one colour scale between the training and test markers; otherwise
# each scatter call normalises to its own min/max and the same class can be
# drawn in different colours.
class_min, class_max = Y.min(), Y.max()

plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto',
               vmin=class_min, vmax=class_max)

# Plot also the training points
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k",
            cmap="jet", vmin=class_min, vmax=class_max,
            label='Training Points')
# Plot also the testing points
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k",
            cmap="jet", vmin=class_min, vmax=class_max, marker="^",
            label='Test Points')
plt.legend(loc="upper left")
# Axis names follow the column order of X: goals on x, minutes on y.
plt.xlabel("Goals Scored")
plt.ylabel("Minutes Played")
plt.show()

# # Scaled KNN

# In[31]:

# In[32]:

# Split the raw data first and fit the scaler on the training rows only, so
# the test rows do not influence the min/max used for scaling. The same
# random_state gives the same row partition as the earlier splits.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1234, shuffle=True
)
scaler = MinMaxScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)
# Whole data set in the scaled space; used for the mesh bounds of the plot.
X_scaled = scaler.transform(X)

# In[33]:

knn = KNeighborsClassifier(5)  # Try other values and watch how the plot changes.

# In[34]:

knn.fit(x_train, y_train)

# In[35]:

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X_scaled[:, 0].min() - 0.5, X_scaled[:, 0].max() + 0.5
y_min, y_max = X_scaled[:, 1].min() - 0.5, X_scaled[:, 1].max() + 0.5
h = 0.009  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)

# Recompute the test predictions with the classifier trained on scaled data;
# the y_pred left over from the unscaled model would colour the test markers
# with another model's output.
y_pred = knn.predict(x_test)

# Share one colour scale between the training and test markers; otherwise
# each scatter call normalises to its own min/max and the same class can be
# drawn in different colours.
class_min, class_max = Y.min(), Y.max()

plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto',
               vmin=class_min, vmax=class_max)

# Plot also the training points
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k",
            cmap="jet", vmin=class_min, vmax=class_max,
            label='Training Points')
# Plot also the testing points
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k",
            cmap="jet", vmin=class_min, vmax=class_max, marker="^",
            label='Test Points')
plt.legend(loc="upper left")
# Column 0 of the scaled matrix is goals scored and column 1 is minutes
# played, both min-max scaled.
plt.xlabel("Goals Scored (scaled)")
plt.ylabel("Minutes Played (scaled)")
plt.show()