#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# In[2]:


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay


# In[3]:


from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale


# In[4]:


# Load the Messi/Barcelona statistics table. The file is decoded as
# ISO-8859-1 (Latin-1) and its fields are comma-separated.
messi_csv = pd.read_csv(
    'messi_barca.csv',
    sep=',',
    encoding="ISO-8859-1",
)
messi_csv  # notebook cell output: displays the loaded frame


# In[5]:


# Summary of the frame (columns, dtypes, non-null counts).
messi_csv.info()


# In[6]:


# Column labels available for feature selection.
messi_csv.columns


# # KNN

# In[7]:


# Columns used as the feature matrix for the full-feature KNN model.
feature_columns = [
    'Age',
    'Matches Played',
    'Starts',
    'Minutes played',
    'Goals scored',
    'Assists',
    'PK',
    'PKatt',
    'CrdY',
    'CrdR',
    'Goal/90',
    'Ast/90',
    'G+A/90',
    'G-PK/90',
    'G+A-PK/90',
]

# One row per record in the CSV, one column per selected feature.
X = np.array(messi_csv[feature_columns])


# In[8]:


# Integer class label assigned to each competition name.
competition_codes = {
    'Champions League': 0,
    'La Liga': 1,
    'Copa del Rey': 2,
    'Supercopa de España': 3,
}

competiciones = messi_csv['Competition']

# A name outside the mapping must not be dropped silently: that would make Y
# shorter than X and shift every following label onto the wrong feature row.
# Fail here with the offending values instead.
unknown = sorted({str(c) for c in competiciones if c not in competition_codes})
if unknown:
    raise ValueError(f"Unrecognised competition name(s): {unknown}")

comp = [competition_codes[c] for c in competiciones]

# Target vector aligned row-for-row with the feature matrix.
Y = np.array(comp)


# In[9]:


# Hold out 20% of the rows for validation; the fixed seed keeps the shuffled
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=1234,
)


# In[10]:


Y


# In[11]:


# k-nearest-neighbours classifier using 5 neighbours.
knn = KNeighborsClassifier(n_neighbors=5)


# In[12]:


knn.fit(x_train, y_train)


# # Confusion matrix - validation

# In[13]:


# Predict the held-out rows with the fitted classifier.
y_pred = knn.predict(x_test)


# In[14]:


y_pred


# In[15]:


y_test


# In[16]:


# Pin the row/column order to every class the model was trained on. Without
# `labels`, a class that is absent from both y_test and y_pred is dropped
# from the matrix, which shrinks it and shifts the remaining rows/columns.
cm = confusion_matrix(y_test, y_pred, labels=knn.classes_)


# In[17]:


# Show the real class codes on the axes instead of the default 0..n-1 ticks.
cm_display = ConfusionMatrixDisplay(cm, display_labels=knn.classes_).plot()


# # Confusion matrix - training

# In[18]:


# Predict the training rows themselves to see how well the model fits the
# data it has already seen.
y_pred = knn.predict(x_train)


# In[19]:


y_pred


# In[20]:


y_train


# In[21]:


# Pin the row/column order to every class the model was trained on, so the
# matrix always has one row and one column per class in a fixed order.
cm = confusion_matrix(y_train, y_pred, labels=knn.classes_)


# In[22]:


# Show the real class codes on the axes instead of the default 0..n-1 ticks.
cm_display = ConfusionMatrixDisplay(cm, display_labels=knn.classes_).plot()


# # Simplified 2D case

# In[23]:


# Reduce the problem to two features so it can be drawn on a plane:
# column 0 is goals scored, column 1 is minutes played.
plane_columns = ['Goals scored', 'Minutes played']
X = np.array(messi_csv[plane_columns])


# In[24]:


# Same split settings as the full-feature model (20% held out, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=1234,
)


# In[25]:


# Refit the existing classifier on the two-feature training split.
knn.fit(x_train, y_train)


# In[26]:


y_pred = knn.predict(x_test)


# In[27]:


# Score on the held-out rows.
knn.score(x_test, y_test)


# In[28]:


# Score on the training rows, for comparison.
knn.score(x_train, y_train)


# In[29]:


# Marker colour for each encoded competition label.
cdict = {
    0: 'red',
    1: 'green',
    2: 'blue',
    3: 'yellow',
}
# Legend text for each encoded competition label.
catdict = {
    0: 'Champions League',
    1: 'La Liga',
    2: 'Copa del Rey',
    3: 'Supercopa de España',
}

figure, ax = plt.subplots(figsize=(8, 8))

# Draw one scatter series per class so each gets its own legend entry.
labels = Y
for code in np.unique(labels):
    in_class = labels == code
    ax.scatter(
        X[in_class, 0],
        X[in_class, 1],
        c=cdict[code],
        label=catdict[code],
        s=45,
        edgecolor='k',
    )

ax.set_xlabel('Goals Scored')
ax.set_ylabel('Minutes Played')
ax.legend()
plt.show()


# In[30]:


# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
h = 1.0  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto')

# Share one colour scale between both scatter calls. Otherwise each call
# normalises over its own values, and the same class can be drawn in
# different colours for training and test points.
label_min, label_max = Y.min(), Y.max()

# Plot also the training points (coloured by their true label)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k", cmap="jet",
            vmin=label_min, vmax=label_max, label='Training Points')
# Plot also the testing points (coloured by their predicted label)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k", cmap="jet",
            vmin=label_min, vmax=label_max, marker="^", label='Test Points')
plt.legend(loc="upper left")
# Column 0 of X is 'Goals scored' and column 1 is 'Minutes played'.
plt.xlabel("Goals Scored")
plt.ylabel("Minutes Played")

plt.show()


# # KNN with scaled features

# In[31]:


# Split the raw features first, then learn the min-max range from the
# training rows only. Scaling the whole dataset before splitting would let
# the test rows influence the preprocessing (data leakage).
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1234, shuffle=True
)
scaler = MinMaxScaler().fit(x_train_raw)


# In[32]:


x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)
# Every row in the scaled space; later cells use it for the mesh bounds.
X_scaled = scaler.transform(X)


# In[33]:


knn = KNeighborsClassifier(5)  # Play with this value and watch how the resulting plot changes.


# In[34]:


knn.fit(x_train, y_train)


# In[35]:


# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X_scaled[:, 0].min() - 0.5, X_scaled[:, 0].max() + 0.5
y_min, y_max = X_scaled[:, 1].min() - 0.5, X_scaled[:, 1].max() + 0.5
h = 0.009  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap="cool", shading='auto')

# Recompute the test predictions with the classifier fitted on the scaled
# data. The previous y_pred came from the model trained on unscaled features.
y_pred = knn.predict(x_test)

# Share one colour scale between both scatter calls. Otherwise each call
# normalises over its own values, and the same class can be drawn in
# different colours for training and test points.
label_min, label_max = Y.min(), Y.max()

# Plot also the training points (coloured by their true label)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, edgecolors="k", cmap="jet",
            vmin=label_min, vmax=label_max, label='Training Points')
# Plot also the testing points (coloured by their predicted label)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred, edgecolors="k", cmap="jet",
            vmin=label_min, vmax=label_max, marker="^", label='Test Points')
plt.legend(loc="upper left")
# Column 0 is 'Goals scored' and column 1 is 'Minutes played', both scaled.
plt.xlabel("Goals Scored (scaled)")
plt.ylabel("Minutes Played (scaled)")

plt.show()