# Tratamiento de datos
# ==============================================================================
import pandas as pd
import numpy as np
# Preprocesado y modelado
# ==============================================================================
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')
# Load the ESL mixture dataset from the remote CSV file.
url = (
    'https://raw.githubusercontent.com/JoaquinAmatRodrigo/'
    'Estadistica-machine-learning-python/master/data/ESL.mixture.csv'
)
datos = pd.read_csv(url)
# Preview the first rows (rendered as the notebook cell output).
datos.head()
| X1 | X2 | y |
---|---|---|---|
0 | 2.526093 | 0.321050 | 0 |
1 | 0.366954 | 0.031462 | 0 |
2 | 0.768219 | 0.717486 | 0 |
3 | 0.693436 | 0.777194 | 0 |
4 | -0.019837 | 0.867254 | 0 |
# Visualization: scatter plot of the two features, coloured by class label.
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(datos["X1"], datos["X2"], c=datos["y"])
# The trailing semicolon keeps the notebook from echoing the Text object.
ax.set_title("Datos");
# Separate the target column from the predictors ahead of the train/test split.
y = datos['y']
X = datos.drop(columns=['y'])
# Show the predictor matrix (notebook cell output).
X
| X1 | X2 |
---|---|---|
0 | 2.526093 | 0.321050 |
1 | 0.366954 | 0.031462 |
2 | 0.768219 | 0.717486 |
3 | 0.693436 | 0.777194 |
4 | -0.019837 | 0.867254 |
... | ... | ... |
195 | 0.256750 | 2.293605 |
196 | 1.925173 | 0.165053 |
197 | 1.301941 | 0.992200 |
198 | 0.008131 | 2.242264 |
199 | -0.196246 | 0.551404 |
200 rows × 2 columns
# Display the target Series (notebook cell output).
y
0      0
1      0
2      0
3      0
4      0
      ..
195    1
196    1
197    1
198    1
199    1
Name: y, Length: 200, dtype: int64
# Hold out 30% of the rows for testing (70% for training), shuffling with a
# fixed seed so the split is reproducible.
# The target is passed as a 1-D array: scikit-learn estimators expect y with
# shape (n_samples,), and an (n_samples, 1) column vector makes SVC.fit emit a
# DataConversionWarning (hidden here by the blanket warnings filter).
X_train, X_test, y_train, y_test = train_test_split(
    X, y.values, train_size=0.7, random_state=42, shuffle=True
)
# Build a linear-kernel SVM (C=100) and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
modelo = SVC(kernel='linear', C=100, random_state=42).fit(X_train, y_train)
# Echo the fitted estimator (notebook cell output).
modelo
SVC(C=100, kernel='linear', random_state=42)
# Predict the class label of every sample in the held-out test split.
y_test_pred = modelo.predict(X=X_test)
A lo largo de este notebook, se solicita calcular las métricas requeridas como así también su correspondiente interpretación:
# Accuracy: fraction of test samples whose predicted label equals the true one.
# In the recorded run it is 0.70, i.e. 42 of the 60 test samples are correct.
from sklearn.metrics import accuracy_score
accuracy_score(y_true=y_test, y_pred=y_test_pred)
0.7
# Confusion matrix: rows are the true classes, columns the predicted classes.
# Recorded run, taking class 1 as positive: 21 true negatives, 10 false
# positives, 8 false negatives and 21 true positives.
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true=y_test, y_pred=y_test_pred)
array([[21, 10], [ 8, 21]])
# F1 score: harmonic mean of precision and recall for the positive class (1).
# Recorded run: 0.70, so precision and recall are balanced on this split.
from sklearn.metrics import f1_score
f1_score(y_true=y_test, y_pred=y_test_pred)
0.7
Calcular todas las métricas al mismo tiempo
# Per-class precision, recall, F1 and support, plus accuracy and the macro and
# weighted averages, gathered in a single text report.
from sklearn.metrics import classification_report
reporte = classification_report(y_true=y_test, y_pred=y_test_pred)
print(reporte)
              precision    recall  f1-score   support

           0       0.72      0.68      0.70        31
           1       0.68      0.72      0.70        29

    accuracy                           0.70        60
   macro avg       0.70      0.70      0.70        60
weighted avg       0.70      0.70      0.70        60