#!/usr/bin/env python
# coding: utf-8

# Decision Tree classification model with scikit-learn on the Titanic dataset.
# Mounts Google Drive (Colab), loads Titanic.csv, splits into train/test,
# fits a depth-limited decision tree, and evaluates it with standard
# classification metrics.

import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from google.colab import drive

# Mount Google Drive and switch into it so the relative CSV path resolves.
drive.mount('/content/gdrive')
print(os.getcwd())
os.chdir("/content/gdrive/My Drive")

# Load the dataset.
# NOTE(review): DecisionTreeClassifier requires numeric features, so this
# assumes Titanic.csv is already numerically encoded — TODO confirm.
titanic = pd.read_csv("Titanic.csv", sep=",")
print(titanic)

# Feature matrix / target vector split.
X = titanic.drop("Survived", axis=1)
y = titanic.Survived

from sklearn.model_selection import train_test_split

# 70/30 train-test split; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

from sklearn.tree import DecisionTreeClassifier

# Depth-limited tree (max_depth=4) to curb overfitting; fixed seed.
arbol_de_decision = DecisionTreeClassifier(max_depth=4, random_state=42)

# Train the model.
arbol_de_decision.fit(X_train, y_train)

# Predictions on the held-out test set.
y_test_pred = arbol_de_decision.predict(X_test)
print(y_test_pred)
print(y_test)

# Throughout this script, the requested metrics are computed; each one
# deserves its own interpretation.

# 1. Accuracy: fraction of correctly classified samples.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_test_pred))

# 2. Confusion matrix: rows = true classes, columns = predicted classes.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_test_pred))

# 3. Precision: TP / (TP + FP) for the positive class.
from sklearn.metrics import precision_score
print(precision_score(y_test, y_test_pred))

# 4. Recall: TP / (TP + FN) for the positive class.
from sklearn.metrics import recall_score
print(recall_score(y_test, y_test_pred))

# 5. F1 score: harmonic mean of precision and recall.
from sklearn.metrics import f1_score
print(f1_score(y_test, y_test_pred))

# 6. Full per-class report (precision, recall, F1, support).
from sklearn.metrics import classification_report
print(classification_report(y_test, y_test_pred))
# 7. Compute and plot the ROC curve.
from sklearn.metrics import roc_curve, roc_auc_score

# Probability of the positive class (column 1 of predict_proba) is the
# ranking score needed for the ROC curve.
y_score1 = arbol_de_decision.predict_proba(X_test)[:, 1]
print(y_score1)

# False/true positive rates at every decision threshold.
false_positive_rate1, true_positive_rate1, threshold1 = roc_curve(y_test, y_score1)

# Area under the ROC curve (1.0 = perfect ranking, 0.5 = chance).
print('roc_auc_score for DecisionTree: ', roc_auc_score(y_test, y_score1))

plt.subplots(1, figsize=(8, 6))
plt.title('Receiver Operating Characteristic - DecisionTree')
plt.plot(false_positive_rate1, true_positive_rate1)
plt.plot([0, 1], ls="--")           # chance diagonal (y=[0,1] vs default x=[0,1])
plt.plot([0, 0], [1, 0], c=".7")    # left frame line (stray trailing comma removed)
plt.plot([1, 1], c=".7")            # top frame line at y=1
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Created in deepnote.com
# Created in Deepnote