# JUST RUN THIS
from google.colab import drive
import pandas as pd

drive.mount('/content/gdrive')

# Load the data
df = pd.read_csv('/content/gdrive/MyDrive/datasets/pokemon.csv')
df.sample(5)

from sklearn.tree import DecisionTreeClassifier  # candidate models; SVC is used below
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

def name_length(name):
    return len(name)

# Feature engineering
df["Total"] = df["HP"] + df["Attack"] + df["Defense"] + df["Sp. Atk"] + df["Sp. Def"] + df["Speed"]
df["Atk Ratio"] = df["Attack"] / df["Sp. Atk"]      # physical-to-special attack ratio
df["Type 1 Dragon"] = df["Type 1"] == "Dragon"      # is the primary type Dragon?
df["Total is 600"] = df["Total"] == 600             # do base stats sum to exactly 600?
df["Name Length"] = df["Name"].apply(name_length)

# Hold out Generation 6 as the test set and train on the rest
# (.copy() avoids a SettingWithCopyWarning when predictions are added later)
df_train = df[df["Generation"] != 6]
df_test = df[df["Generation"] == 6].copy()

features = ["HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
            "Total", "Atk Ratio", "Type 1 Dragon", "Total is 600", "Name Length"]
X_train = df_train[features]
X_test = df_test[features]
y_train = df_train['Legendary']
y_test = df_test['Legendary']

# Standardize features: fit the scaler on the training set only,
# then apply the same transform to the test set
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a support vector classifier and predict on the held-out generation
model = SVC()
model.fit(X_train, y_train)
y_pred = pd.Series(model.predict(X_test), index=y_test.index)

def calculate_confusion_matrix(y_test, y_pred):
    # Input: boolean Series of actual and predicted labels with aligned indexes
    # Output: returns tp, tn, fp, fn
    tp = ((y_test == True) & (y_pred == True)).sum()    # True Positive
    tn = ((y_test == False) & (y_pred == False)).sum()  # True Negative
    fp = ((y_test == False) & (y_pred == True)).sum()   # False Positive
    fn = ((y_test == True) & (y_pred == False)).sum()   # False Negative
    return tp, tn, fp, fn

# Calculate confusion matrix
tp, tn, fp, fn = calculate_confusion_matrix(y_test, y_pred)
print("                 Predicted Positive | Predicted Negative")
print(f"Actual Positive |{tp:>19d} |{fn:>19d} ")
print(f"Actual Negative |{fp:>19d} |{tn:>19d} ")
print("")

# Calculate accuracy, precision, and recall
total = len(y_test)
accuracy = (tp + tn) / total
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(f"Accuracy:  {accuracy:>6.2%} (Correctly classified {tp + tn} out of {total})")
print(f"Precision: {precision:>6.2%} (When predicted positive, correct {precision:.0%} of the time)")
print(f"Recall:    {recall:>6.2%} (Found {recall:.0%} of all positive cases)")

# Attach predictions and show the misclassified test rows
df_test["Pred. Legendary"] = y_pred
display(df_test[y_pred != y_test])
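
# A minimal cross-check (a sketch, assuming sklearn.metrics, which ships with
# Colab): recompute the confusion matrix and metrics with scikit-learn and
# confirm they match the hand-rolled values above.
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# For binary labels, confusion_matrix returns [[tn, fp], [fn, tp]]
sk_tn, sk_fp, sk_fn, sk_tp = confusion_matrix(y_test, y_pred).ravel()
assert (sk_tp, sk_tn, sk_fp, sk_fn) == (tp, tn, fp, fn)

print(f"sklearn accuracy:  {accuracy_score(y_test, y_pred):.2%}")
print(f"sklearn precision: {precision_score(y_test, y_pred):.2%}")
print(f"sklearn recall:    {recall_score(y_test, y_pred):.2%}")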
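
# Context for the accuracy number (a sketch, not part of the original exercise):
# Legendary Pokemon are rare, so always predicting False already yields high
# accuracy. A majority-class baseline shows what the SVC adds beyond that.
from sklearn.dummy import DummyClassifier

baseline = DummyClassifier(strategy="most_frequent")
baseline.fit(X_train, y_train)
print(f"Majority-class baseline accuracy: {baseline.score(X_test, y_test):.2%}")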