# data management
import numpy as np # for linear algebra
import pandas as pd # for tabular data manipulation and processing
# machine learning
import sklearn # for data prep and classical ML
import tensorflow as tf # for deep learning
from tensorflow import keras # for deep learning
import keras.layers as L # for easy NN layer access
# data visualization and graphics
import matplotlib.pyplot as plt # for visualization fundamentals
import seaborn as sns # for pretty visualizations
import cv2 # for image manipulation
# misc
from tqdm.notebook import tqdm # for progress bars
import math # for calculation
import sys # for system manipulation
import os # for file manipulation
# Example data
# Regression
# "Ground-truth values"
y_reg_true = np.random.uniform(0, 20, 50) # Randomly generates number from uniform distribution with range 0 to 20 of size (50, 1)
# Model predictions
y_reg_pred = np.random.uniform(0, 20, 50)
#Classification
y_class_true = np.random.randint(2, size=50) # Randomly generates 1s and 0s of size (50, 1)
# Represents the probabilstic predictions from models
y_class_pred = np.random.uniform(0, 1, 50) # Randomly generates number from uniform distribution with range 0 to 1 of size (50, 1),
# Implementation in Numpy
def mean_absolute_error(y_true, y_pred):
# Check if y_true and y_pred are the same shape
assert y_pred.shape == y_true.shape
return np.sum(np.absolute(y_pred-y_true))/len(y_pred)
print(f"Numpy Implementation of MAE: {mean_absolute_error(y_reg_true, y_reg_pred)}")
# Implementation in Sklearn
from sklearn.metrics import mean_absolute_error
# Usage
print(f"Sklearn Implementation of MAE: {mean_absolute_error(y_reg_true, y_reg_pred)}")
Numpy Implementation of MAE: 6.5101676749399315 Sklearn Implementation of MAE: 6.5101676749399315
# Implemnetation in Numpy
def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
assert y_pred.shape == y_true.shape
return np.sum((y_pred-y_true)**2)/len(y_pred)
print(f"Numpy Implementation of MSE: {mean_squared_error(y_reg_true, y_reg_pred)}")
# Implementation in Sklearn
from sklearn.metrics import mean_squared_error
# Usage
print(f"Sklearn Implementation of MSE: {mean_squared_error(y_reg_true, y_reg_pred)}")
Numpy Implementation of MSE: 63.50136689708434 Sklearn Implementation of MSE: 63.50136689708434
print(f"RMSE: {mean_squared_error(y_reg_true, y_reg_pred) ** (1/2)}")
RMSE: 7.968774491544126
def get_tpr_fpr(y_pred, y_true):
tp = (y_pred == 1) & (y_true == 1)
tn = (y_pred == 0) & (y_true == 0)
fp = (y_pred == 1) & (y_true == 0)
fn = (y_pred == 0) & (y_true == 1)
tpr = tp.sum() / (tp.sum() + fn.sum())
fpr = fp.sum() / (fp.sum() + tn.sum())
return tpr, fpr
def roc_curve(y_pred, y_true, n_thresholds=15000):
fpr_thresh = []
tpr_thresh = []
for i in range(n_thresholds + 1):
threshold_vector = (y_pred >= i/n_thresholds)
tpr, fpr = get_tpr_fpr(threshold_vector, y_true)
fpr_thresh.append(fpr)
tpr_thresh.append(tpr)
return tpr_thresh, fpr_thresh
# Implementation in Numpy
def area_under_roc_curve(y_true, y_pred):
fpr, tpr = roc_curve(y_pred, y_true)
rectangle_roc = 0
for k in range(len(fpr) - 1):
rectangle_roc = rectangle_roc + (fpr[k]- fpr[k + 1]) * tpr[k]
return 1 - rectangle_roc
from sklearn.metrics import roc_auc_score
print(f"Numpy Implementation of ROC-AUC: {area_under_roc_curve(y_class_true, y_class_pred)}")
print(f"Sklearn implementation of ROC-AUC: {roc_auc_score(y_class_true, y_class_pred)}")
Numpy Implementation of ROC-AUC: 0.5467980295566504 Sklearn implementation of ROC-AUC: 0.5467980295566502
As you can see, our custom implementation differs slightly from scikit-learn's. This is because scikit-learn uses a slightly different calculation, but both capture the same idea!
# Convert probability values into integers
y_class_pred = np.rint(y_class_pred)
# Precision
# number of correctly classified positive values/number of all predicted positive values
def precision(y_true, y_pred):
assert y_true.shape == y_pred.shape
return ((y_pred == 1) & (y_true == 1)).sum() / y_pred.sum()
print(f"Numpy Precision: {precision(y_class_true, y_class_pred)}")
# Recall
# number of correctly classified positive values/number of all true positive values
def recall(y_true, y_pred):
assert y_true.shape == y_pred.shape
return ((y_pred == 1) & (y_true == 1)).sum() / y_true.sum()
print(f"Numpy Recall: {recall(y_class_true, y_class_pred)}")
from sklearn.metrics import recall_score, precision_score
print(f"Sklearn Precision {precision_score(y_class_true, y_class_pred)}")
print(f"Sklearn Recall: {recall_score(y_class_true, y_class_pred)}")
Numpy Precision: 0.5 Numpy Recall: 0.5238095238095238 Sklearn Precision 0.5 Sklearn Recall: 0.5238095238095238
def accuracy(y_true, y_pred):
assert y_true.shape == y_pred.shape
# returns a boolean array(of 1s and 0s) indicating if element of both arrays match
return np.average(y_true == y_pred)
from sklearn.metrics import accuracy_score
print(f"Numpy Accuracy: {accuracy(y_class_true, y_class_pred)}")
print(f"Sklearn Accuracy: {accuracy_score(y_class_true, y_class_pred)}")
Numpy Accuracy: 0.58 Sklearn Accuracy: 0.58
# F1 Score
# Harmonic Mean of Precision and Recall
def f1_score(y_true, y_pred):
return (2*precision(y_true, y_pred)*recall(y_true, y_pred)) / (precision(y_true, y_pred)+recall(y_true, y_pred))
print(f"Numpy Implementation of F1 Score: {f1_score(y_class_true, y_class_pred)}")
from sklearn.metrics import f1_score
print(f"Sklearn Implementation of F1 Socre: {f1_score(y_class_true, y_class_pred)}")
Numpy Implementation of F1 Score: 0.5116279069767442 Sklearn Implementation of F1 Socre: 0.5116279069767442