PyOD is a comprehensive Python toolkit to identify outlying objects in multivariate data with both unsupervised and supervised approaches. The models covered in this example include:
Linear Models for Outlier Detection:
1. PCA: Principal Component Analysis (use the sum of weighted projected distances to the eigenvector hyperplane as the outlier scores) 2. MCD: Minimum Covariance Determinant (use the Mahalanobis distances as the outlier scores) 3. OCSVM: One-Class Support Vector Machines
Proximity-Based Outlier Detection Models:
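1. LOF: Local Outlier Factor 2. CBLOF: Cluster-based Local Outlier Factor 3. KNN: k Nearest Neighbors (use the distance to the kth nearest neighbor as the outlier score) 4. HBOS: Histogram-based Outlier Score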
Probabilistic Models for Outlier Detection: 1. ABOD: Angle-based Outlier Detection
Outlier Ensembles and Combination Frameworks: 1. Isolation Forest 2. Feature Bagging
The corresponding file can be found at /examples/compare_all_models.py
from __future__ import division
from __future__ import print_function
import os
import sys
from time import time
# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
# suppress warnings for clean output
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
# Benchmark datasets, listed by base name; each entry of ``mat_file_list``
# is the corresponding '<name>.mat' file name.
mat_file_list = ['{}.mat'.format(stem) for stem in (
    'arrhythmia',
    'cardio',
    'glass',
    'ionosphere',
    'letter',
    'lympho',
    'mnist',
    'musk',
    'optdigits',
    'pendigits',
    'pima',
    'satellite',
    'satimage-2',
    'shuttle',
    'vertebral',
    'vowels',
    'wbc',
)]
# Seeded random generator (seed 42) used by the benchmark below.
random_state = np.random.RandomState(42)

# Layout shared by all result tables: four dataset descriptors followed by
# one column per outlier detector (ten detectors in total).
df_columns = (
    ['Data', '#Samples', '# Dimensions', 'Outlier Perc']
    + ['ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest',
       'KNN', 'LOF', 'MCD', 'OCSVM', 'PCA']
)

# Empty tables for ROC, precision @ rank n and execution time; one row per
# dataset is appended as the benchmark runs.
roc_df, prn_df, time_df = (
    pd.DataFrame(columns=df_columns) for _ in range(3))
from collections import OrderedDict


def _append_result_row(results, row):
    """Return ``results`` with ``row`` appended as one new table row.

    Parameters
    ----------
    results : pandas.DataFrame
        Table accumulated so far; its columns are ``df_columns``.
    row : list
        One value per entry of ``df_columns``.

    Returns
    -------
    pandas.DataFrame
        A new table with the extra row. ``ignore_index=True`` renumbers the
        row labels 0..n-1, so every dataset gets a unique label instead of
        all rows sharing the label 0.
    """
    row_df = pd.DataFrame(row).transpose()
    row_df.columns = df_columns
    return pd.concat([results, row_df], axis=0, ignore_index=True)


# Benchmark every detector on every dataset and record ROC AUC,
# precision @ rank n and execution time in roc_df / prn_df / time_df.
for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    # share of samples with a non-zero label; passed to every detector as
    # its contamination setting
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # construct containers for saving results; every row starts with the
    # same four dataset descriptors
    dataset_name = os.path.splitext(mat_file)[0]
    row_prefix = [dataset_name, X.shape[0], X.shape[1], outliers_percentage]
    roc_list = list(row_prefix)
    prn_list = list(row_prefix)
    time_list = list(row_prefix)

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # The scores are appended to the result rows in iteration order, so the
    # entries must stay in the same order as the detector columns of
    # df_columns. OrderedDict guarantees that order on every interpreter.
    classifiers = OrderedDict([
        ('Angle-based Outlier Detector (ABOD)',
         ABOD(contamination=outliers_fraction)),
        ('Cluster-based Local Outlier Factor',
         CBLOF(contamination=outliers_fraction, check_estimator=False,
               random_state=random_state)),
        ('Feature Bagging',
         FeatureBagging(contamination=outliers_fraction,
                        random_state=random_state)),
        ('Histogram-base Outlier Detection (HBOS)',
         HBOS(contamination=outliers_fraction)),
        ('Isolation Forest',
         IForest(contamination=outliers_fraction,
                 random_state=random_state)),
        ('K Nearest Neighbors (KNN)',
         KNN(contamination=outliers_fraction)),
        ('Local Outlier Factor (LOF)',
         LOF(contamination=outliers_fraction)),
        ('Minimum Covariance Determinant (MCD)',
         MCD(contamination=outliers_fraction, random_state=random_state)),
        ('One-class SVM (OCSVM)',
         OCSVM(contamination=outliers_fraction)),
        ('Principal Component Analysis (PCA)',
         PCA(contamination=outliers_fraction, random_state=random_state)),
    ])

    for clf_name, clf in classifiers.items():
        # the timed section covers fitting on the training split plus
        # scoring the test split
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
                  clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    # one finished row per dataset goes into each result table
    time_df = _append_result_row(time_df, time_list)
    roc_df = _append_result_row(roc_df, roc_list)
    prn_df = _append_result_row(prn_df, prn_list)
... Processing arrhythmia.mat ... Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 1.126s Cluster-based Local Outlier Factor ROC:0.7824, precision @ rank n:0.4643, execution time: 0.9786s Feature Bagging ROC:0.7796, precision @ rank n:0.4643, execution time: 0.6076s Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.8914s Isolation Forest ROC:0.8639, precision @ rank n:0.6071, execution time: 0.2507s K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.1133s Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0742s Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 1.4578s One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0481s Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0642s ... Processing cardio.mat ... Angle-based Outlier Detector (ABOD) ROC:0.5892, precision @ rank n:0.1918, execution time: 0.3579s Cluster-based Local Outlier Factor ROC:0.973, precision @ rank n:0.7945, execution time: 0.0983s Feature Bagging ROC:0.6385, precision @ rank n:0.1781, execution time: 0.8683s Histogram-base Outlier Detection (HBOS) ROC:0.8373, precision @ rank n:0.4521, execution time: 0.005s Isolation Forest ROC:0.9502, precision @ rank n:0.6027, execution time: 0.2767s K Nearest Neighbors (KNN) ROC:0.734, precision @ rank n:0.3562, execution time: 0.1905s Local Outlier Factor (LOF) ROC:0.588, precision @ rank n:0.1507, execution time: 0.0993s Minimum Covariance Determinant (MCD) ROC:0.8195, precision @ rank n:0.411, execution time: 0.5835s One-class SVM (OCSVM) ROC:0.9478, precision @ rank n:0.5342, execution time: 0.0822s Principal Component Analysis (PCA) ROC:0.9616, precision @ rank n:0.6849, execution time: 0.003s ... Processing glass.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.6951, precision @ rank n:0.25, execution time: 0.0361s Cluster-based Local Outlier Factor ROC:0.7957, precision @ rank n:0.25, execution time: 0.0251s Feature Bagging ROC:0.7073, precision @ rank n:0.25, execution time: 0.0351s Histogram-base Outlier Detection (HBOS) ROC:0.7073, precision @ rank n:0.0, execution time: 0.005s Isolation Forest ROC:0.7134, precision @ rank n:0.25, execution time: 0.1584s K Nearest Neighbors (KNN) ROC:0.8384, precision @ rank n:0.25, execution time: 0.013s Local Outlier Factor (LOF) ROC:0.7043, precision @ rank n:0.25, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:0.8293, precision @ rank n:0.0, execution time: 0.0481s One-class SVM (OCSVM) ROC:0.6585, precision @ rank n:0.25, execution time: 0.001s Principal Component Analysis (PCA) ROC:0.686, precision @ rank n:0.25, execution time: 0.001s ... Processing ionosphere.mat ... Angle-based Outlier Detector (ABOD) ROC:0.9181, precision @ rank n:0.8431, execution time: 0.0602s Cluster-based Local Outlier Factor ROC:0.795, precision @ rank n:0.549, execution time: 0.0381s Feature Bagging ROC:0.9303, precision @ rank n:0.8039, execution time: 0.0642s Histogram-base Outlier Detection (HBOS) ROC:0.6052, precision @ rank n:0.3922, execution time: 0.006s Isolation Forest ROC:0.8486, precision @ rank n:0.5882, execution time: 0.1785s K Nearest Neighbors (KNN) ROC:0.932, precision @ rank n:0.8824, execution time: 0.0251s Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.7843, execution time: 0.007s Minimum Covariance Determinant (MCD) ROC:0.9669, precision @ rank n:0.8627, execution time: 0.0622s One-class SVM (OCSVM) ROC:0.8257, precision @ rank n:0.6863, execution time: 0.004s Principal Component Analysis (PCA) ROC:0.7941, precision @ rank n:0.5686, execution time: 0.003s ... Processing letter.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.8783, precision @ rank n:0.4375, execution time: 0.365s Cluster-based Local Outlier Factor ROC:0.5301, precision @ rank n:0.0312, execution time: 0.1052s Feature Bagging ROC:0.8947, precision @ rank n:0.4062, execution time: 0.7309s Histogram-base Outlier Detection (HBOS) ROC:0.6063, precision @ rank n:0.0938, execution time: 0.007s Isolation Forest ROC:0.6201, precision @ rank n:0.0625, execution time: 0.2416s K Nearest Neighbors (KNN) ROC:0.8573, precision @ rank n:0.3125, execution time: 0.1905s Local Outlier Factor (LOF) ROC:0.8765, precision @ rank n:0.3438, execution time: 0.0922s Minimum Covariance Determinant (MCD) ROC:0.8061, precision @ rank n:0.1875, execution time: 1.3225s One-class SVM (OCSVM) ROC:0.5927, precision @ rank n:0.125, execution time: 0.0973s Principal Component Analysis (PCA) ROC:0.5216, precision @ rank n:0.125, execution time: 0.005s ... Processing lympho.mat ... Angle-based Outlier Detector (ABOD) ROC:0.9831, precision @ rank n:0.0, execution time: 0.0261s Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.0321s Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.0261s Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.005s Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 0.1795s K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.011s Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0531s One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.002s Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.001s ... Processing mnist.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.7628, precision @ rank n:0.3367, execution time: 6.8502s Cluster-based Local Outlier Factor ROC:0.8204, precision @ rank n:0.3605, execution time: 0.9004s Feature Bagging ROC:0.7157, precision @ rank n:0.3741, execution time: 44.9604s Histogram-base Outlier Detection (HBOS) ROC:0.5766, precision @ rank n:0.1361, execution time: 0.0411s Isolation Forest ROC:0.7939, precision @ rank n:0.2721, execution time: 1.6943s K Nearest Neighbors (KNN) ROC:0.8498, precision @ rank n:0.432, execution time: 6.6327s Local Outlier Factor (LOF) ROC:0.7195, precision @ rank n:0.3673, execution time: 5.9395s Minimum Covariance Determinant (MCD) ROC:0.8713, precision @ rank n:0.2653, execution time: 4.1711s One-class SVM (OCSVM) ROC:0.854, precision @ rank n:0.3946, execution time: 4.3626s Principal Component Analysis (PCA) ROC:0.8534, precision @ rank n:0.3878, execution time: 0.1433s ... Processing musk.mat ... Angle-based Outlier Detector (ABOD) ROC:0.2161, precision @ rank n:0.1, execution time: 1.8951s Cluster-based Local Outlier Factor ROC:0.9899, precision @ rank n:0.65, execution time: 0.2998s Feature Bagging ROC:0.473, precision @ rank n:0.125, execution time: 12.2746s Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.975, execution time: 0.0582s Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 1.0528s K Nearest Neighbors (KNN) ROC:0.8009, precision @ rank n:0.175, execution time: 1.7376s Local Outlier Factor (LOF) ROC:0.4629, precision @ rank n:0.125, execution time: 1.5411s Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 21.114s One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.1922s Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.1504s ... Processing optdigits.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.4894, precision @ rank n:0.0152, execution time: 2.243s Cluster-based Local Outlier Factor ROC:0.5329, precision @ rank n:0.0, execution time: 0.3911s Feature Bagging ROC:0.5062, precision @ rank n:0.0303, execution time: 11.9945s Histogram-base Outlier Detection (HBOS) ROC:0.8774, precision @ rank n:0.2121, execution time: 0.0271s Isolation Forest ROC:0.6735, precision @ rank n:0.0303, execution time: 0.8623s K Nearest Neighbors (KNN) ROC:0.406, precision @ rank n:0.0, execution time: 1.9502s Local Outlier Factor (LOF) ROC:0.5277, precision @ rank n:0.0303, execution time: 1.7256s Minimum Covariance Determinant (MCD) ROC:0.3822, precision @ rank n:0.0, execution time: 1.6714s One-class SVM (OCSVM) ROC:0.5171, precision @ rank n:0.0, execution time: 1.4729s Principal Component Analysis (PCA) ROC:0.526, precision @ rank n:0.0, execution time: 0.0521s ... Processing pendigits.mat ... Angle-based Outlier Detector (ABOD) ROC:0.667, precision @ rank n:0.0526, execution time: 1.3767s Cluster-based Local Outlier Factor ROC:0.9172, precision @ rank n:0.1579, execution time: 0.1955s Feature Bagging ROC:0.4889, precision @ rank n:0.0526, execution time: 3.6236s Histogram-base Outlier Detection (HBOS) ROC:0.9348, precision @ rank n:0.2632, execution time: 0.008s Isolation Forest ROC:0.9376, precision @ rank n:0.3333, execution time: 0.6768s K Nearest Neighbors (KNN) ROC:0.7371, precision @ rank n:0.0702, execution time: 0.8352s Local Outlier Factor (LOF) ROC:0.4965, precision @ rank n:0.0702, execution time: 0.5886s Minimum Covariance Determinant (MCD) ROC:0.8204, precision @ rank n:0.0877, execution time: 2.274s One-class SVM (OCSVM) ROC:0.9235, precision @ rank n:0.3158, execution time: 0.9646s Principal Component Analysis (PCA) ROC:0.9309, precision @ rank n:0.3158, execution time: 0.007s ... Processing pima.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.7163, precision @ rank n:0.5253, execution time: 0.1484s Cluster-based Local Outlier Factor ROC:0.7661, precision @ rank n:0.6061, execution time: 0.0752s Feature Bagging ROC:0.6448, precision @ rank n:0.4444, execution time: 0.0982s Histogram-base Outlier Detection (HBOS) ROC:0.711, precision @ rank n:0.5354, execution time: 0.002s Isolation Forest ROC:0.6818, precision @ rank n:0.5152, execution time: 0.2176s K Nearest Neighbors (KNN) ROC:0.7395, precision @ rank n:0.5859, execution time: 0.0572s Local Outlier Factor (LOF) ROC:0.6574, precision @ rank n:0.4646, execution time: 0.011s Minimum Covariance Determinant (MCD) ROC:0.7175, precision @ rank n:0.5152, execution time: 0.0491s One-class SVM (OCSVM) ROC:0.6561, precision @ rank n:0.5051, execution time: 0.01s Principal Component Analysis (PCA) ROC:0.6762, precision @ rank n:0.5354, execution time: 0.001s ... Processing satellite.mat ... Angle-based Outlier Detector (ABOD) ROC:0.5653, precision @ rank n:0.3962, execution time: 1.6945s Cluster-based Local Outlier Factor ROC:0.5548, precision @ rank n:0.345, execution time: 0.3288s Feature Bagging ROC:0.572, precision @ rank n:0.4, execution time: 7.0868s Histogram-base Outlier Detection (HBOS) ROC:0.7486, precision @ rank n:0.57, execution time: 0.017s Isolation Forest ROC:0.6825, precision @ rank n:0.5825, execution time: 0.5966s K Nearest Neighbors (KNN) ROC:0.6853, precision @ rank n:0.4988, execution time: 1.1901s Local Outlier Factor (LOF) ROC:0.572, precision @ rank n:0.395, execution time: 0.9465s Minimum Covariance Determinant (MCD) ROC:0.8055, precision @ rank n:0.6762, execution time: 2.5738s One-class SVM (OCSVM) ROC:0.6478, precision @ rank n:0.5225, execution time: 1.3165s Principal Component Analysis (PCA) ROC:0.5923, precision @ rank n:0.465, execution time: 0.022s ... Processing satimage-2.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.2333, execution time: 1.3987s Cluster-based Local Outlier Factor ROC:0.9783, precision @ rank n:0.6667, execution time: 0.2396s Feature Bagging ROC:0.5235, precision @ rank n:0.1667, execution time: 5.8847s Histogram-base Outlier Detection (HBOS) ROC:0.9784, precision @ rank n:0.6, execution time: 0.015s Isolation Forest ROC:0.9952, precision @ rank n:0.8667, execution time: 0.5154s K Nearest Neighbors (KNN) ROC:0.9515, precision @ rank n:0.4333, execution time: 0.9897s Local Outlier Factor (LOF) ROC:0.5257, precision @ rank n:0.1667, execution time: 0.7229s Minimum Covariance Determinant (MCD) ROC:0.9963, precision @ rank n:0.6667, execution time: 2.0875s One-class SVM (OCSVM) ROC:0.9997, precision @ rank n:0.9, execution time: 1.0097s Principal Component Analysis (PCA) ROC:0.9816, precision @ rank n:0.7333, execution time: 0.016s ... Processing shuttle.mat ... Angle-based Outlier Detector (ABOD) ROC:0.6171, precision @ rank n:0.2003, execution time: 13.0948s Cluster-based Local Outlier Factor ROC:0.6273, precision @ rank n:0.2025, execution time: 0.7941s Feature Bagging ROC:0.4725, precision @ rank n:0.0257, execution time: 73.3649s Histogram-base Outlier Detection (HBOS) ROC:0.9871, precision @ rank n:0.9985, execution time: 0.015s Isolation Forest ROC:0.9976, precision @ rank n:0.9596, execution time: 3.2025s K Nearest Neighbors (KNN) ROC:0.6507, precision @ rank n:0.212, execution time: 9.6808s Local Outlier Factor (LOF) ROC:0.5556, precision @ rank n:0.1548, execution time: 10.6754s Minimum Covariance Determinant (MCD) ROC:0.99, precision @ rank n:0.7395, execution time: 10.3525s One-class SVM (OCSVM) ROC:0.9934, precision @ rank n:0.956, execution time: 44.1575s Principal Component Analysis (PCA) ROC:0.9915, precision @ rank n:0.9516, execution time: 0.032s ... Processing vertebral.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.5366, precision @ rank n:0.2143, execution time: 0.0431s Cluster-based Local Outlier Factor ROC:0.3937, precision @ rank n:0.0, execution time: 0.0311s Feature Bagging ROC:0.5279, precision @ rank n:0.1429, execution time: 0.029s Histogram-base Outlier Detection (HBOS) ROC:0.3506, precision @ rank n:0.0, execution time: 0.002s Isolation Forest ROC:0.3772, precision @ rank n:0.0, execution time: 0.1575s K Nearest Neighbors (KNN) ROC:0.4573, precision @ rank n:0.0714, execution time: 0.015s Local Outlier Factor (LOF) ROC:0.4983, precision @ rank n:0.1429, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:0.4103, precision @ rank n:0.0714, execution time: 0.0331s One-class SVM (OCSVM) ROC:0.4686, precision @ rank n:0.0714, execution time: 0.002s Principal Component Analysis (PCA) ROC:0.4085, precision @ rank n:0.0, execution time: 0.001s ... Processing vowels.mat ... Angle-based Outlier Detector (ABOD) ROC:0.9616, precision @ rank n:0.6316, execution time: 0.2336s Cluster-based Local Outlier Factor ROC:0.6496, precision @ rank n:0.1053, execution time: 0.0662s Feature Bagging ROC:0.9365, precision @ rank n:0.3684, execution time: 0.2577s Histogram-base Outlier Detection (HBOS) ROC:0.6876, precision @ rank n:0.1579, execution time: 0.003s Isolation Forest ROC:0.8209, precision @ rank n:0.1579, execution time: 0.2116s K Nearest Neighbors (KNN) ROC:0.9734, precision @ rank n:0.4737, execution time: 0.1113s Local Outlier Factor (LOF) ROC:0.9398, precision @ rank n:0.3684, execution time: 0.0301s Minimum Covariance Determinant (MCD) ROC:0.7243, precision @ rank n:0.1053, execution time: 0.755s One-class SVM (OCSVM) ROC:0.8163, precision @ rank n:0.2632, execution time: 0.0311s Principal Component Analysis (PCA) ROC:0.6297, precision @ rank n:0.1579, execution time: 0.002s ... Processing wbc.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.921, precision @ rank n:0.375, execution time: 0.0632s Cluster-based Local Outlier Factor ROC:0.8906, precision @ rank n:0.375, execution time: 0.0421s Feature Bagging ROC:0.9271, precision @ rank n:0.375, execution time: 0.0792s Histogram-base Outlier Detection (HBOS) ROC:0.9479, precision @ rank n:0.5, execution time: 0.005s Isolation Forest ROC:0.9436, precision @ rank n:0.5, execution time: 0.1564s K Nearest Neighbors (KNN) ROC:0.9444, precision @ rank n:0.5, execution time: 0.0271s Local Outlier Factor (LOF) ROC:0.9227, precision @ rank n:0.375, execution time: 0.007s Minimum Covariance Determinant (MCD) ROC:0.9288, precision @ rank n:0.5, execution time: 0.0632s One-class SVM (OCSVM) ROC:0.9358, precision @ rank n:0.375, execution time: 0.005s Principal Component Analysis (PCA) ROC:0.9262, precision @ rank n:0.375, execution time: 0.001s
# Show the execution time (in seconds) recorded for each detector on each
# dataset; every value covers fitting on the training split plus scoring the
# test split. In a notebook the trailing bare expression renders the table.
print('Time complexity')
time_df
Time complexity
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 1.126 | 0.9786 | 0.6076 | 0.8914 | 0.2507 | 0.1133 | 0.0742 | 1.4578 | 0.0481 | 0.0642 |
0 | cardio | 1831 | 21 | 9.6122 | 0.3579 | 0.0983 | 0.8683 | 0.005 | 0.2767 | 0.1905 | 0.0993 | 0.5835 | 0.0822 | 0.003 |
0 | glass | 214 | 9 | 4.2056 | 0.0361 | 0.0251 | 0.0351 | 0.005 | 0.1584 | 0.013 | 0.003 | 0.0481 | 0.001 | 0.001 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.0602 | 0.0381 | 0.0642 | 0.006 | 0.1785 | 0.0251 | 0.007 | 0.0622 | 0.004 | 0.003 |
0 | letter | 1600 | 32 | 6.25 | 0.365 | 0.1052 | 0.7309 | 0.007 | 0.2416 | 0.1905 | 0.0922 | 1.3225 | 0.0973 | 0.005 |
0 | lympho | 148 | 18 | 4.0541 | 0.0261 | 0.0321 | 0.0261 | 0.005 | 0.1795 | 0.011 | 0.003 | 0.0531 | 0.002 | 0.001 |
0 | mnist | 7603 | 100 | 9.2069 | 6.8502 | 0.9004 | 44.9604 | 0.0411 | 1.6943 | 6.6327 | 5.9395 | 4.1711 | 4.3626 | 0.1433 |
0 | musk | 3062 | 166 | 3.1679 | 1.8951 | 0.2998 | 12.2746 | 0.0582 | 1.0528 | 1.7376 | 1.5411 | 21.114 | 1.1922 | 0.1504 |
0 | optdigits | 5216 | 64 | 2.8758 | 2.243 | 0.3911 | 11.9945 | 0.0271 | 0.8623 | 1.9502 | 1.7256 | 1.6714 | 1.4729 | 0.0521 |
0 | pendigits | 6870 | 16 | 2.2707 | 1.3767 | 0.1955 | 3.6236 | 0.008 | 0.6768 | 0.8352 | 0.5886 | 2.274 | 0.9646 | 0.007 |
0 | pima | 768 | 8 | 34.8958 | 0.1484 | 0.0752 | 0.0982 | 0.002 | 0.2176 | 0.0572 | 0.011 | 0.0491 | 0.01 | 0.001 |
0 | satellite | 6435 | 36 | 31.6395 | 1.6945 | 0.3288 | 7.0868 | 0.017 | 0.5966 | 1.1901 | 0.9465 | 2.5738 | 1.3165 | 0.022 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 1.3987 | 0.2396 | 5.8847 | 0.015 | 0.5154 | 0.9897 | 0.7229 | 2.0875 | 1.0097 | 0.016 |
0 | shuttle | 49097 | 9 | 7.1511 | 13.0948 | 0.7941 | 73.3649 | 0.015 | 3.2025 | 9.6808 | 10.6754 | 10.3525 | 44.1575 | 0.032 |
0 | vertebral | 240 | 6 | 12.5 | 0.0431 | 0.0311 | 0.029 | 0.002 | 0.1575 | 0.015 | 0.003 | 0.0331 | 0.002 | 0.001 |
0 | vowels | 1456 | 12 | 3.4341 | 0.2336 | 0.0662 | 0.2577 | 0.003 | 0.2116 | 0.1113 | 0.0301 | 0.755 | 0.0311 | 0.002 |
0 | wbc | 378 | 30 | 5.5556 | 0.0632 | 0.0421 | 0.0792 | 0.005 | 0.1564 | 0.0271 | 0.007 | 0.0632 | 0.005 | 0.001 |
Analyze the performance of ROC and Precision @ n
# Show the ROC AUC (rounded to 4 digits) of each detector on the test split
# of every dataset. In a notebook the trailing bare expression renders the
# table.
print('ROC Performance')
roc_df
ROC Performance
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 0.7687 | 0.7824 | 0.7796 | 0.8511 | 0.8639 | 0.782 | 0.7787 | 0.8228 | 0.7986 | 0.7997 |
0 | cardio | 1831 | 21 | 9.6122 | 0.5892 | 0.973 | 0.6385 | 0.8373 | 0.9502 | 0.734 | 0.588 | 0.8195 | 0.9478 | 0.9616 |
0 | glass | 214 | 9 | 4.2056 | 0.6951 | 0.7957 | 0.7073 | 0.7073 | 0.7134 | 0.8384 | 0.7043 | 0.8293 | 0.6585 | 0.686 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.9181 | 0.795 | 0.9303 | 0.6052 | 0.8486 | 0.932 | 0.9227 | 0.9669 | 0.8257 | 0.7941 |
0 | letter | 1600 | 32 | 6.25 | 0.8783 | 0.5301 | 0.8947 | 0.6063 | 0.6201 | 0.8573 | 0.8765 | 0.8061 | 0.5927 | 0.5216 |
0 | lympho | 148 | 18 | 4.0541 | 0.9831 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
0 | mnist | 7603 | 100 | 9.2069 | 0.7628 | 0.8204 | 0.7157 | 0.5766 | 0.7939 | 0.8498 | 0.7195 | 0.8713 | 0.854 | 0.8534 |
0 | musk | 3062 | 166 | 3.1679 | 0.2161 | 0.9899 | 0.473 | 0.9999 | 1 | 0.8009 | 0.4629 | 1 | 1 | 1 |
0 | optdigits | 5216 | 64 | 2.8758 | 0.4894 | 0.5329 | 0.5062 | 0.8774 | 0.6735 | 0.406 | 0.5277 | 0.3822 | 0.5171 | 0.526 |
0 | pendigits | 6870 | 16 | 2.2707 | 0.667 | 0.9172 | 0.4889 | 0.9348 | 0.9376 | 0.7371 | 0.4965 | 0.8204 | 0.9235 | 0.9309 |
0 | pima | 768 | 8 | 34.8958 | 0.7163 | 0.7661 | 0.6448 | 0.711 | 0.6818 | 0.7395 | 0.6574 | 0.7175 | 0.6561 | 0.6762 |
0 | satellite | 6435 | 36 | 31.6395 | 0.5653 | 0.5548 | 0.572 | 0.7486 | 0.6825 | 0.6853 | 0.572 | 0.8055 | 0.6478 | 0.5923 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 0.8432 | 0.9783 | 0.5235 | 0.9784 | 0.9952 | 0.9515 | 0.5257 | 0.9963 | 0.9997 | 0.9816 |
0 | shuttle | 49097 | 9 | 7.1511 | 0.6171 | 0.6273 | 0.4725 | 0.9871 | 0.9976 | 0.6507 | 0.5556 | 0.99 | 0.9934 | 0.9915 |
0 | vertebral | 240 | 6 | 12.5 | 0.5366 | 0.3937 | 0.5279 | 0.3506 | 0.3772 | 0.4573 | 0.4983 | 0.4103 | 0.4686 | 0.4085 |
0 | vowels | 1456 | 12 | 3.4341 | 0.9616 | 0.6496 | 0.9365 | 0.6876 | 0.8209 | 0.9734 | 0.9398 | 0.7243 | 0.8163 | 0.6297 |
0 | wbc | 378 | 30 | 5.5556 | 0.921 | 0.8906 | 0.9271 | 0.9479 | 0.9436 | 0.9444 | 0.9227 | 0.9288 | 0.9358 | 0.9262 |
# Show the precision @ rank n (rounded to 4 digits) of each detector on the
# test split of every dataset. In a notebook the trailing bare expression
# renders the table.
print('Precision @ n Performance')
prn_df
Precision @ n Performance
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 0.3571 | 0.4643 | 0.4643 | 0.5714 | 0.6071 | 0.5 | 0.4643 | 0.4286 | 0.5 | 0.5 |
0 | cardio | 1831 | 21 | 9.6122 | 0.1918 | 0.7945 | 0.1781 | 0.4521 | 0.6027 | 0.3562 | 0.1507 | 0.411 | 0.5342 | 0.6849 |
0 | glass | 214 | 9 | 4.2056 | 0.25 | 0.25 | 0.25 | 0 | 0.25 | 0.25 | 0.25 | 0 | 0.25 | 0.25 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.8431 | 0.549 | 0.8039 | 0.3922 | 0.5882 | 0.8824 | 0.7843 | 0.8627 | 0.6863 | 0.5686 |
0 | letter | 1600 | 32 | 6.25 | 0.4375 | 0.0312 | 0.4062 | 0.0938 | 0.0625 | 0.3125 | 0.3438 | 0.1875 | 0.125 | 0.125 |
0 | lympho | 148 | 18 | 4.0541 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
0 | mnist | 7603 | 100 | 9.2069 | 0.3367 | 0.3605 | 0.3741 | 0.1361 | 0.2721 | 0.432 | 0.3673 | 0.2653 | 0.3946 | 0.3878 |
0 | musk | 3062 | 166 | 3.1679 | 0.1 | 0.65 | 0.125 | 0.975 | 1 | 0.175 | 0.125 | 1 | 1 | 1 |
0 | optdigits | 5216 | 64 | 2.8758 | 0.0152 | 0 | 0.0303 | 0.2121 | 0.0303 | 0 | 0.0303 | 0 | 0 | 0 |
0 | pendigits | 6870 | 16 | 2.2707 | 0.0526 | 0.1579 | 0.0526 | 0.2632 | 0.3333 | 0.0702 | 0.0702 | 0.0877 | 0.3158 | 0.3158 |
0 | pima | 768 | 8 | 34.8958 | 0.5253 | 0.6061 | 0.4444 | 0.5354 | 0.5152 | 0.5859 | 0.4646 | 0.5152 | 0.5051 | 0.5354 |
0 | satellite | 6435 | 36 | 31.6395 | 0.3962 | 0.345 | 0.4 | 0.57 | 0.5825 | 0.4988 | 0.395 | 0.6762 | 0.5225 | 0.465 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 0.2333 | 0.6667 | 0.1667 | 0.6 | 0.8667 | 0.4333 | 0.1667 | 0.6667 | 0.9 | 0.7333 |
0 | shuttle | 49097 | 9 | 7.1511 | 0.2003 | 0.2025 | 0.0257 | 0.9985 | 0.9596 | 0.212 | 0.1548 | 0.7395 | 0.956 | 0.9516 |
0 | vertebral | 240 | 6 | 12.5 | 0.2143 | 0 | 0.1429 | 0 | 0 | 0.0714 | 0.1429 | 0.0714 | 0.0714 | 0 |
0 | vowels | 1456 | 12 | 3.4341 | 0.6316 | 0.1053 | 0.3684 | 0.1579 | 0.1579 | 0.4737 | 0.3684 | 0.1053 | 0.2632 | 0.1579 |
0 | wbc | 378 | 30 | 5.5556 | 0.375 | 0.375 | 0.375 | 0.5 | 0.5 | 0.5 | 0.375 | 0.5 | 0.375 | 0.375 |