PyOD is a comprehensive Python toolkit for identifying outlying objects in multivariate data with both unsupervised and supervised approaches. The models covered in this example include:
Linear Models for Outlier Detection:
1. PCA: Principal Component Analysis (use the sum of weighted projected distances to the eigenvector hyperplane as the outlier scores) 2. MCD: Minimum Covariance Determinant (use the Mahalanobis distances as the outlier scores) 3. OCSVM: One-Class Support Vector Machines
Proximity-Based Outlier Detection Models:
neighbor as the outlier score) 4. Median kNN Outlier Detection (use the median distance to k nearest neighbors as the outlier score) 5. HBOS: Histogram-based Outlier Score
Probabilistic Models for Outlier Detection:
Outlier Ensembles and Combination Frameworks
The corresponding file can be found at /examples/compare_all_models.py
from __future__ import division
from __future__ import print_function
import os
import sys
from time import time
# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
# suppress warnings for clean output
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.lscp import LSCP
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
# Benchmark datasets, one MATLAB .mat file each. The processing loop loads
# every file from the 'data' directory and reads its 'X' and 'y' entries.
mat_file_list = [
    'arrhythmia.mat',
    'cardio.mat',
    'glass.mat',
    'ionosphere.mat',
    'letter.mat',
    'lympho.mat',
    'mnist.mat',
    'musk.mat',
    'optdigits.mat',
    'pendigits.mat',
    'pima.mat',
    'satellite.mat',
    'satimage-2.mat',
    # 'shuttle.mat',  # left disabled, as in the original list
    'vertebral.mat',
    'vowels.mat',
    'wbc.mat',
]
# Shared random state and the (initially empty) result tables.
# Eleven detectors are compared: ten standalone models plus the LSCP ensemble.
random_state = np.random.RandomState(42)

# Four dataset descriptors first, then one column per detector.
df_columns = (
    ['Data', '#Samples', '# Dimensions', 'Outlier Perc']
    + ['ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF', 'MCD',
       'OCSVM', 'PCA', 'LSCP']
)

# One independent frame per reported metric: ROC AUC, precision @ rank n,
# and execution time.
roc_df, prn_df, time_df = (
    pd.DataFrame(columns=df_columns) for _ in range(3)
)
# Base detectors handed to LSCP: ten LOF models whose neighbourhood size
# runs from 5 to 50 in steps of 5.
detector_list = [LOF(n_neighbors=k) for k in range(5, 55, 5)]
def _append_row(frame, row):
    """Return ``frame`` extended by ``row`` as one additional line.

    ``row`` is a flat list with one value per entry of ``df_columns``. It is
    turned into a single-row frame via a transposed one-column frame. The
    concatenation does not reset the index, so every appended row keeps the
    index label 0.
    """
    row_df = pd.DataFrame(row).transpose()
    row_df.columns = df_columns
    return pd.concat([frame, row_df], axis=0)


# Benchmark every detector on every dataset and collect one result row per
# dataset in each of time_df, roc_df and prn_df.
for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()

    # Share of non-zero labels; passed to every detector as `contamination`.
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # Each result row starts with the same four dataset descriptors
    # (file name without its 4-character extension, #rows, #columns, outlier %).
    dataset_info = [mat_file[:-4], X.shape[0], X.shape[1],
                    outliers_percentage]
    roc_list = list(dataset_info)
    prn_list = list(dataset_info)
    time_list = list(dataset_info)

    # 60% of the data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # standardize train and test features before fitting
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # Insertion order matters: results are appended in this order and must
    # line up with the detector columns of df_columns.
    classifiers = {
        'Angle-based Outlier Detector (ABOD)':
            ABOD(contamination=outliers_fraction),
        'Cluster-based Local Outlier Factor':
            CBLOF(contamination=outliers_fraction,
                  check_estimator=False,
                  random_state=random_state),
        'Feature Bagging':
            FeatureBagging(contamination=outliers_fraction,
                           check_estimator=False,
                           random_state=random_state),
        'Histogram-base Outlier Detection (HBOS)':
            HBOS(contamination=outliers_fraction),
        'Isolation Forest':
            IForest(contamination=outliers_fraction,
                    random_state=random_state),
        'K Nearest Neighbors (KNN)':
            KNN(contamination=outliers_fraction),
        'Local Outlier Factor (LOF)':
            LOF(contamination=outliers_fraction),
        'Minimum Covariance Determinant (MCD)':
            MCD(contamination=outliers_fraction,
                random_state=random_state),
        'One-class SVM (OCSVM)':
            OCSVM(contamination=outliers_fraction,
                  random_state=random_state),
        'Principal Component Analysis (PCA)':
            PCA(contamination=outliers_fraction,
                random_state=random_state),
        'Locally Selective Combination (LSCP)':
            LSCP(detector_list,
                 contamination=outliers_fraction,
                 random_state=random_state),
    }

    for clf_name, clf in classifiers.items():
        # The timed window covers fitting on the training split plus scoring
        # the test split.
        started = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        duration = round(time() - started, ndigits=4)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
                  clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        time_list.append(duration)
        roc_list.append(roc)
        prn_list.append(prn)

    # Add this dataset's row to each of the three result tables.
    time_df = _append_row(time_df, time_list)
    roc_df = _append_row(roc_df, roc_list)
    prn_df = _append_row(prn_df, prn_list)
... Processing arrhythmia.mat ... Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.1454s Cluster-based Local Outlier Factor ROC:0.778, precision @ rank n:0.5, execution time: 0.0301s Feature Bagging ROC:0.7736, precision @ rank n:0.5, execution time: 0.5825s Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.0622s Isolation Forest ROC:0.8217, precision @ rank n:0.5, execution time: 0.2477s K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0932s Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0681s Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.5083s One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0471s Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0602s Locally Selective Combination (LSCP) ROC:0.7754, precision @ rank n:0.4286, execution time: 2.4836s ... Processing cardio.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.5952, precision @ rank n:0.1884, execution time: 0.399s Cluster-based Local Outlier Factor ROC:0.8894, precision @ rank n:0.4928, execution time: 0.0261s Feature Bagging ROC:0.5628, precision @ rank n:0.1594, execution time: 0.8914s Histogram-base Outlier Detection (HBOS) ROC:0.8227, precision @ rank n:0.4783, execution time: 0.006s Isolation Forest ROC:0.8953, precision @ rank n:0.4493, execution time: 0.3219s K Nearest Neighbors (KNN) ROC:0.7442, precision @ rank n:0.2899, execution time: 0.3168s Local Outlier Factor (LOF) ROC:0.5459, precision @ rank n:0.1594, execution time: 0.1494s Minimum Covariance Determinant (MCD) ROC:0.7774, precision @ rank n:0.4203, execution time: 0.6467s One-class SVM (OCSVM) ROC:0.914, precision @ rank n:0.4493, execution time: 0.0983s Principal Component Analysis (PCA) ROC:0.9323, precision @ rank n:0.5507, execution time: 0.004s Locally Selective Combination (LSCP) ROC:0.622, precision @ rank n:0.1884, execution time: 5.0424s ... Processing glass.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.8588, precision @ rank n:0.0, execution time: 0.0401s Cluster-based Local Outlier Factor ROC:0.7765, precision @ rank n:0.0, execution time: 0.012s Feature Bagging ROC:0.4235, precision @ rank n:0.0, execution time: 0.0281s Histogram-base Outlier Detection (HBOS) ROC:0.6, precision @ rank n:0.0, execution time: 0.002s Isolation Forest ROC:0.7765, precision @ rank n:0.0, execution time: 0.1684s K Nearest Neighbors (KNN) ROC:0.8353, precision @ rank n:0.0, execution time: 0.018s Local Outlier Factor (LOF) ROC:0.3882, precision @ rank n:0.0, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:0.8353, precision @ rank n:0.0, execution time: 0.0431s One-class SVM (OCSVM) ROC:0.7529, precision @ rank n:0.0, execution time: 0.001s Principal Component Analysis (PCA) ROC:0.7176, precision @ rank n:0.0, execution time: 0.002s Locally Selective Combination (LSCP) ROC:0.7529, precision @ rank n:0.0, execution time: 0.2878s ... Processing ionosphere.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.9302, precision @ rank n:0.8462, execution time: 0.0792s Cluster-based Local Outlier Factor ROC:0.8073, precision @ rank n:0.6154, execution time: 0.0271s Feature Bagging ROC:0.9092, precision @ rank n:0.7692, execution time: 0.0722s Histogram-base Outlier Detection (HBOS) ROC:0.5869, precision @ rank n:0.4038, execution time: 0.008s Isolation Forest ROC:0.8734, precision @ rank n:0.7115, execution time: 0.2347s K Nearest Neighbors (KNN) ROC:0.9358, precision @ rank n:0.8846, execution time: 0.0251s Local Outlier Factor (LOF) ROC:0.9114, precision @ rank n:0.7692, execution time: 0.006s Minimum Covariance Determinant (MCD) ROC:0.9576, precision @ rank n:0.9038, execution time: 0.0682s One-class SVM (OCSVM) ROC:0.8861, precision @ rank n:0.8077, execution time: 0.005s Principal Component Analysis (PCA) ROC:0.8204, precision @ rank n:0.6154, execution time: 0.002s Locally Selective Combination (LSCP) ROC:0.9041, precision @ rank n:0.75, execution time: 0.5264s ... Processing letter.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.9035, precision @ rank n:0.4255, execution time: 0.3579s Cluster-based Local Outlier Factor ROC:0.5555, precision @ rank n:0.0851, execution time: 0.021s Feature Bagging ROC:0.9077, precision @ rank n:0.4894, execution time: 0.751s Histogram-base Outlier Detection (HBOS) ROC:0.6056, precision @ rank n:0.1915, execution time: 0.008s Isolation Forest ROC:0.5945, precision @ rank n:0.1064, execution time: 0.2597s K Nearest Neighbors (KNN) ROC:0.8909, precision @ rank n:0.4043, execution time: 0.1584s Local Outlier Factor (LOF) ROC:0.8821, precision @ rank n:0.4681, execution time: 0.1203s Minimum Covariance Determinant (MCD) ROC:0.8144, precision @ rank n:0.1915, execution time: 1.1551s One-class SVM (OCSVM) ROC:0.5727, precision @ rank n:0.1489, execution time: 0.0852s Principal Component Analysis (PCA) ROC:0.5104, precision @ rank n:0.1277, execution time: 0.004s Locally Selective Combination (LSCP) ROC:0.857, precision @ rank n:0.4043, execution time: 4.7737s ... Processing lympho.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.9357, precision @ rank n:0.3333, execution time: 0.0231s Cluster-based Local Outlier Factor ROC:0.9708, precision @ rank n:0.6667, execution time: 0.015s Feature Bagging ROC:0.924, precision @ rank n:0.3333, execution time: 0.0231s Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.004s Isolation Forest ROC:0.9942, precision @ rank n:0.6667, execution time: 0.2065s K Nearest Neighbors (KNN) ROC:0.9064, precision @ rank n:0.3333, execution time: 0.011s Local Outlier Factor (LOF) ROC:0.924, precision @ rank n:0.3333, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:0.7778, precision @ rank n:0.0, execution time: 0.0491s One-class SVM (OCSVM) ROC:0.9357, precision @ rank n:0.3333, execution time: 0.002s Principal Component Analysis (PCA) ROC:0.9649, precision @ rank n:0.6667, execution time: 0.001s Locally Selective Combination (LSCP) ROC:0.9357, precision @ rank n:0.3333, execution time: 0.2988s ... Processing mnist.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.7978, precision @ rank n:0.3594, execution time: 7.2033s Cluster-based Local Outlier Factor ROC:0.8477, precision @ rank n:0.3915, execution time: 0.0622s Feature Bagging ROC:0.7451, precision @ rank n:0.3452, execution time: 57.4463s Histogram-base Outlier Detection (HBOS) ROC:0.5645, precision @ rank n:0.1174, execution time: 0.0451s Isolation Forest ROC:0.8154, precision @ rank n:0.3096, execution time: 1.7867s K Nearest Neighbors (KNN) ROC:0.8643, precision @ rank n:0.4448, execution time: 7.1259s Local Outlier Factor (LOF) ROC:0.7442, precision @ rank n:0.3523, execution time: 6.4782s Minimum Covariance Determinant (MCD) ROC:0.8926, precision @ rank n:0.4875, execution time: 2.4916s One-class SVM (OCSVM) ROC:0.8595, precision @ rank n:0.3915, execution time: 4.6975s Principal Component Analysis (PCA) ROC:0.8572, precision @ rank n:0.3843, execution time: 0.1494s Locally Selective Combination (LSCP) ROC:0.7873, precision @ rank n:0.3665, execution time: 191.5348s ... Processing musk.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.2111, precision @ rank n:0.0488, execution time: 2.3532s Cluster-based Local Outlier Factor ROC:0.9864, precision @ rank n:0.6829, execution time: 0.0361s Feature Bagging ROC:0.6141, precision @ rank n:0.2195, execution time: 13.4944s Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9756, execution time: 0.0822s Isolation Forest ROC:0.9997, precision @ rank n:0.9756, execution time: 1.5561s K Nearest Neighbors (KNN) ROC:0.8224, precision @ rank n:0.2439, execution time: 2.3803s Local Outlier Factor (LOF) ROC:0.6232, precision @ rank n:0.2195, execution time: 2.0785s Minimum Covariance Determinant (MCD) ROC:0.9984, precision @ rank n:0.878, execution time: 14.7882s One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.3396s Principal Component Analysis (PCA) ROC:0.9999, precision @ rank n:0.9512, execution time: 0.1594s Locally Selective Combination (LSCP) ROC:0.5304, precision @ rank n:0.1463, execution time: 116.7357s ... Processing optdigits.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.4294, precision @ rank n:0.0149, execution time: 3.1473s Cluster-based Local Outlier Factor ROC:0.49, precision @ rank n:0.0, execution time: 0.0261s Feature Bagging ROC:0.4108, precision @ rank n:0.0149, execution time: 14.9847s Histogram-base Outlier Detection (HBOS) ROC:0.835, precision @ rank n:0.209, execution time: 0.0341s Isolation Forest ROC:0.7365, precision @ rank n:0.0299, execution time: 0.8773s K Nearest Neighbors (KNN) ROC:0.3836, precision @ rank n:0.0, execution time: 1.9953s Local Outlier Factor (LOF) ROC:0.3996, precision @ rank n:0.0149, execution time: 1.7457s Minimum Covariance Determinant (MCD) ROC:0.3791, precision @ rank n:0.0, execution time: 1.0467s One-class SVM (OCSVM) ROC:0.532, precision @ rank n:0.0, execution time: 1.4028s Principal Component Analysis (PCA) ROC:0.525, precision @ rank n:0.0, execution time: 0.0451s Locally Selective Combination (LSCP) ROC:0.3975, precision @ rank n:0.0149, execution time: 60.2475s ... Processing pendigits.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.6608, precision @ rank n:0.1224, execution time: 1.4017s Cluster-based Local Outlier Factor ROC:0.934, precision @ rank n:0.2041, execution time: 0.0551s Feature Bagging ROC:0.3992, precision @ rank n:0.0408, execution time: 4.9451s Histogram-base Outlier Detection (HBOS) ROC:0.9209, precision @ rank n:0.3061, execution time: 0.011s Isolation Forest ROC:0.9296, precision @ rank n:0.3061, execution time: 0.5765s K Nearest Neighbors (KNN) ROC:0.7086, precision @ rank n:0.0408, execution time: 0.9656s Local Outlier Factor (LOF) ROC:0.419, precision @ rank n:0.0408, execution time: 0.6006s Minimum Covariance Determinant (MCD) ROC:0.8369, precision @ rank n:0.0612, execution time: 1.8158s One-class SVM (OCSVM) ROC:0.9267, precision @ rank n:0.2449, execution time: 1.0207s Principal Component Analysis (PCA) ROC:0.9359, precision @ rank n:0.2653, execution time: 0.01s Locally Selective Combination (LSCP) ROC:0.487, precision @ rank n:0.0408, execution time: 25.4254s ... Processing pima.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.6864, precision @ rank n:0.5847, execution time: 0.1334s Cluster-based Local Outlier Factor ROC:0.7064, precision @ rank n:0.5678, execution time: 0.0221s Feature Bagging ROC:0.618, precision @ rank n:0.4746, execution time: 0.0942s Histogram-base Outlier Detection (HBOS) ROC:0.7031, precision @ rank n:0.5847, execution time: 0.003s Isolation Forest ROC:0.6704, precision @ rank n:0.5424, execution time: 0.2035s K Nearest Neighbors (KNN) ROC:0.712, precision @ rank n:0.5847, execution time: 0.0662s Local Outlier Factor (LOF) ROC:0.6356, precision @ rank n:0.5169, execution time: 0.01s Minimum Covariance Determinant (MCD) ROC:0.7026, precision @ rank n:0.5678, execution time: 0.0852s One-class SVM (OCSVM) ROC:0.6297, precision @ rank n:0.5085, execution time: 0.012s Principal Component Analysis (PCA) ROC:0.6602, precision @ rank n:0.5508, execution time: 0.002s Locally Selective Combination (LSCP) ROC:0.6538, precision @ rank n:0.5339, execution time: 1.1691s ... Processing satellite.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.5676, precision @ rank n:0.4078, execution time: 1.89s Cluster-based Local Outlier Factor ROC:0.7307, precision @ rank n:0.4539, execution time: 0.025s Feature Bagging ROC:0.5645, precision @ rank n:0.4054, execution time: 8.0935s Histogram-base Outlier Detection (HBOS) ROC:0.7593, precision @ rank n:0.5804, execution time: 0.019s Isolation Forest ROC:0.6947, precision @ rank n:0.5686, execution time: 0.8693s K Nearest Neighbors (KNN) ROC:0.6827, precision @ rank n:0.5, execution time: 1.2533s Local Outlier Factor (LOF) ROC:0.5676, precision @ rank n:0.4066, execution time: 1.0658s Minimum Covariance Determinant (MCD) ROC:0.7991, precision @ rank n:0.6832, execution time: 2.8646s One-class SVM (OCSVM) ROC:0.6551, precision @ rank n:0.5355, execution time: 1.3265s Principal Component Analysis (PCA) ROC:0.5976, precision @ rank n:0.4787, execution time: 0.0271s Locally Selective Combination (LSCP) ROC:0.5809, precision @ rank n:0.4184, execution time: 34.2882s ... Processing satimage-2.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.8497, precision @ rank n:0.1667, execution time: 1.7396s Cluster-based Local Outlier Factor ROC:0.9568, precision @ rank n:0.5, execution time: 0.0531s Feature Bagging ROC:0.4798, precision @ rank n:0.1, execution time: 7.2252s Histogram-base Outlier Detection (HBOS) ROC:0.9948, precision @ rank n:0.7, execution time: 0.0211s Isolation Forest ROC:0.9997, precision @ rank n:0.9333, execution time: 0.6788s K Nearest Neighbors (KNN) ROC:0.9693, precision @ rank n:0.4, execution time: 1.1992s Local Outlier Factor (LOF) ROC:0.4819, precision @ rank n:0.1, execution time: 0.9445s Minimum Covariance Determinant (MCD) ROC:0.996, precision @ rank n:0.7, execution time: 2.1818s One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.114s Principal Component Analysis (PCA) ROC:0.9974, precision @ rank n:0.8333, execution time: 0.0211s Locally Selective Combination (LSCP) ROC:0.6986, precision @ rank n:0.1, execution time: 33.6747s ... Processing vertebral.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.2428, precision @ rank n:0.0, execution time: 0.0371s Cluster-based Local Outlier Factor ROC:0.2588, precision @ rank n:0.0, execution time: 0.023s Feature Bagging ROC:0.2909, precision @ rank n:0.0, execution time: 0.0391s Histogram-base Outlier Detection (HBOS) ROC:0.1524, precision @ rank n:0.0, execution time: 0.003s Isolation Forest ROC:0.2449, precision @ rank n:0.0, execution time: 0.1775s K Nearest Neighbors (KNN) ROC:0.2791, precision @ rank n:0.0, execution time: 0.018s Local Outlier Factor (LOF) ROC:0.293, precision @ rank n:0.0, execution time: 0.003s Minimum Covariance Determinant (MCD) ROC:0.3273, precision @ rank n:0.0, execution time: 0.0491s One-class SVM (OCSVM) ROC:0.2909, precision @ rank n:0.0, execution time: 0.002s Principal Component Analysis (PCA) ROC:0.2439, precision @ rank n:0.0, execution time: 0.001s Locally Selective Combination (LSCP) ROC:0.2503, precision @ rank n:0.0, execution time: 0.3359s ... Processing vowels.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.9726, precision @ rank n:0.5, execution time: 0.2637s Cluster-based Local Outlier Factor ROC:0.574, precision @ rank n:0.0, execution time: 0.0221s Feature Bagging ROC:0.955, precision @ rank n:0.25, execution time: 0.2787s Histogram-base Outlier Detection (HBOS) ROC:0.6683, precision @ rank n:0.0625, execution time: 0.004s Isolation Forest ROC:0.7809, precision @ rank n:0.0625, execution time: 0.2557s K Nearest Neighbors (KNN) ROC:0.9775, precision @ rank n:0.3125, execution time: 0.1153s Local Outlier Factor (LOF) ROC:0.9514, precision @ rank n:0.3125, execution time: 0.0351s Minimum Covariance Determinant (MCD) ROC:0.7081, precision @ rank n:0.0625, execution time: 0.7189s One-class SVM (OCSVM) ROC:0.8244, precision @ rank n:0.1875, execution time: 0.0441s Principal Component Analysis (PCA) ROC:0.5585, precision @ rank n:0.0, execution time: 0.002s Locally Selective Combination (LSCP) ROC:0.9604, precision @ rank n:0.3125, execution time: 2.3161s ... Processing wbc.mat ... 
Angle-based Outlier Detector (ABOD) ROC:0.8803, precision @ rank n:0.2, execution time: 0.0632s Cluster-based Local Outlier Factor ROC:0.9374, precision @ rank n:0.4, execution time: 0.015s Feature Bagging ROC:0.9224, precision @ rank n:0.2, execution time: 0.0702s Histogram-base Outlier Detection (HBOS) ROC:0.9415, precision @ rank n:0.4, execution time: 0.008s Isolation Forest ROC:0.9102, precision @ rank n:0.2, execution time: 0.2055s K Nearest Neighbors (KNN) ROC:0.9034, precision @ rank n:0.2, execution time: 0.0291s Local Outlier Factor (LOF) ROC:0.9211, precision @ rank n:0.2, execution time: 0.007s Minimum Covariance Determinant (MCD) ROC:0.9129, precision @ rank n:0.2, execution time: 0.0602s One-class SVM (OCSVM) ROC:0.9224, precision @ rank n:0.2, execution time: 0.006s Principal Component Analysis (PCA) ROC:0.9102, precision @ rank n:0.2, execution time: 0.002s Locally Selective Combination (LSCP) ROC:0.9116, precision @ rank n:0.4, execution time: 0.5545s
# Show the execution-time table built by the benchmark loop: each detector
# column holds the rounded duration of fit plus decision_function for the
# dataset named in that row.
print('Time complexity')
time_df
Time complexity
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | LSCP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 0.1454 | 0.0301 | 0.5825 | 0.0622 | 0.2477 | 0.0932 | 0.0681 | 0.5083 | 0.0471 | 0.0602 | 2.4836 |
0 | cardio | 1831 | 21 | 9.6122 | 0.399 | 0.0261 | 0.8914 | 0.006 | 0.3219 | 0.3168 | 0.1494 | 0.6467 | 0.0983 | 0.004 | 5.0424 |
0 | glass | 214 | 9 | 4.2056 | 0.0401 | 0.012 | 0.0281 | 0.002 | 0.1684 | 0.018 | 0.003 | 0.0431 | 0.001 | 0.002 | 0.2878 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.0792 | 0.0271 | 0.0722 | 0.008 | 0.2347 | 0.0251 | 0.006 | 0.0682 | 0.005 | 0.002 | 0.5264 |
0 | letter | 1600 | 32 | 6.25 | 0.3579 | 0.021 | 0.751 | 0.008 | 0.2597 | 0.1584 | 0.1203 | 1.1551 | 0.0852 | 0.004 | 4.7737 |
0 | lympho | 148 | 18 | 4.0541 | 0.0231 | 0.015 | 0.0231 | 0.004 | 0.2065 | 0.011 | 0.003 | 0.0491 | 0.002 | 0.001 | 0.2988 |
0 | mnist | 7603 | 100 | 9.2069 | 7.2033 | 0.0622 | 57.4463 | 0.0451 | 1.7867 | 7.1259 | 6.4782 | 2.4916 | 4.6975 | 0.1494 | 191.535 |
0 | musk | 3062 | 166 | 3.1679 | 2.3532 | 0.0361 | 13.4944 | 0.0822 | 1.5561 | 2.3803 | 2.0785 | 14.7882 | 1.3396 | 0.1594 | 116.736 |
0 | optdigits | 5216 | 64 | 2.8758 | 3.1473 | 0.0261 | 14.9847 | 0.0341 | 0.8773 | 1.9953 | 1.7457 | 1.0467 | 1.4028 | 0.0451 | 60.2475 |
0 | pendigits | 6870 | 16 | 2.2707 | 1.4017 | 0.0551 | 4.9451 | 0.011 | 0.5765 | 0.9656 | 0.6006 | 1.8158 | 1.0207 | 0.01 | 25.4254 |
0 | pima | 768 | 8 | 34.8958 | 0.1334 | 0.0221 | 0.0942 | 0.003 | 0.2035 | 0.0662 | 0.01 | 0.0852 | 0.012 | 0.002 | 1.1691 |
0 | satellite | 6435 | 36 | 31.6395 | 1.89 | 0.025 | 8.0935 | 0.019 | 0.8693 | 1.2533 | 1.0658 | 2.8646 | 1.3265 | 0.0271 | 34.2882 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 1.7396 | 0.0531 | 7.2252 | 0.0211 | 0.6788 | 1.1992 | 0.9445 | 2.1818 | 1.114 | 0.0211 | 33.6747 |
0 | vertebral | 240 | 6 | 12.5 | 0.0371 | 0.023 | 0.0391 | 0.003 | 0.1775 | 0.018 | 0.003 | 0.0491 | 0.002 | 0.001 | 0.3359 |
0 | vowels | 1456 | 12 | 3.4341 | 0.2637 | 0.0221 | 0.2787 | 0.004 | 0.2557 | 0.1153 | 0.0351 | 0.7189 | 0.0441 | 0.002 | 2.3161 |
0 | wbc | 378 | 30 | 5.5556 | 0.0632 | 0.015 | 0.0702 | 0.008 | 0.2055 | 0.0291 | 0.007 | 0.0602 | 0.006 | 0.002 | 0.5545 |
Analyze the performance of ROC and Precision @ n
# Show the ROC table built by the benchmark loop: each detector column holds
# the rounded roc_auc_score of the test-split scores against y_test.
print('ROC Performance')
roc_df
ROC Performance
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | LSCP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 0.7687 | 0.778 | 0.7736 | 0.8511 | 0.8217 | 0.782 | 0.7787 | 0.8228 | 0.7986 | 0.7997 | 0.7754 |
0 | cardio | 1831 | 21 | 9.6122 | 0.5952 | 0.8894 | 0.5628 | 0.8227 | 0.8953 | 0.7442 | 0.5459 | 0.7774 | 0.914 | 0.9323 | 0.622 |
0 | glass | 214 | 9 | 4.2056 | 0.8588 | 0.7765 | 0.4235 | 0.6 | 0.7765 | 0.8353 | 0.3882 | 0.8353 | 0.7529 | 0.7176 | 0.7529 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.9302 | 0.8073 | 0.9092 | 0.5869 | 0.8734 | 0.9358 | 0.9114 | 0.9576 | 0.8861 | 0.8204 | 0.9041 |
0 | letter | 1600 | 32 | 6.25 | 0.9035 | 0.5555 | 0.9077 | 0.6056 | 0.5945 | 0.8909 | 0.8821 | 0.8144 | 0.5727 | 0.5104 | 0.857 |
0 | lympho | 148 | 18 | 4.0541 | 0.9357 | 0.9708 | 0.924 | 1 | 0.9942 | 0.9064 | 0.924 | 0.7778 | 0.9357 | 0.9649 | 0.9357 |
0 | mnist | 7603 | 100 | 9.2069 | 0.7978 | 0.8477 | 0.7451 | 0.5645 | 0.8154 | 0.8643 | 0.7442 | 0.8926 | 0.8595 | 0.8572 | 0.7873 |
0 | musk | 3062 | 166 | 3.1679 | 0.2111 | 0.9864 | 0.6141 | 0.9999 | 0.9997 | 0.8224 | 0.6232 | 0.9984 | 1 | 0.9999 | 0.5304 |
0 | optdigits | 5216 | 64 | 2.8758 | 0.4294 | 0.49 | 0.4108 | 0.835 | 0.7365 | 0.3836 | 0.3996 | 0.3791 | 0.532 | 0.525 | 0.3975 |
0 | pendigits | 6870 | 16 | 2.2707 | 0.6608 | 0.934 | 0.3992 | 0.9209 | 0.9296 | 0.7086 | 0.419 | 0.8369 | 0.9267 | 0.9359 | 0.487 |
0 | pima | 768 | 8 | 34.8958 | 0.6864 | 0.7064 | 0.618 | 0.7031 | 0.6704 | 0.712 | 0.6356 | 0.7026 | 0.6297 | 0.6602 | 0.6538 |
0 | satellite | 6435 | 36 | 31.6395 | 0.5676 | 0.7307 | 0.5645 | 0.7593 | 0.6947 | 0.6827 | 0.5676 | 0.7991 | 0.6551 | 0.5976 | 0.5809 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 0.8497 | 0.9568 | 0.4798 | 0.9948 | 0.9997 | 0.9693 | 0.4819 | 0.996 | 1 | 0.9974 | 0.6986 |
0 | vertebral | 240 | 6 | 12.5 | 0.2428 | 0.2588 | 0.2909 | 0.1524 | 0.2449 | 0.2791 | 0.293 | 0.3273 | 0.2909 | 0.2439 | 0.2503 |
0 | vowels | 1456 | 12 | 3.4341 | 0.9726 | 0.574 | 0.955 | 0.6683 | 0.7809 | 0.9775 | 0.9514 | 0.7081 | 0.8244 | 0.5585 | 0.9604 |
0 | wbc | 378 | 30 | 5.5556 | 0.8803 | 0.9374 | 0.9224 | 0.9415 | 0.9102 | 0.9034 | 0.9211 | 0.9129 | 0.9224 | 0.9102 | 0.9116 |
# Show the precision @ rank n table built by the benchmark loop: each detector
# column holds the rounded precision_n_scores of the test-split scores
# against y_test.
print('Precision @ n Performance')
prn_df
Precision @ n Performance
Data | #Samples | # Dimensions | Outlier Perc | ABOD | CBLOF | FB | HBOS | IForest | KNN | LOF | MCD | OCSVM | PCA | LSCP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arrhythmia | 452 | 274 | 14.6018 | 0.3571 | 0.5 | 0.5 | 0.5714 | 0.5 | 0.5 | 0.4643 | 0.4286 | 0.5 | 0.5 | 0.4286 |
0 | cardio | 1831 | 21 | 9.6122 | 0.1884 | 0.4928 | 0.1594 | 0.4783 | 0.4493 | 0.2899 | 0.1594 | 0.4203 | 0.4493 | 0.5507 | 0.1884 |
0 | glass | 214 | 9 | 4.2056 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | ionosphere | 351 | 33 | 35.8974 | 0.8462 | 0.6154 | 0.7692 | 0.4038 | 0.7115 | 0.8846 | 0.7692 | 0.9038 | 0.8077 | 0.6154 | 0.75 |
0 | letter | 1600 | 32 | 6.25 | 0.4255 | 0.0851 | 0.4894 | 0.1915 | 0.1064 | 0.4043 | 0.4681 | 0.1915 | 0.1489 | 0.1277 | 0.4043 |
0 | lympho | 148 | 18 | 4.0541 | 0.3333 | 0.6667 | 0.3333 | 1 | 0.6667 | 0.3333 | 0.3333 | 0 | 0.3333 | 0.6667 | 0.3333 |
0 | mnist | 7603 | 100 | 9.2069 | 0.3594 | 0.3915 | 0.3452 | 0.1174 | 0.3096 | 0.4448 | 0.3523 | 0.4875 | 0.3915 | 0.3843 | 0.3665 |
0 | musk | 3062 | 166 | 3.1679 | 0.0488 | 0.6829 | 0.2195 | 0.9756 | 0.9756 | 0.2439 | 0.2195 | 0.878 | 1 | 0.9512 | 0.1463 |
0 | optdigits | 5216 | 64 | 2.8758 | 0.0149 | 0 | 0.0149 | 0.209 | 0.0299 | 0 | 0.0149 | 0 | 0 | 0 | 0.0149 |
0 | pendigits | 6870 | 16 | 2.2707 | 0.1224 | 0.2041 | 0.0408 | 0.3061 | 0.3061 | 0.0408 | 0.0408 | 0.0612 | 0.2449 | 0.2653 | 0.0408 |
0 | pima | 768 | 8 | 34.8958 | 0.5847 | 0.5678 | 0.4746 | 0.5847 | 0.5424 | 0.5847 | 0.5169 | 0.5678 | 0.5085 | 0.5508 | 0.5339 |
0 | satellite | 6435 | 36 | 31.6395 | 0.4078 | 0.4539 | 0.4054 | 0.5804 | 0.5686 | 0.5 | 0.4066 | 0.6832 | 0.5355 | 0.4787 | 0.4184 |
0 | satimage-2 | 5803 | 36 | 1.2235 | 0.1667 | 0.5 | 0.1 | 0.7 | 0.9333 | 0.4 | 0.1 | 0.7 | 1 | 0.8333 | 0.1 |
0 | vertebral | 240 | 6 | 12.5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | vowels | 1456 | 12 | 3.4341 | 0.5 | 0 | 0.25 | 0.0625 | 0.0625 | 0.3125 | 0.3125 | 0.0625 | 0.1875 | 0 | 0.3125 |
0 | wbc | 378 | 30 | 5.5556 | 0.2 | 0.4 | 0.2 | 0.4 | 0.2 | 0.2 | 0.2 | 0.2 | 0.2 | 0.2 | 0.4 |