# install pycaret
# pip install pycaret
# pip install pycaret[full]
# Check the installed PyCaret version; this walkthrough was recorded on 2.2.3
# (see the '2.2.3' output below), so API details match that release.
from pycaret.utils import version
version()
'2.2.3'
# Load PyCaret's built-in 'anomaly' sample dataset — an unlabeled
# 1000-row x 10-column numeric DataFrame (shape confirmed below).
from pycaret.datasets import get_data
data = get_data('anomaly')
Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.263995 | 0.764929 | 0.138424 | 0.935242 | 0.605867 | 0.518790 | 0.912225 | 0.608234 | 0.723782 | 0.733591 |
1 | 0.546092 | 0.653975 | 0.065575 | 0.227772 | 0.845269 | 0.837066 | 0.272379 | 0.331679 | 0.429297 | 0.367422 |
2 | 0.336714 | 0.538842 | 0.192801 | 0.553563 | 0.074515 | 0.332993 | 0.365792 | 0.861309 | 0.899017 | 0.088600 |
3 | 0.092108 | 0.995017 | 0.014465 | 0.176371 | 0.241530 | 0.514724 | 0.562208 | 0.158963 | 0.073715 | 0.208463 |
4 | 0.325261 | 0.805968 | 0.957033 | 0.331665 | 0.307923 | 0.355315 | 0.501899 | 0.558449 | 0.885169 | 0.182754 |
# Per-feature summary statistics; all 10 columns lie roughly in [0, 1].
data.describe().transpose()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
Col1 | 1000.0 | 0.491362 | 0.259138 | 0.000000 | 0.287458 | 0.492070 | 0.694192 | 0.994431 |
Col2 | 1000.0 | 0.490200 | 0.251931 | 0.000000 | 0.291449 | 0.488656 | 0.686531 | 1.000000 |
Col3 | 1000.0 | 0.509077 | 0.256606 | 0.000000 | 0.337802 | 0.510077 | 0.686914 | 1.000000 |
Col4 | 1000.0 | 0.497362 | 0.263562 | 0.000000 | 0.256147 | 0.497537 | 0.731949 | 1.000000 |
Col5 | 1000.0 | 0.586120 | 0.334658 | 0.000000 | 0.169680 | 0.782019 | 0.847956 | 1.000000 |
Col6 | 1000.0 | 0.514636 | 0.317470 | 0.000000 | 0.142587 | 0.537953 | 0.856512 | 1.000000 |
Col7 | 1000.0 | 0.508270 | 0.278483 | 0.000000 | 0.246021 | 0.465679 | 0.804935 | 1.000000 |
Col8 | 1000.0 | 0.457541 | 0.220129 | 0.000000 | 0.245539 | 0.515619 | 0.626757 | 1.000000 |
Col9 | 1000.0 | 0.477685 | 0.241432 | 0.000000 | 0.291452 | 0.387753 | 0.723674 | 0.988732 |
Col10 | 1000.0 | 0.495760 | 0.211677 | 0.014495 | 0.329904 | 0.488891 | 0.659528 | 1.000000 |
# get_data returns a plain pandas DataFrame.
type(data)
pandas.core.frame.DataFrame
# 1000 observations, 10 features.
data.shape
(1000, 10)
# Initialize the anomaly-detection experiment. session_id fixes the random
# seed for reproducibility; log_experiment=True records runs (presumably via
# MLflow — confirm against your pycaret install) under the given name.
from pycaret.anomaly import *
s = setup(data, session_id = 123, log_experiment = True, experiment_name = 'anomaly-demo')
Description | Value | |
---|---|---|
0 | session_id | 123 |
1 | Original Data | (1000, 10) |
2 | Missing Values | False |
3 | Numeric Features | 10 |
4 | Categorical Features | 0 |
5 | Ordinal Features | False |
6 | High Cardinality Features | False |
7 | High Cardinality Method | None |
8 | Transformed Data | (1000, 10) |
9 | CPU Jobs | -1 |
10 | Use GPU | False |
11 | Log Experiment | True |
12 | Experiment Name | anomaly-demo |
13 | USI | 5118 |
14 | Imputation Type | simple |
15 | Iterative Imputation Iteration | None |
16 | Numeric Imputer | mean |
17 | Iterative Imputation Numeric Model | None |
18 | Categorical Imputer | mode |
19 | Iterative Imputation Categorical Model | None |
20 | Unknown Categoricals Handling | least_frequent |
21 | Normalize | False |
22 | Normalize Method | None |
23 | Transformation | False |
24 | Transformation Method | None |
25 | PCA | False |
26 | PCA Method | None |
27 | PCA Components | None |
28 | Ignore Low Variance | False |
29 | Combine Rare Levels | False |
30 | Rare Level Threshold | None |
31 | Numeric Binning | False |
32 | Remove Outliers | False |
33 | Outliers Threshold | None |
34 | Remove Multicollinearity | False |
35 | Multicollinearity Threshold | None |
36 | Clustering | False |
37 | Clustering Iteration | None |
38 | Polynomial Features | False |
39 | Polynomial Degree | None |
40 | Trignometry Features | False |
41 | Polynomial Threshold | None |
42 | Group Features | False |
43 | Feature Selection | False |
44 | Features Selection Threshold | None |
45 | Feature Interaction | False |
46 | Feature Ratio | False |
47 | Interaction Threshold | None |
# List all available anomaly detectors; each is backed by a pyod estimator
# (see the Reference column in the output table).
models()
Name | Reference | |
---|---|---|
ID | ||
abod | Angle-base Outlier Detection | pyod.models.abod.ABOD |
cluster | Clustering-Based Local Outlier | pyod.models.cblof.CBLOF |
cof | Connectivity-Based Local Outlier | pyod.models.cof.COF |
iforest | Isolation Forest | pyod.models.iforest.IForest |
histogram | Histogram-based Outlier Detection | pyod.models.hbos.HBOS |
knn | K-Nearest Neighbors Detector | pyod.models.knn.KNN |
lof | Local Outlier Factor | pyod.models.lof.LOF |
svm | One-class SVM detector | pyod.models.ocsvm.OCSVM |
pca | Principal Component Analysis | pyod.models.pca.PCA |
mcd | Minimum Covariance Determinant | pyod.models.mcd.MCD |
sod | Subspace Outlier Detection | pyod.models.sod.SOD |
sos | Stochastic Outlier Selection | pyod.models.sos.SOS |
# Train an Isolation Forest with defaults; the repr below shows the default
# contamination of 0.05 and n_estimators=100.
iforest = create_model('iforest')
iforest
IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1, random_state=123, verbose=0)
# Re-train with a custom hyperparameter: 200 trees instead of the default 100.
# Extra keyword arguments are forwarded to the underlying pyod estimator.
iforest = create_model('iforest', n_estimators=200)
iforest
IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1, random_state=123, verbose=0)
# Train a KNN-based detector and print its configuration.
knn = create_model('knn')
print(knn)
KNN(algorithm='auto', contamination=0.05, leaf_size=30, method='largest', metric='minkowski', metric_params=None, n_jobs=-1, n_neighbors=5, p=2, radius=1.0)
# 2-D UMAP projection of the data colored by anomaly label
# (the 'umap' plot needs the optional umap-learn package — TODO confirm installed).
plot_model(iforest, plot = 'umap')
# Persist the full preprocessing pipeline + trained model to 'abc.pkl'
# (the saved tuple is echoed below).
save_model(iforest, model_name = 'abc')
Transformation Pipeline and Model Succesfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='regression', numerical_features=[], target='UNSUPERVISED_DUMMY_TARGET', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='most frequent', fill_value_categorical=None, fill_value_numerical=None... ('fix_perfect', 'passthrough'), ('clean_names', Clean_Colum_Names()), ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1, random_state=123, verbose=0)]], verbose=False), 'abc.pkl')
# Reload the saved pipeline + model from 'abc.pkl' (extension added automatically).
l = load_model('abc')
Transformation Pipeline and Model Successfully Loaded
from pycaret.anomaly import load_model, predict_model
# Load the model previously deployed to S3 (see deploy_model below) and score
# new data with it. NOTE(review): the original transcript had an invalid
# '.......' placeholder for the credentials; pass the same authentication dict
# used at deployment time.
l = load_model('iforest-abc', platform = 'aws', authentication = {'bucket' : 'pycaret-test'})
# predict_model appends 'Anomaly' (0/1 flag) and 'Anomaly_Score' columns.
p = predict_model(l, data=data)
p.head()
Transformation Pipeline and Model Successfully Loaded
Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | Anomaly | Anomaly_Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.263995 | 0.764929 | 0.138424 | 0.935242 | 0.605867 | 0.518790 | 0.912225 | 0.608234 | 0.723782 | 0.733591 | 0 | -0.030361 |
1 | 0.546092 | 0.653975 | 0.065575 | 0.227772 | 0.845269 | 0.837066 | 0.272379 | 0.331679 | 0.429297 | 0.367422 | 0 | -0.078290 |
2 | 0.336714 | 0.538842 | 0.192801 | 0.553563 | 0.074515 | 0.332993 | 0.365792 | 0.861309 | 0.899017 | 0.088600 | 1 | 0.026938 |
3 | 0.092108 | 0.995017 | 0.014465 | 0.176371 | 0.241530 | 0.514724 | 0.562208 | 0.158963 | 0.073715 | 0.208463 | 1 | 0.053551 |
4 | 0.325261 | 0.805968 | 0.957033 | 0.331665 | 0.307923 | 0.355315 | 0.501899 | 0.558449 | 0.885169 | 0.182754 | 0 | -0.015639 |
# Full scored frame: the 10 original features plus Anomaly (0/1) and
# Anomaly_Score columns — 1000 rows x 12 columns.
p
Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | Anomaly | Anomaly_Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.263995 | 0.764929 | 0.138424 | 0.935242 | 0.605867 | 0.518790 | 0.912225 | 0.608234 | 0.723782 | 0.733591 | 0 | -0.030361 |
1 | 0.546092 | 0.653975 | 0.065575 | 0.227772 | 0.845269 | 0.837066 | 0.272379 | 0.331679 | 0.429297 | 0.367422 | 0 | -0.078290 |
2 | 0.336714 | 0.538842 | 0.192801 | 0.553563 | 0.074515 | 0.332993 | 0.365792 | 0.861309 | 0.899017 | 0.088600 | 1 | 0.026938 |
3 | 0.092108 | 0.995017 | 0.014465 | 0.176371 | 0.241530 | 0.514724 | 0.562208 | 0.158963 | 0.073715 | 0.208463 | 1 | 0.053551 |
4 | 0.325261 | 0.805968 | 0.957033 | 0.331665 | 0.307923 | 0.355315 | 0.501899 | 0.558449 | 0.885169 | 0.182754 | 0 | -0.015639 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 0.305055 | 0.656837 | 0.331665 | 0.822525 | 0.907127 | 0.882276 | 0.855732 | 0.584786 | 0.808640 | 0.242762 | 0 | -0.082756 |
996 | 0.812627 | 0.864258 | 0.616604 | 0.167966 | 0.811223 | 0.938071 | 0.418462 | 0.472306 | 0.348347 | 0.671129 | 0 | -0.065453 |
997 | 0.250967 | 0.138627 | 0.919703 | 0.461234 | 0.886555 | 0.869888 | 0.800908 | 0.530324 | 0.779433 | 0.234952 | 0 | -0.055405 |
998 | 0.502436 | 0.936820 | 0.580062 | 0.540773 | 0.151995 | 0.059452 | 0.225220 | 0.242755 | 0.279385 | 0.538755 | 0 | -0.068005 |
999 | 0.457991 | 0.017755 | 0.714113 | 0.125992 | 0.063316 | 0.154739 | 0.922974 | 0.692299 | 0.816777 | 0.307592 | 0 | -0.012268 |
1000 rows × 12 columns
# 'fraction' sets the expected proportion of outliers (it maps to the pyod
# 'contamination' parameter — here ~5% of rows will be flagged).
iforest = create_model('iforest', fraction = 0.05)
# assign_model scores the training data itself, appending 'Anomaly' and
# 'Anomaly_Score' columns.
results = assign_model(iforest)
results.head()
Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | Anomaly | Anomaly_Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.263995 | 0.764929 | 0.138424 | 0.935242 | 0.605867 | 0.518790 | 0.912225 | 0.608234 | 0.723782 | 0.733591 | 0 | -0.035865 |
1 | 0.546092 | 0.653975 | 0.065575 | 0.227772 | 0.845269 | 0.837066 | 0.272379 | 0.331679 | 0.429297 | 0.367422 | 0 | -0.084927 |
2 | 0.336714 | 0.538842 | 0.192801 | 0.553563 | 0.074515 | 0.332993 | 0.365792 | 0.861309 | 0.899017 | 0.088600 | 1 | 0.025356 |
3 | 0.092108 | 0.995017 | 0.014465 | 0.176371 | 0.241530 | 0.514724 | 0.562208 | 0.158963 | 0.073715 | 0.208463 | 1 | 0.042415 |
4 | 0.325261 | 0.805968 | 0.957033 | 0.331665 | 0.307923 | 0.355315 | 0.501899 | 0.558449 | 0.885169 | 0.182754 | 0 | -0.023408 |
# Distribution of raw anomaly scores; in the sample output above, flagged
# rows (Anomaly == 1) carry positive scores, i.e. they sit in the right tail.
results['Anomaly_Score'].hist(bins=100, figsize=(10,6))
<AxesSubplot:>
from sklearn import set_config
# NOTE(review): in sklearn, display=None leaves the repr setting unchanged;
# the explicit options are 'text' and 'diagram'. Given the expanded pipeline
# repr shown below, 'diagram' may have been intended — confirm.
set_config(display=None)
# Echo the loaded pipeline: preprocessing steps followed by the trained IForest.
l
Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='regression', numerical_features=[], target='UNSUPERVISED_DUMMY_TARGET', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='most frequent', fill_value_categorical=None, fill_value_numerical=None... ('fix_perfect', 'passthrough'), ('clean_names', Clean_Colum_Names()), ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1, random_state=123, verbose=0)]], verbose=False)
DataTypes_Auto_infer(ml_usecase='regression', target='UNSUPERVISED_DUMMY_TARGET')
Simple_Imputer(categorical_strategy='most frequent', fill_value_categorical=None, fill_value_numerical=None, numeric_strategy='mean', target_variable=None)
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent', target='UNSUPERVISED_DUMMY_TARGET')
passthrough
passthrough
passthrough
passthrough
New_Catagorical_Levels_in_TestData(replacement_strategy='least frequent', target='UNSUPERVISED_DUMMY_TARGET')
Make_Time_Features(list_of_features=None, time_feature=Index([], dtype='object'))
passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
passthrough
Dummify(target='UNSUPERVISED_DUMMY_TARGET')
passthrough
Clean_Colum_Names()
passthrough
passthrough
passthrough
passthrough
IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=200, n_jobs=-1, random_state=123, verbose=0)
# Push the trained model to the given AWS S3 bucket under the name
# 'iforest-abc'; assumes AWS credentials are already configured in the
# environment (TODO confirm — not shown in this transcript).
deploy_model(iforest, 'iforest-abc', platform = 'aws',
authentication = {'bucket' : 'pycaret-test'})
Model Succesfully Deployed on AWS S3
# Echo the current model: a plain pyod IForest estimator (see repr below).
iforest
IForest(behaviour='new', bootstrap=False, contamination=0.05, max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1, random_state=123, verbose=0)
# create_model returns the raw pyod estimator, not a pycaret wrapper.
type(iforest)
pyod.models.iforest.IForest
# Re-score the training data with the current model; same Anomaly /
# Anomaly_Score columns as before.
results = assign_model(iforest)
results.head()
Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | Anomaly | Anomaly_Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.263995 | 0.764929 | 0.138424 | 0.935242 | 0.605867 | 0.518790 | 0.912225 | 0.608234 | 0.723782 | 0.733591 | 0 | -0.035865 |
1 | 0.546092 | 0.653975 | 0.065575 | 0.227772 | 0.845269 | 0.837066 | 0.272379 | 0.331679 | 0.429297 | 0.367422 | 0 | -0.084927 |
2 | 0.336714 | 0.538842 | 0.192801 | 0.553563 | 0.074515 | 0.332993 | 0.365792 | 0.861309 | 0.899017 | 0.088600 | 1 | 0.025356 |
3 | 0.092108 | 0.995017 | 0.014465 | 0.176371 | 0.241530 | 0.514724 | 0.562208 | 0.158963 | 0.073715 | 0.208463 | 1 | 0.042415 |
4 | 0.325261 | 0.805968 | 0.957033 | 0.331665 | 0.307923 | 0.355315 | 0.501899 | 0.558449 | 0.885169 | 0.182754 | 0 | -0.023408 |