A comparison of three classifiers in scikit-learn
on the iris dataset.
The iris dataset is a classic and very easy multi-class classification dataset.
import sys
import os
!{sys.executable} -m pip install scikit-learn
# Make sure the output directory for the generated reports exists.
# exist_ok=True makes this a no-op when the directory is already there, so the
# cell can be re-run safely and there is no gap between "check" and "create".
# If a regular file with this name exists, FileExistsError is raised here
# rather than failing later when a report is written into it.
os.makedirs("Example1_Files", exist_ok=True)
Requirement already satisfied: scikit-learn in c:\users\sepkjaer\appdata\local\programs\python\python35-32\lib\site-packages\scikit_learn-0.19.1-py3.5-win32.egg (0.19.1)
You are using pip version 19.0.2, however version 19.1.1 is available. You should consider upgrading via the 'python -m pip install --upgrade pip' command.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from pycm import ConfusionMatrix
# Load the iris dataset and pull out the feature matrix and the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Hold out part of the data for evaluation; random_state=0 pins the shuffle so
# the same train/test partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
from sklearn import svm
# Classifier 1: support vector classifier with a linear kernel and C=0.01.
classifier_1 = svm.SVC(kernel='linear', C=0.01)
classifier_1.fit(X_train, y_train)
y_pred_1 = classifier_1.predict(X_test)
# Confusion matrix of the true test labels against the predicted labels.
cm1 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_1)
cm1.print_matrix()
Predict 0 1 2 Actual 0 13 0 0 1 0 10 6 2 0 0 9
cm1.print_normalized_matrix()
Predict 0 1 2 Actual 0 1.0 0.0 0.0 1 0.0 0.625 0.375 2 0.0 0.0 1.0
cm1.Kappa # Cohen's kappa: agreement between actual and predicted labels, corrected for chance
0.7673469387755101
cm1.Overall_ACC # Overall accuracy: fraction of test samples classified correctly
0.8421052631578947
cm1.SOA1 # Landis and Koch benchmark
'Substantial'
cm1.SOA2 # Fleiss’ benchmark
'Excellent'
cm1.SOA3 # Altman’s benchmark
'Good'
cm1.SOA4 # Cicchetti’s benchmark
'Excellent'
# Save the report for classifier 1 as HTML in the output directory. The name is
# given without an extension; the returned 'Message' path shows ".html" appended.
cm1.save_html(os.path.join("Example1_Files","cm1"))
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm1.html', 'Status': True}
from sklearn.tree import DecisionTreeClassifier
# Classifier 2: decision tree limited to a depth of 5.
# NOTE(review): no random_state is passed, so the fitted tree (and the numbers
# below) may vary between runs - confirm whether this should be pinned.
classifier_2 = DecisionTreeClassifier(max_depth=5)
classifier_2.fit(X_train, y_train)
y_pred_2 = classifier_2.predict(X_test)
# Confusion matrix of the true test labels against the predicted labels.
cm2 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_2)
cm2.print_matrix()
Predict 0 1 2 Actual 0 13 0 0 1 0 15 1 2 0 0 9
cm2.print_normalized_matrix()
Predict 0 1 2 Actual 0 1.0 0.0 0.0 1 0.0 0.9375 0.0625 2 0.0 0.0 1.0
cm2.Kappa # Cohen's kappa: agreement between actual and predicted labels, corrected for chance
0.95978835978836
cm2.Overall_ACC # Overall accuracy: fraction of test samples classified correctly
0.9736842105263158
cm2.SOA1 # Landis and Koch benchmark
'Almost Perfect'
cm2.SOA2 # Fleiss’ benchmark
'Excellent'
cm2.SOA3 # Altman’s benchmark
'Very Good'
cm2.SOA4 # Cicchetti’s benchmark
'Excellent'
# Save the report for classifier 2 as HTML in the output directory. The name is
# given without an extension; the returned 'Message' path shows ".html" appended.
cm2.save_html(os.path.join("Example1_Files","cm2"))
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm2.html', 'Status': True}
from sklearn.ensemble import AdaBoostClassifier
# Classifier 3: AdaBoost constructed with no arguments, i.e. library defaults.
# NOTE(review): no random_state is passed - confirm whether results should be reproducible.
classifier_3 = AdaBoostClassifier()
C:\Users\Sepkjaer\AppData\Local\Programs\Python\Python35-32\lib\site-packages\scikit_learn-0.19.1-py3.5-win32.egg\sklearn\ensemble\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release. from numpy.core.umath_tests import inner1d
# Train the AdaBoost model, then predict labels for the held-out samples.
classifier_3.fit(X_train, y_train)
y_pred_3 = classifier_3.predict(X_test)
# Confusion matrix of the true test labels against the predicted labels.
cm3 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_3)
cm3.print_matrix()
Predict 0 1 2 Actual 0 13 0 0 1 0 15 1 2 0 3 6
cm3.print_normalized_matrix()
Predict 0 1 2 Actual 0 1.0 0.0 0.0 1 0.0 0.9375 0.0625 2 0.0 0.33333 0.66667
cm3.Kappa # Cohen's kappa: agreement between actual and predicted labels, corrected for chance
0.8354978354978355
cm3.Overall_ACC # Overall accuracy: fraction of test samples classified correctly
0.8947368421052632
cm3.SOA1 # Landis and Koch benchmark
'Almost Perfect'
cm3.SOA2 # Fleiss’ benchmark
'Excellent'
cm3.SOA3 # Altman’s benchmark
'Very Good'
cm3.SOA4 # Cicchetti’s benchmark
'Excellent'
# Save the report for classifier 3 as HTML in the output directory. The name is
# given without an extension; the returned 'Message' path shows ".html" appended.
cm3.save_html(os.path.join("Example1_Files","cm3"))
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm3.html', 'Status': True}
from pycm import Compare
# Compare the three confusion matrices; the dictionary keys are the display
# names that appear in the printed ranking.
cp = Compare(
    {
        "C-Support vector": cm1,
        "Decision tree": cm2,
        "AdaBoost": cm3,
    }
)
print(cp)
Best : Decision tree Rank Name Class-Score Overall-Score 1 Decision tree 10.0 6.0 2 AdaBoost 8.7 5.8 3 C-Support vector 8.0 5.43333
# Save the comparison report in the output directory. The name is given without
# an extension; the returned 'Message' path shows ".comp" appended.
cp.save_report(os.path.join("Example1_Files","cp"))
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cp.comp', 'Status': True}