%run ../../../common_functions/import_all.py
from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython
from common_functions.class_helpers import do_plot_conf_mat
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
# Notebook setup: call the three project helpers imported above from
# common_functions.setup_notebook.
# NOTE(review): what each helper configures is not visible in this file --
# presumably IPython display options, matplotlib defaults and the notebook CSS,
# judging by their names; confirm against common_functions/setup_notebook.py.
config_ipython()
setup_matplotlib()
set_css_style()
We will use the Iris Dataset to do a little classification with a Random Forest and look at the performance metrics.
The confusion matrix is computed and plotted with `do_plot_conf_mat`, a helper routine written for this repository.
# Fetch the Iris dataset shipped with sklearn and pull out the data matrix (X)
# and the array of class labels (y)
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the remaining 70% are used for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Create the Random Forest classifier, leaving every parameter at its default
rf = RandomForestClassifier()

# Train on the training split, then predict the classes of the held-out samples
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Plot the confusion matrix for the test labels and the predictions
do_plot_conf_mat(y_test, y_pred)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None, oob_score=False, random_state=None, verbose=0, warm_start=False)
array([[18, 0, 0], [ 0, 14, 0], [ 0, 1, 12]])
# sklearn can summarise precision, recall, f1-score and support for every class
# (plus the overall averages) in a single text report
report = classification_report(y_test, y_pred)
print(report)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       0.93      1.00      0.97        14
           2       1.00      0.92      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.98        45
weighted avg       0.98      0.98      0.98        45
# Accuracy on the test set, computed with sklearn's built-in metric
accuracy_score(y_true=y_test, y_pred=y_pred)
0.9777777777777777