#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance as skpermutation_importance


# In[2]:

def accuracy_score(y_true, y_pred):
    """Return the fraction of positions at which the two label arrays agree.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, compared element-wise against ``y_true``.

    Returns
    -------
    float
        Mean of the element-wise equality ``y_true == y_pred``.
    """
    # Coerce to arrays so that plain Python sequences are compared
    # element-wise; ``list == list`` would collapse to a single bool.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred)


# In[3]:

def permutation_importance(estimator, X, y, n_repeats=5, random_state=0):
    """Measure how much accuracy drops when each feature column is shuffled.

    For every column of ``X`` the column is shuffled ``n_repeats`` times and
    the accuracy of ``estimator.predict`` on the permuted data is subtracted
    from the accuracy on the unpermuted data.

    Parameters
    ----------
    estimator : object
        Already-fitted model exposing ``predict(X)``.
    X : array-like, two-dimensional
        Feature matrix (indexed as ``X[:, i]`` per feature). It is not
        modified; all shuffling happens on an internal copy.
    y : array-like
        Labels that the predictions are compared against.
    n_repeats : int, default=5
        Number of shuffles evaluated per feature.
    random_state : int, default=0
        Seed passed to ``np.random.RandomState``.

    Returns
    -------
    importances : ndarray of shape (X.shape[1], n_repeats)
        Baseline accuracy minus the accuracy after each shuffle.
    importances_mean : ndarray of shape (X.shape[1],)
        Mean of ``importances`` over the repeats.
    importances_std : ndarray of shape (X.shape[1],)
        Standard deviation of ``importances`` over the repeats.
    """
    X = np.asarray(X)
    baseline_score = accuracy_score(y, estimator.predict(X))
    rng = np.random.RandomState(random_state)

    # Shuffle a private copy: the caller's array stays intact even if
    # ``predict`` raises part-way through, and read-only inputs still work.
    X_permuted = X.copy()
    n_features = X.shape[1]
    permuted_scores = np.zeros((n_features, n_repeats))

    for i in range(n_features):
        column = X_permuted[:, i]  # a view, so shuffling it edits X_permuted
        for j in range(n_repeats):
            # Successive shuffles are applied on top of one another; the
            # column is only reset once all repeats for it are done.
            rng.shuffle(column)
            permuted_scores[i, j] = accuracy_score(
                y, estimator.predict(X_permuted)
            )
        X_permuted[:, i] = X[:, i]

    importances = baseline_score - permuted_scores
    return importances, importances.mean(axis=1), importances.std(axis=1)


# In[4]:

# Compare the hand-written implementation with scikit-learn's on iris.
# NOTE(review): the equality checks rely on the installed scikit-learn
# consuming the random stream the same way as the function above - confirm
# against the scikit-learn version this notebook targets.
X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0).fit(X, y)
importance1, mean1, std1 = permutation_importance(clf, X, y, random_state=0)
importance2 = skpermutation_importance(clf, X, y, random_state=0)
assert np.allclose(importance1, importance2.importances)
assert np.allclose(mean1, importance2.importances_mean)
assert np.allclose(std1, importance2.importances_std)


# In[5]:

# Same comparison on the breast cancer dataset.
X, y = load_breast_cancer(return_X_y=True)
clf = RandomForestClassifier(random_state=0).fit(X, y)
importance1, mean1, std1 = permutation_importance(clf, X, y, random_state=0)
importance2 = skpermutation_importance(clf, X, y, random_state=0)
assert np.allclose(importance1, importance2.importances)
assert np.allclose(mean1, importance2.importances_mean)
assert np.allclose(std1, importance2.importances_std)