#!/usr/bin/env python
# coding: utf-8
"""Yellowbrick feature importance examples (exported from a Jupyter notebook).

Each ``# In[n]:`` marker corresponds to one notebook cell. Every example cell
builds a ``FeatureImportances`` visualizer around a scikit-learn linear model,
fits it, and calls ``show()``. The cells differ only in the ``relative``,
``absolute`` and ``stack`` keyword arguments passed to the visualizer.
"""

# In[1]:

# ``get_ipython`` is injected by IPython/Jupyter only; skip the inline-plot
# magic when the file is executed as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# # Yellowbrick Feature Importance Examples
#
# This notebook is a sample of the feature importance examples that
# yellowbrick provides.

# In[2]:

import os
import sys

# Must run before the yellowbrick imports below so that a checkout located two
# directories up takes precedence on the import path.
sys.path.insert(0, "../..")

import importlib

import numpy as np
import pandas as pd
import yellowbrick
import yellowbrick as yb
from yellowbrick.features.importances import FeatureImportances
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import manifold, datasets
from sklearn.linear_model import LogisticRegression, LinearRegression

mpl.rcParams["figure.figsize"] = (9, 6)


# ## Load Iris Datasets for Example Code

# In[106]:

# ``return_X_y`` is passed by keyword (it is keyword-only in current
# scikit-learn), matching the ``load_digits`` call further down.
X_iris, y_iris = datasets.load_iris(return_X_y=True)
X_iris_pd = pd.DataFrame(X_iris, columns=['f1', 'f2', 'f3', 'f4'])


# ### Logistic Regression with Mean of Feature Importances
#
# *Should we normalize relative to maximum value or maximum absolute value?*

# In[126]:

viz = FeatureImportances(LogisticRegression())
viz.fit(X_iris, y_iris)
viz.show()


# In[113]:

viz = FeatureImportances(LogisticRegression(), relative=False)
viz.fit(X_iris, y_iris)
viz.show()


# In[114]:

viz = FeatureImportances(LogisticRegression(), absolute=True)
viz.fit(X_iris, y_iris)
viz.show()


# In[116]:

viz = FeatureImportances(LogisticRegression(), relative=False, absolute=True)
viz.fit(X_iris, y_iris)
viz.show()


# ### Logistic Regression with Stacked Feature Importances
#
# *Need to decide how to scale feature importance when `relative=True`*

# In[127]:

viz = FeatureImportances(LogisticRegression(), stack=True)
viz.fit(X_iris, y_iris)
viz.show()


# In[128]:

viz = FeatureImportances(LogisticRegression(), stack=True, relative=False)
viz.fit(X_iris, y_iris)
viz.show()


# In[129]:

viz = FeatureImportances(LogisticRegression(), stack=True, absolute=True)
viz.fit(X_iris, y_iris)
viz.show()


# In[130]:

viz = FeatureImportances(
    LogisticRegression(), stack=True, relative=False, absolute=True
)
viz.fit(X_iris, y_iris)
viz.show()


# ## Load Digits Datasets for Example Code
#
# *Should we add an option to show only top n features?*

# In[121]:

X_digits, y_digits = datasets.load_digits(return_X_y=True)


# In[124]:

viz = FeatureImportances(LogisticRegression(), stack=True, relative=True)
viz.fit(X_digits, y_digits)
viz.show()


# ## Linear Regression

# In[131]:

viz = FeatureImportances(LinearRegression())
viz.fit(X_iris, y_iris)
viz.show()


# In[132]:

viz = FeatureImportances(LinearRegression(), stack=True)
viz.fit(X_iris, y_iris)
viz.show()


# # Playground

# In[102]:

# Re-import the module so local edits to yellowbrick are picked up without
# restarting the session, then rebind the class from the reloaded module.
importlib.reload(yellowbrick.features.importances)
from yellowbrick.features.importances import FeatureImportances


# In[103]:

# The original cell referenced ``X_pd`` / ``y``, which are never defined in
# this file; the DataFrame built above is ``X_iris_pd`` with target ``y_iris``.
viz = FeatureImportances(
    LogisticRegression(), relative=False, absolute=False, stack=True
)
viz.fit(X_iris_pd, y_iris)
viz.show()