%matplotlib inline
This notebook walks through the feature importance visualizations that Yellowbrick provides, exercising the FeatureImportances visualizer with different option combinations.
import os
import sys
sys.path.insert(0, "../..")
import importlib
import numpy as np
import pandas as pd
import yellowbrick
import yellowbrick as yb
from yellowbrick.features.importances import FeatureImportances
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import manifold, datasets
from sklearn.linear_model import LogisticRegression, LinearRegression
mpl.rcParams["figure.figsize"] = (9,6)
X_iris, y_iris = datasets.load_iris(return_X_y=True)
X_iris_pd = pd.DataFrame(X_iris, columns=['f1', 'f2', 'f3', 'f4'])
Should we normalize relative to the maximum value or to the maximum absolute value?
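For reference, a quick numeric sketch of the two options; the coefficient vector below is made up for illustration and is not taken from a fitted model.
# Hypothetical coefficient vector (illustration only, not from a fitted model)
coefs = np.array([2.0, -5.0, 1.0, 0.5])

# Option 1: normalize by the maximum value. The largest positive coefficient
# becomes 100%, but negative coefficients can overshoot past -100%.
rel_to_max = 100.0 * coefs / coefs.max()
print(rel_to_max)       # [ 100. -250.   50.   25.]

# Option 2: normalize by the maximum absolute value. Every bar stays within
# the [-100%, 100%] range regardless of sign.
rel_to_max_abs = 100.0 * coefs / np.abs(coefs).max()
print(rel_to_max_abs)   # [ 40. -100.  20.  10.]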
viz = FeatureImportances(LogisticRegression())
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), relative=False)
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), absolute=True)
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), relative=False, absolute=True)
viz.fit(X_iris, y_iris)
viz.show()
Need to decide how to scale feature importances when relative=True and stack=True.
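One possibility is sketched below (not necessarily what the visualizer will end up doing): treat each feature's stacked bar length as the sum of its per-class absolute coefficients and scale so the longest stacked bar is 100%.
# Sketch of one possible scaling convention for stacked, relative importances
# (illustration only; the visualizer may settle on something different).
model = LogisticRegression().fit(X_iris, y_iris)
coefs = model.coef_                          # shape: (n_classes, n_features)
totals = np.abs(coefs).sum(axis=0)           # stacked bar length per feature
scaled = 100.0 * coefs / totals.max()        # longest stacked bar becomes 100%
print(np.abs(scaled).sum(axis=0))            # per-feature stacked lengths, max 100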
viz = FeatureImportances(LogisticRegression(), stack=True)
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), stack=True, relative=False)
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), stack=True, absolute=True)
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LogisticRegression(), stack=True, relative=False, absolute=True)
viz.fit(X_iris, y_iris)
viz.show()
Should we add an option to show only the top n features? A manual sketch follows the digits example below.
X_digits, y_digits = datasets.load_digits(return_X_y=True)
viz = FeatureImportances(LogisticRegression(), stack=True, relative=True)
viz.fit(X_digits, y_digits)
viz.show()
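Pending a built-in option, a top-n view can be approximated by hand: rank the digits features by mean absolute coefficient across classes and plot only the n largest. The pixel_i labels below are made up for illustration.
# Manual top-n sketch (illustrative only)
n = 10
model = LogisticRegression(max_iter=2000).fit(X_digits, y_digits)
mean_abs_coef = np.abs(model.coef_).mean(axis=0)     # one score per pixel
top = np.argsort(mean_abs_coef)[-n:]                 # indices of the n largest
plt.barh([f"pixel_{i}" for i in top], mean_abs_coef[top])
plt.title(f"Top {n} features by mean |coefficient|")
plt.show()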
viz = FeatureImportances(LinearRegression())
viz.fit(X_iris, y_iris)
viz.show()
viz = FeatureImportances(LinearRegression(), stack=True)
viz.fit(X_iris, y_iris)
viz.show()
importlib.reload(yellowbrick.features.importances)
from yellowbrick.features.importances import FeatureImportances
viz = FeatureImportances(LogisticRegression(), relative=False, absolute=False, stack=True)
viz.fit(X_iris_pd, y_iris)
viz.show()