This notebook demonstrates how to use the Responsible AI Widget's Error Analysis dashboard to understand a model trained on the Census dataset. The goal of this sample notebook is to train a classifier with scikit-learn that predicts whether income is greater than or less than 50K, and then to explore the model's errors and explanations:
# %pip install --upgrade interpret-community
# %pip install --upgrade raiwidgets
import pandas as pd
import zipfile
from lightgbm import LGBMClassifier
# Explainer Used: Mimic Explainer
from interpret.ext.blackbox import MimicExplainer
from interpret.ext.glassbox import LinearExplainableModel
from interpret.ext.glassbox import LGBMExplainableModel
# You can use one of the following four interpretable models as a global surrogate for the black box model
#from interpret.ext.glassbox import LGBMExplainableModel
#from interpret.ext.glassbox import LinearExplainableModel
#from interpret.ext.glassbox import SGDExplainableModel
#from interpret.ext.glassbox import DecisionTreeExplainableModel
# Alternatively, use the PFI Explainer
#from interpret.ext.blackbox import PFIExplainer
from raiutils.dataset import fetch_dataset
outdirname = 'erroranalysis.12.3.20'
zipfilename = outdirname + '.zip'
fetch_dataset('https://publictestdatasets.blob.core.windows.net/data/' + zipfilename, zipfilename)
with zipfile.ZipFile(zipfilename, 'r') as unzip:
    unzip.extractall('.')
train_data = pd.read_csv('adult-train.csv', skipinitialspace=True)
test_data = pd.read_csv('adult-test.csv', skipinitialspace=True)
from sklearn.model_selection import train_test_split
test_data_full = test_data
test_data, _ = train_test_split(test_data, test_size=0.9, random_state=7)
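Only 10% of the test rows are kept for computing explanations (test_size=0.9 discards the remaining 90%), while the full test set is preserved as test_data_full for the dashboard. An optional size check, not part of the original sample, makes the split visible:
# Optional sanity check: the explanation subset should be roughly 10% of the full test set
print('full test rows: {}, subsampled test rows: {}'.format(test_data_full.shape[0], test_data.shape[0]))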
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
def split_label(dataset):
    X = dataset.drop(['income'], axis=1)
    y = dataset[['income']]
    return X, y
def clean_data(X, y):
    features = X.columns.values.tolist()
    classes = y['income'].unique().tolist()
    pipe_cfg = {
        'num_cols': X.dtypes[X.dtypes == 'int64'].index.values.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }
    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        # Note: scikit-learn >= 1.2 renames the 'sparse' parameter to 'sparse_output'
        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])
    X = feat_pipe.fit_transform(X)
    return X, feat_pipe, features, classes
X_train_original, y_train = split_label(train_data)
X_train, feat_pipe, features, classes = clean_data(X_train_original, y_train)
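After fitting, the transformer expands each categorical column into one-hot indicator columns, so the transformed matrix is wider than the original frame; an optional shape check (not in the original sample) makes that visible:
# Optional: compare the raw and transformed feature counts
print('original feature count: {}'.format(len(features)))
print('transformed training shape: {}'.format(X_train.shape))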
X_test_original, y_test = split_label(test_data)
X_test_original_full, y_test_full = split_label(test_data_full)
y_test = y_test['income'].to_numpy()
y_test_full = y_test_full['income'].to_numpy()
X_test = feat_pipe.transform(X_test_original)
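# The [1:] slices below assume 'income' is the first column of these CSVs; dropping it recovers the raw feature and dtype lists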
features = train_data.columns.values[1:].tolist()
classes = y_train['income'].unique().tolist()
categorical_features = train_data.dtypes[train_data.dtypes == 'object'].index.values[1:].tolist()
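# A single boosting iteration keeps this sample fast to train; increase n_estimators for a more accurate model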
clf = LGBMClassifier(n_estimators=1)
model = clf.fit(X_train, y_train['income'])
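Before computing explanations, it can help to confirm that the preprocessing and classifier work end to end; a minimal accuracy check on the subsampled test set (an optional addition to the walkthrough):
from sklearn.metrics import accuracy_score
# Score the trained classifier on the transformed (subsampled) test set
test_preds = model.predict(X_test)
print('test accuracy: {:.3f}'.format(accuracy_score(y_test, test_preds)))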
from interpret_community.common.constants import ModelTask
# Using the Mimic Explainer (a global surrogate model) to explain the black box model
model_task = ModelTask.Classification
explainer = MimicExplainer(model, X_train_original, LGBMExplainableModel,
augment_data=True, max_num_of_augmentations=10,
features=features, classes=classes, model_task=model_task,
transformations=feat_pipe)
Explain overall model predictions (global explanation)
# Pass in the test dataset for evaluation examples - note it must be a representative sample of the original data
# X_train can be passed as well, but explanations take longer to compute with more examples (though they may be more accurate)
global_explanation = explainer.explain_global(X_test_original)
# Sorted global feature importance values
print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))
# Corresponding feature names
print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))
# Feature ranks (based on original order of features)
print('global importance rank: {}'.format(global_explanation.global_importance_rank))
# Note: do not run this cell if using PFIExplainer, as it does not support per-class explanations
# Per class feature names
print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))
# Per class feature importance values
print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))
# Print out a dictionary that holds the sorted feature importance names and values
print('global importance dict: {}'.format(global_explanation.get_feature_importance_dict()))
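Besides the global explanation, the Mimic Explainer can also produce local, per-instance explanations via interpret-community's explain_local API; an optional sketch on a few raw test rows:
# Explain individual predictions for a handful of evaluation examples (local explanation)
local_explanation = explainer.explain_local(X_test_original[:5])
print('ranked local importance names: {}'.format(local_explanation.get_ranked_local_names()))
print('ranked local importance values: {}'.format(local_explanation.get_ranked_local_values()))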
from sklearn.pipeline import Pipeline
dashboard_pipeline = Pipeline(steps=[('preprocess', feat_pipe), ('model', model)])
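Because both steps are already fitted, the wrapped pipeline can score raw, untransformed rows directly; an optional spot check (not in the original sample):
# Spot check: the wrapped pipeline should accept raw rows and return class predictions
print(dashboard_pipeline.predict(X_test_original_full[:5]))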
from raiwidgets import ErrorAnalysisDashboard
# Run error analysis on the full test dataset, with explanations computed on the smaller subsampled test set
# Note in this case we need to provide the true_y_dataset parameter with labels matching the
# original full dataset
ErrorAnalysisDashboard(global_explanation, dashboard_pipeline, dataset=X_test_original_full,
true_y=y_test, categorical_features=categorical_features,
true_y_dataset=y_test_full)