!pip install shap
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import shap
# Load the JavaScript that SHAP's interactive plots need in a notebook.
shap.initjs()

# Load the IMDB review texts and their labels, then hold out 20% for testing.
corpus, y = shap.datasets.imdb()
corpus_train, corpus_test, y_train, y_test = train_test_split(
    corpus, y, test_size=0.2, random_state=7
)

# Fit the TF-IDF vocabulary on the training split only and reuse it for the
# test split, so both matrices share the same feature columns.
vectorizer = TfidfVectorizer(binary=True)
X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer `get_feature_names_out()` and fall back for older releases.
# Converted to a plain list so the plotting calls receive the same type as
# before, and computed once because both plots below need it.
if hasattr(vectorizer, "get_feature_names_out"):
    feature_names = list(vectorizer.get_feature_names_out())
else:
    feature_names = list(vectorizer.get_feature_names())

model = LogisticRegression(C=0.1)
model.fit(X_train, y_train)

# SHAP renamed `feature_dependence="independent"` to
# `feature_perturbation="interventional"`; the old keyword is no longer
# accepted by current releases.
explainer = shap.LinearExplainer(
    model, X_train, feature_perturbation="interventional"
)
shap_values = explainer.shap_values(X_test)

# we need to pass a dense version for the plotting functions
# NOTE(review): this densifies the whole sparse test matrix, which may need a
# lot of memory for a large vocabulary -- confirm it fits before running.
X_test_array = X_test.toarray()

# Summary of the SHAP values across the whole test set.
shap.summary_plot(shap_values, X_test_array, feature_names=feature_names)

# Explain a single test review: print its label and text, then show its
# force plot.
ind = 10
print("Positive" if y_test[ind] else "Negative", "Review:")
print(corpus_test[ind])

# Repeated from the top of the script, as in the original; presumably the
# force plot lives in a separate notebook cell -- TODO confirm it is needed.
shap.initjs()
shap.force_plot(
    explainer.expected_value,
    shap_values[ind, :],
    X_test_array[ind, :],
    feature_names=feature_names,
)