import itertools
import json
import math
import pickle

import lightgbm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import sklearn.datasets
import sklearn.ensemble
from sklearn.tree import DecisionTreeRegressor, plot_tree
# Sanity check: the full iris frame is 150 rows x 5 columns (4 features + target).
sklearn.datasets.load_iris(as_frame=True)["frame"].shape
# Output: (150, 5)

# Build a small binary-classification dataset: keep only iris classes 0 and 1,
# then draw a fixed 50-row sample so everything below is reproducible.
data = sklearn.datasets.load_iris(as_frame=True)
df = (
    data["frame"]
    .loc[lambda d: d.target.isin([0, 1])]
    .sample(50, random_state=123)
    .reset_index(drop=True)
)
X, y = df.drop(columns="target"), df["target"]
# Fit a deliberately tiny model — a single tree of depth 1 — so that the SHAP
# values below can be verified against the one split by hand. The commented-out
# hyperparameters are alternatives tried during this exploration; kept as notes.
model = lightgbm.LGBMClassifier(
# learning_rate=0.3,
# boost_from_average=False,
n_estimators=1,
max_depth=1,
# num_leaves=50
# min_child_weight=13,
).fit(X, y)
# NOTE(review): the data=X variant was tried and abandoned — presumably the
# perturbation mode differs when background data is supplied; confirm if revisited.
# explainer = shap.TreeExplainer(model, data=X)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# Compare: class fractions of the subsample vs. exp(expected_value) vs. the raw
# expected_value (a logit). exp(expected_value) appears to give class odds —
# see the recorded output just below ([0.46 0.54] vs [0.85... 1.17...]).
print(df.target.value_counts(normalize=True).sort_index().values)
print(np.exp(explainer.expected_value))
print(explainer.expected_value)
# Output: going through node_sample_weight branch
#   [0.46 0.54] [0.85185185 1.17391304] [-0.16034264791559094, 0.16034264791559094]
# NOTE: for a LightGBM binary classifier, TreeExplainer's shap_values output
# has changed to a list of ndarrays (one per class).
# Inspect the booster's model dump, everything except the per-tree details.
# Hoist dump_model() out of the loop: it re-serializes the whole model on
# every call, and the original called it twice per iteration.
dump = model.booster_.dump_model()
for k in sorted(dump.keys()):
    if k != 'tree_info':
        print(f'{k}: {dump[k]}')
# Output:
# average_output: False feature_importances: {'petal_length_(cm)': 1}
# feature_infos: {'sepal_length_(cm)': {'min_value': 4.4, 'max_value': 7, 'values': []},
#   'sepal_width_(cm)': {'min_value': 2, 'max_value': 4.2, 'values': []},
#   'petal_length_(cm)': {'min_value': 1.2, 'max_value': 5, 'values': []},
#   'petal_width_(cm)': {'min_value': 0.1, 'max_value': 1.8, 'values': []}}
# feature_names: ['sepal_length_(cm)', 'sepal_width_(cm)', 'petal_length_(cm)', 'petal_width_(cm)']
# label_index: 0 max_feature_idx: 3 monotone_constraints: [] name: tree
# num_class: 1 num_tree_per_iteration: 1 objective: binary sigmoid:1
# pandas_categorical: [] version: v3

# The single tree: one split on feature 2 (petal length) at threshold 1.8.
print(json.dumps(dump['tree_info'][0], indent=4))
# Output:
# { "tree_index": 0, "num_leaves": 2, "num_cat": 0, "shrinkage": 1,
#   "tree_structure": { "split_index": 0, "split_feature": 2,
#     "split_gain": 46.11800003051758, "threshold": 1.8, "decision_type": "<=",
#     "default_left": true, "missing_type": "None", "internal_value": 0.160343,
#     "internal_weight": 0, "internal_count": 50,
#     "left_child": { "leaf_index": 0, "leaf_value": -0.057048660532323575,
#       "leaf_weight": 5.46480005979538, "leaf_count": 22 },
#     "right_child": { "leaf_index": 1, "leaf_value": 0.331150104553238,
#       "leaf_weight": 6.9552000761032104, "leaf_count": 28 } } }
# Reproduce the tree's single split by hand: feature index 2 (petal length),
# threshold 1.8 — matching split_feature/threshold in the dump above.
features = X.columns
threshold = 1.8
X_left = X[X[features[2]] <= threshold]
X_right = X[X[features[2]] > threshold]
y_right = y.loc[X_right.index]
print(y_right.value_counts())
y_left = y.loc[X_left.index]
print(y_left.value_counts())
# Output (matches leaf_count 28 right / 22 left in the dump):
# 1    27
# 0     1
# Name: target, dtype: int64
# 0    22
# Name: target, dtype: int64
# Left child: check that the dumped leaf_weight is (approximately) the sum of
# per-sample hessians y_hat * (1 - y_hat) evaluated at the leaf's raw score.
t = -0.057048660532323575            # left leaf_value (raw logit) from the dump
y_hat = np.exp(t) / (1 + np.exp(t))  # sigmoid(t)
hessian = y_hat * (1 - y_hat)
hessian * 22                         # 22 samples in the left leaf
# Output: 5.495527420441854
hessian
# Output: 0.2497967009291752
# Ratio of the dumped leaf_weight to the hand-computed hessian sum — close to 1:
5.46480005979538 / 5.495527420441854
# Output: 0.9944086602985226
# Right child: the same hessian-sum check for the other leaf.
t = 0.331150104553238          # right leaf_value (raw logit) from the dump
y_hat = 1 / (1 + np.exp(-t))   # sigmoid(t), equivalent form to the left-child calc
hessian = y_hat * (1 - y_hat)
hessian * 28                   # 28 samples in the right leaf
# Output: 6.811548002699206
hessian
# Output: 0.24326957152497167
# Ratio of the hand-computed hessian sum to the dumped leaf_weight:
6.811548002699206 / 6.9552000761032104
# Output: 0.9793460904313067
# Total hand-computed hessian mass over both leaves:
5.495527420441854 + 6.811548002699206
# Output: 12.307075423141061
y_hat
# Output: 0.5820391886541324
# Predicted probabilities for the first 5 rows; columns are [P(class 0), P(class 1)].
# Only two distinct rows appear — expected, since the model is a single depth-1 tree.
model.predict_proba(X[:5])
# Output:
# array([[0.5142583 , 0.4857417 ],
#        [0.41796081, 0.58203919],
#        [0.41796081, 0.58203919],
#        [0.5142583 , 0.4857417 ],
#        [0.41796081, 0.58203919]])
# The root node's internal_value from the dump: LightGBM's boost-from-average
# initial raw score (a logit). This initial logit is calculated at
# LightGBM's xentropy_objective.hpp#L134.
internal_value = 0.160343
# Sigmoid of the initial logit recovers the base rate:
np.exp(internal_value) / (1 + np.exp(internal_value))
# Output: 0.5400000869213242
# ...which matches the fraction of examples with y=1 in df (0.54).
# Tie raw (pre-sigmoid) scores back to the probabilities shown earlier.
model.predict_proba(X[:5])
# Output:
# array([[0.5142583 , 0.4857417 ],
#        [0.41796081, 0.58203919],
#        [0.41796081, 0.58203919],
#        [0.5142583 , 0.4857417 ],
#        [0.41796081, 0.58203919]])

# Raw scores for the first two rows are exactly the two leaf values of the tree.
model.predict(X[:2], raw_score=True)
# Output: array([-0.05704866,  0.3311501 ])
t = model.predict(X[:2], raw_score=True)
t
# Output: array([-0.05704866,  0.3311501 ])
# Applying the sigmoid to the raw scores reproduces predict_proba's class-1 column:
np.exp(t) / (1 + np.exp(t))
# Output: array([0.4857417 , 0.58203919])