import pandas as pd
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support
# imports for onnx conversion and inference
import onnx
import ebm2onnx
import onnxruntime as rt
import numpy as np
import tempfile
# Load the Titanic training set from the working directory.
df = pd.read_csv('titanic_train.csv')

# Columns fed to the model and the column it has to predict.
feature_columns = ['Age', 'Fare', 'Pclass', 'Embarked']
label_column = "Survived"

# Drop only the rows that miss a value the model actually reads. A bare
# dropna() would also discard every row with a NaN in any *other* column of
# the CSV, shrinking the training data for no benefit.
df = df.dropna(subset=feature_columns + [label_column])
df.head()  # notebook preview; has no effect when run as a plain script

# Kept as a one-column DataFrame for any later code that relies on `y`.
y = df[[label_column]]

# LabelEncoder works on 1-D targets, so hand it the label column as a Series
# instead of the 2-D frame (which scikit-learn flattens with a warning).
le = LabelEncoder()
y_enc = le.fit_transform(y[label_column])

x = df[feature_columns]

# Default train/test split proportions; no random_state is set, so the split
# differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y_enc)
# Feature type to use for each input column. The list handed to the
# classifier is resolved through feature_columns so that it always follows
# the column order of x_train.
column_types = {
    'Age': 'continuous',
    'Fare': 'continuous',
    'Pclass': 'continuous',
    'Embarked': 'nominal',
}

ebm = ExplainableBoostingClassifier(
    feature_types=[column_types[name] for name in feature_columns],
    interactions=2,
)
ebm.fit(x_train, y_train)

# Take a look at the generated model.
ebm_global = ebm.explain_global()
show(ebm_global)
# Convert the fitted EBM into an ONNX model. The ONNX input types are taken
# from the dtypes of the training frame.
input_dtypes = ebm2onnx.get_dtype_from_pandas(x_train)
onnx_model = ebm2onnx.to_onnx(model=ebm, dtype=input_dtypes, name="ebm")

# Per-class metrics of the native EBM on the test split; these are the
# reference values for the ONNX model.
ebm_pred = ebm.predict(x_test)
ebm_scores = precision_recall_fscore_support(y_test, ebm_pred, average=None)
pd.DataFrame(ebm_scores, index=['Precision', 'Recall', 'FScore', 'Support'])
# Reserve a file path for the serialized model. The handle is closed as soon
# as the `with` block exits, so no open file descriptor is left behind
# (tempfile.mkstemp() returns an open OS-level descriptor that the caller
# must close). delete=False keeps the path usable below; note that the file
# is not removed afterwards.
with tempfile.NamedTemporaryFile(delete=False) as model_file:
    filename = model_file.name

onnx.save_model(onnx_model, filename)
sess = rt.InferenceSession(filename)

# One input array per feature column, keyed by the column name. Building the
# mapping from feature_columns keeps it in step with the columns the model
# was trained on.
onnx_inputs = {name: x_test[name].values for name in feature_columns}
onnx_pred = sess.run(None, onnx_inputs)

# Per-class metrics of the ONNX model's first output on the same test split.
print(f"metrics of output {sess.get_outputs()[0].name}:")
onnx_scores = precision_recall_fscore_support(y_test, onnx_pred[0], average=None)
pd.DataFrame(onnx_scores, index=['Precision', 'Recall', 'FScore', 'Support'])