# %%HTML
# NOTE(review): the original text opened with the Jupyter cell magic "%%HTML".
# It is not Python syntax, so it is kept only as a comment here -- presumably
# residue of a notebook export; confirm nothing relied on it.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from sklearn.ensemble import RandomForestClassifier

# Two-level colormap (dark gray, light gray) used to colour points by class.
gray_cmap = LinearSegmentedColormap.from_list(
    'gy', [(.3, .3, .3), (.8, .8, .8)], N=2)
np.random.seed(222)  # Seed for random numbers generation


def set_plot_style():
    """Pin the current axes to [-2, 2] x [-2, 2] and label them 'x1' and 'x2'."""
    plt.axis([-2, 2, -2, 2])
    plt.xlabel('x1')
    plt.ylabel('x2')


df = pd.read_csv(
    "https://arteagac.github.io/blog/lime/artificial_data.csv", header='infer')
df[0:5]  # Notebook-style preview of the first five rows

X = df[['x1', 'x2']].values
y = df['y'].values
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)  # Standardization of data

set_plot_style()
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=gray_cmap)

classifier = RandomForestClassifier(n_estimators=100)
classifier.fit(X, y)


# Function to create a mesh grid
def make_meshgrid(x1, x2, h=.02, x1_min=0, x1_max=0, x2_min=0, x2_max=0):
    """Return the points of a regular 2-D grid, one (x1, x2) pair per row.

    Parameters
    ----------
    x1, x2 : array-like with ``min()`` / ``max()``
        Coordinates used to derive the grid extent. They are only consulted
        when no explicit bounds are supplied.
    h : float
        Step between consecutive grid values on both axes.
    x1_min, x1_max, x2_min, x2_max : float
        Explicit grid bounds. When all four are left at 0 the bounds are taken
        from ``x1`` / ``x2`` and padded by 0.1 on each side.

    Returns
    -------
    numpy.ndarray
        Array with two columns holding the flattened grid coordinates.
    """
    # All-zero bounds act as the "not provided" sentinel.
    if x1_min == 0 and x1_max == 0 and x2_min == 0 and x2_max == 0:
        x1_min, x1_max = x1.min() - 0.1, x1.max() + 0.1
        x2_min, x2_max = x2.min() - 0.1, x2.max() + 0.1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, h),
                           np.arange(x2_min, x2_max, h))
    return np.vstack((xx1.ravel(), xx2.ravel())).T


# Create mesh grid and predict class for each element in mesh grid
XX = make_meshgrid(X[:, 0], X[:, 1], h=.07)
yy = classifier.predict(XX)

set_plot_style()
plt.scatter(XX[:, 0], XX[:, 1], c=yy, cmap=gray_cmap)

# Instance whose prediction is going to be explained
Xi = np.array([0.8, -0.7])

set_plot_style()
plt.scatter(XX[:, 0], XX[:, 1], c=yy, cmap=gray_cmap)
plt.scatter(Xi[0], Xi[1], c="blue", marker="o", s=70)

# Random perturbations drawn from a standard normal, one column per feature
num_perturb = 500
X_lime = np.random.normal(0, 1, size=(num_perturb, X.shape[1]))

set_plot_style()
plt.scatter(X_lime[:, 0], X_lime[:, 1], s=2, c="black")

# Class predicted by the random forest for every perturbation
y_lime = classifier.predict(X_lime)

set_plot_style()
plt.scatter(X_lime[:, 0], X_lime[:, 1], s=5, c=y_lime, cmap=gray_cmap)

classifier.predict(Xi.reshape(1, -1))  # Prediction for the explained instance

kernel_width = 0.2
# Euclidean distance between the explained instance and each perturbation.
# The square root matters: the kernel below squares the distance itself, so
# feeding it the *squared* distance would make the weights decay with d**4.
distances = np.sqrt(np.sum((Xi - X_lime) ** 2, axis=1))
# Kernel function: sqrt(exp(-d^2 / kernel_width^2))
weights = np.sqrt(np.exp(-(distances ** 2) / (kernel_width ** 2)))
weights.shape
# Perturbations coloured by their kernel weight, drawn over the classifier's
# grid predictions, with the explained instance marked in blue.
set_plot_style()
plt.scatter(XX[:, 0], XX[:, 1], c=yy, cmap=gray_cmap)
plt.scatter(X_lime[:, 0], X_lime[:, 1], s=10, c=weights, cmap="RdYlGn")
plt.scatter(Xi[0], Xi[1], c="blue", marker="o", s=70)

from sklearn.linear_model import LinearRegression

# Weighted linear surrogate fitted on the perturbations and their predicted
# classes; the kernel weights are passed as per-sample weights.
simpler_model = LinearRegression()
simpler_model.fit(X_lime, y_lime, sample_weight=weights)
y_linmodel = simpler_model.predict(X_lime)
# Convert to binary class: an output of 0.5 or more maps to class 1 (True),
# anything lower to class 0 (False). The comparison must be ">=" rather than
# "<", otherwise the result is the negated class and the markers are swapped.
# NOTE(review): assumes the labels in y_lime are encoded as 0/1 -- confirm.
y_linmodel = y_linmodel >= 0.5

set_plot_style()
plt.scatter(XX[:, 0], XX[:, 1], c=yy, cmap=gray_cmap)
plt.scatter(Xi[0], Xi[1], c="blue", marker="o", s=70)
# "_" marks perturbations the linear model assigns to class 0, "+" those it
# assigns to class 1; marker colour encodes the kernel weight.
plt.scatter(X_lime[y_linmodel == 0, 0], X_lime[y_linmodel == 0, 1],
            c=weights[y_linmodel == 0], cmap="RdYlGn", marker="_", s=80)
plt.scatter(X_lime[y_linmodel == 1, 0], X_lime[y_linmodel == 1, 1],
            c=weights[y_linmodel == 1], cmap="RdYlGn", marker="+", s=80)

simpler_model.coef_  # Coefficients of the fitted linear surrogate