from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.metrics import roc_auc_score, f1_score, matthews_corrcoef, balanced_accuracy_score, auc
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
from my_util import *
from lime.lime.lime_tabular import LimeTabularExplainer  # note: the lime.lime.* path implies a local copy of the LIME repo on sys.path; its explain_instance is unpacked into 5 values below, unlike stock LIME
import sys, os, pickle, time
sys.path.append(os.path.abspath('../'))
from pyexplainer.pyexplainer_pyexplainer import *
# from datetime import datetime
from IPython.display import display
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")
data_path = './dataset/'
result_dir = './eval_result/'
dump_dataframe_dir = './dump_df/'
pyExp_dir = './explainer_obj_20_4_2021/'
other_object_dir = './other_object/'
proj_name = 'qt' # ['openstack','qt']
if not os.path.exists(result_dir):
    os.makedirs(result_dir)
if not os.path.exists(dump_dataframe_dir):
    os.makedirs(dump_dataframe_dir)
if not os.path.exists(pyExp_dir):
    os.makedirs(pyExp_dir)
if not os.path.exists(other_object_dir):
    os.makedirs(other_object_dir)
x_train, x_test, y_train, y_test = prepare_data(proj_name, mode = 'all')
if proj_name == 'openstack':
    x_train_original, x_test_original = prepare_data_all_metrics(proj_name, mode='all')
display(x_test)
# display(x_test_original)
commit_id | la | ld | nd | ns | ent | nrev | rtime | hcmt | self | ndev | age | app | asexp | rsexp | asawr | rsawr
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
00556b20067c3a4adf6ff33a17d2a4232fdce6ee | 1 | 1 | 1 | 1 | 0.000000 | 2.0 | 2.104201 | 1.0 | 0 | 26.0 | 0.939167 | 2.0 | 118.0 | 1164.0 | 0.008554 | 0.083747 |
c74e4a74ba97d32df7406fb684527d415dd8a6ba | 47 | 7 | 1 | 1 | 0.927838 | 1.0 | 7.052569 | 5.0 | 0 | 36.0 | 0.913021 | 5.0 | 293.0 | 2672.0 | 0.021133 | 0.192136 |
5af870b2459afc9cc934d9e79e80e2e49ff75049 | 2 | 2 | 1 | 1 | 0.000000 | 1.0 | 1.488750 | 1.0 | 0 | 9.0 | 103.431806 | 2.0 | 1359.0 | 1380.0 | 0.097750 | 0.099260 |
5a83f73ac92a73c76ab7e26e60deba905dc3f64a | 14 | 10 | 1 | 1 | 1.000000 | 0.0 | 0.000000 | 0.0 | 1 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 |
d9d0a944495208635688ca402fe04860e81490f6 | 12 | 2 | 1 | 1 | 0.000000 | 1.0 | 0.007650 | 2.0 | 0 | 2.0 | 0.898206 | 1.0 | 1700.0 | 574.0 | 0.122251 | 0.041325 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
04edeafade9058bde6d6dd58e6b1a89bace2fd50 | 30 | 4 | 1 | 1 | 0.000000 | 2.0 | 0.879294 | 1.0 | 0 | 8.0 | 0.207014 | 1.0 | 275.0 | 151.0 | 0.013507 | 0.007439 |
ff3dcc49c4a1912189091e35e87cb61af2f62d47 | 1 | 0 | 1 | 1 | 0.000000 | 6.0 | 10.021250 | 4.0 | 0 | 8.0 | 174.028530 | 2.0 | 436.0 | 655.0 | 0.021385 | 0.032102 |
13171e7e63bc5199d783e33decf7f402019d05cc | 3 | 0 | 1 | 1 | 0.000000 | 2.0 | 0.309213 | 4.0 | 0 | 1.0 | 3.995706 | 1.0 | 469.0 | 173.0 | 0.022999 | 0.008514 |
fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d | 130 | 130 | 3 | 2 | 0.788875 | 3.0 | 0.529745 | 2.0 | 0 | 29.0 | 0.176898 | 2.0 | 1985.0 | 3771.0 | 0.097177 | 0.184567 |
33bd1e08d0043e9b1340898039562bdf595879b5 | 17 | 12 | 1 | 1 | 0.000000 | 1.0 | 0.613299 | 1.0 | 0 | 3.0 | 12.977315 | 1.0 | 434.0 | 225.0 | 0.021284 | 0.011058 |
8277 rows × 16 columns
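A quick sanity check on the prepared splits is useful before training. The sketch below only assumes that prepare_data returns pandas DataFrames/Series, as used above.
# sanity check on the prepared splits (sketch; assumes pandas objects as used above)
print('train shape:', x_train.shape, ' test shape:', x_test.shape)
print('defective ratio (train): {:.3f}'.format(np.mean(y_train)))
print('defective ratio (test): {:.3f}'.format(np.mean(y_test)))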
# # print(y_train)
# for col in x_test.columns:
# print(col,len(x_test[col].unique()))
col = list(x_test.columns)
# print(col)
# print(col.index('self'))
# def find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'RF'):
# global_model_name = global_model_name.upper()
# if global_model_name not in ['RF','LR']:
# print('wrong global model name. the global model name must be RF or LR')
# return
# if global_model_name == 'RF':
# global_model = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=24)
# elif global_model_name == 'LR':
# global_model = LogisticRegression(random_state=0, n_jobs=24)
# for k in range(1,51):
# smt = SMOTE(k_neighbors=k, random_state=42, n_jobs=24)
# new_x_train, new_y_train = smt.fit_resample(x_train, y_train)
# global_model.fit(new_x_train, new_y_train)
# # pred = global_model.predict(x_test)
# prob = global_model.predict_proba(x_test)[:,1]
# AUC = roc_auc_score(y_test, prob)
# print('k_neighbor: {}, AUC: {}'.format(k, AUC))
# # pickle.dump(global_model, open(proj_name+'_'+global_model_name+'_global_model.pkl','wb'))
# # print('train {} finished'.format(global_model_name))
# find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'RF')
# find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'LR')
def train_global_model(x_train, y_train, global_model_name='RF'):
    global_model_name = global_model_name.upper()
    if global_model_name not in ['RF', 'LR']:
        print('wrong global model name. the global model name must be RF or LR')
        return
    smt = SMOTE(k_neighbors=5, random_state=42, n_jobs=24)
    new_x_train, new_y_train = smt.fit_resample(x_train, y_train)
    if global_model_name == 'RF':
        global_model = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=24)
    elif global_model_name == 'LR':
        global_model = LogisticRegression(random_state=0, n_jobs=24)
    global_model.fit(new_x_train, new_y_train)
    pickle.dump(global_model, open(proj_name+'_'+global_model_name+'_global_model.pkl', 'wb'))
    print('train {} finished'.format(global_model_name))
train_black_box = False
if train_black_box:
    train_global_model(x_train, y_train, 'RF')
    train_global_model(x_train, y_train, 'LR')
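It can also help to sanity-check the test-set performance of the saved global models. This is a minimal sketch using the metrics already imported above, and it assumes the two pickled models exist in the working directory.
# rough test-set evaluation of the saved global models (sketch)
for name in ['RF', 'LR']:
    model = pickle.load(open(proj_name+'_'+name+'_global_model.pkl', 'rb'))
    prob = model.predict_proba(x_test)[:, 1]
    pred = model.predict(x_test)
    print('{}: AUC={:.3f}, F1={:.3f}, MCC={:.3f}, balanced acc={:.3f}'.format(
        name, roc_auc_score(y_test, prob), f1_score(y_test, pred),
        matthews_corrcoef(y_test, pred), balanced_accuracy_score(y_test, pred)))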
# add this function to util file (done later...)
def get_prediction_result_df(proj_name, global_model_name):
    global_model_name = global_model_name.upper()
    if global_model_name not in ['RF', 'LR']:
        print('wrong global model name. the global model name must be RF or LR')
        return
    prediction_df_dir = dump_dataframe_dir+proj_name+'_'+global_model_name+'_prediction_result.csv'
    correctly_predict_df_dir = dump_dataframe_dir+proj_name+'_'+global_model_name+'_correctly_predict_as_defective.csv'
    if not os.path.exists(prediction_df_dir) or not os.path.exists(correctly_predict_df_dir):
        global_model = pickle.load(open(proj_name+'_'+global_model_name+'_global_model.pkl', 'rb'))
        pred = global_model.predict(x_test)
        defective_prob = global_model.predict_proba(x_test)[:, 1]
        prediction_df = x_test.copy()
        prediction_df['pred'] = pred
        prediction_df['defective_prob'] = defective_prob
        prediction_df['defect'] = y_test
        # print('AUC is', roc_auc_score(y_test, defective_prob))
        correctly_predict_df = prediction_df[(prediction_df['pred']==1) & (prediction_df['defect']==1)]
        print('total correct prediction: {}'.format(str(len(correctly_predict_df))))
        prediction_df.to_csv(prediction_df_dir)
        correctly_predict_df.to_csv(correctly_predict_df_dir)
    else:
        prediction_df = pd.read_csv(prediction_df_dir)
        correctly_predict_df = pd.read_csv(correctly_predict_df_dir)
        prediction_df = prediction_df.set_index('commit_id')
        correctly_predict_df = correctly_predict_df.set_index('commit_id')
        print('total correct prediction: {}'.format(str(len(correctly_predict_df))))
    return prediction_df, correctly_predict_df
rf_prediction_df, rf_correctly_predict_df = get_prediction_result_df(proj_name, 'rf')
lr_prediction_df, lr_correctly_predict_df = get_prediction_result_df(proj_name, 'lr')
total correct prediction: 79
total correct prediction: 255
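The counts above can be put in context of the full confusion counts for the defective class. This is a small sketch over the RF prediction dataframe, mirroring the pred/defect filter used in get_prediction_result_df.
# recall and precision for the defective class (sketch)
tp = len(rf_prediction_df[(rf_prediction_df['pred']==1) & (rf_prediction_df['defect']==1)])
fp = len(rf_prediction_df[(rf_prediction_df['pred']==1) & (rf_prediction_df['defect']==0)])
fn = len(rf_prediction_df[(rf_prediction_df['pred']==0) & (rf_prediction_df['defect']==1)])
print('RF recall: {:.3f}, precision: {:.3f}'.format(tp/(tp+fn), tp/(tp+fp)))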
display(rf_prediction_df)
commit_id | la | nd | ns | ent | nrev | rtime | self | ndev | age | app | rrexp | asawr | rsawr | pred | defective_prob | defect
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef | 17 | 2 | 1 | 0.936667 | 7.0 | 17.869722 | 0 | 38.0 | 0.045706 | 1.0 | 1306.0 | 0.113107 | 0.247377 | False | 0.14 | False |
878ac164a391e761c72c5fdcd12f0caf48c7d359 | 49 | 9 | 3 | 0.718116 | 5.0 | 9.979109 | 0 | 116.0 | 13.035437 | 4.0 | 373.0 | 0.597853 | 0.296449 | False | 0.16 | False |
bbab55c4da531e4695a6e3e577aaa4975f0fce79 | 16 | 2 | 1 | 0.988699 | 6.0 | 81.107118 | 0 | 123.0 | 0.302407 | 7.0 | 2810.0 | 0.005334 | 0.389047 | False | 0.18 | False |
a2ba455e2d2bc41f4a80a08d5434b741ed715ef4 | 519 | 13 | 1 | 0.861772 | 24.0 | 16.003877 | 0 | 194.0 | 2.348328 | 3.0 | 2086.0 | 0.122311 | 0.389511 | True | 0.61 | False |
9382ee659212285a203550cf60476dd146d27a29 | 89 | 2 | 1 | 0.996276 | 3.0 | 105.237789 | 0 | 9.0 | 21.841916 | 3.0 | 632.0 | 0.235955 | 0.820225 | False | 0.20 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
209c09a76f5e90aaa2899804686e6a513703d887 | 2 | 1 | 1 | 0.000000 | 2.0 | 20.872523 | 1 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | False | 0.03 | False |
1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb | 180 | 39 | 1 | 0.846758 | 24.0 | 92.130301 | 0 | 411.0 | 0.501826 | 2.0 | 720.0 | 0.000268 | 0.094218 | False | 0.39 | False |
aaf5762be5d37cac022dc321b6400b9743a25303 | 4 | 2 | 1 | 0.413817 | 2.0 | 18.489861 | 0 | 49.0 | 26.872975 | 4.0 | 2299.0 | 0.004550 | 0.352067 | False | 0.08 | False |
75c7f6a17a5bb78074518877bf73f0071b7758eb | 0 | 1 | 1 | 0.000000 | 1.0 | 54.908843 | 0 | 4.0 | 65.434687 | 8.0 | 1361.0 | 0.001139 | 0.840547 | False | 0.01 | False |
3824051b1e5618388a17c88867a3037397bc96b7 | 31 | 2 | 1 | 0.858231 | 7.0 | 75.987384 | 0 | 205.0 | 0.044525 | 2.0 | 713.0 | 0.011105 | 0.094461 | False | 0.20 | False |
3963 rows × 16 columns
display(rf_correctly_predict_df)
commit_id | la | nd | ns | ent | nrev | rtime | self | ndev | age | app | rrexp | asawr | rsawr | pred | defective_prob | defect
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
1e6973aee7137653c62dcef970b1e2527b50517d | 258 | 2 | 1 | 0.826278 | 2.0 | 18.049560 | 0 | 1.0 | 5.242755 | 4.0 | 465.0 | 0.004262 | 0.082046 | True | 0.57 | True |
5d0ccceb20780fdd3adf519d3f8e6b80b1844407 | 190 | 7 | 1 | 0.839470 | 25.0 | 15.181620 | 0 | 164.0 | 0.833137 | 2.0 | 1931.0 | 0.122340 | 0.361879 | True | 0.58 | True |
de31210c05f464c4a79255de68b1a515d9b84ed3 | 121 | 2 | 1 | 0.758523 | 2.0 | 0.357431 | 0 | 11.0 | 4.983553 | 2.0 | 812.0 | 0.057416 | 0.873206 | True | 0.61 | True |
957533f685caf9ffc0d9cad569598455d59ade34 | 121 | 5 | 1 | 0.863629 | 7.0 | 19.054421 | 0 | 4.0 | 7.935347 | 5.0 | 1364.0 | 0.197917 | 0.911458 | True | 0.60 | True |
96677735f6d8f1f5b6380127956921f719aab799 | 525 | 2 | 1 | 0.707022 | 29.0 | 20.117373 | 0 | 7.0 | 4.144878 | 1.0 | 98.0 | 0.002381 | 0.104762 | True | 0.56 | True |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9bc29208bda6071a34bcc0da36a396eb8bab4f30 | 22 | 2 | 1 | 0.886541 | 7.0 | 74.999502 | 0 | 20.0 | 5.003831 | 8.0 | 1214.0 | 0.033998 | 0.864009 | True | 0.70 | True |
1a40831f1711bf46c878dd247679bb05d19ffb5a | 99 | 2 | 1 | 0.861487 | 13.0 | 36.234155 | 0 | 12.0 | 19.055550 | 2.0 | 562.0 | 0.166096 | 0.512842 | True | 0.60 | True |
605749ca12af969ac122008b4fa14904df68caf7 | 627 | 9 | 1 | 0.663883 | 5.0 | 8.960463 | 0 | 181.0 | 0.379626 | 4.0 | 1467.0 | 0.005115 | 0.216853 | True | 0.51 | True |
85239cc81440d9e5a4aee3c0961c96a4197ad939 | 166 | 5 | 2 | 0.781693 | 6.0 | 1.367859 | 0 | 2.0 | 1.527556 | 3.0 | 1315.0 | 0.136022 | 0.947368 | True | 0.54 | True |
f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8 | 46 | 2 | 1 | 0.852405 | 4.0 | 73.000278 | 0 | 23.0 | 5.182778 | 5.0 | 1174.0 | 0.006821 | 0.847885 | True | 0.51 | True |
198 rows × 16 columns
# rf_prediction_df_exclude_correct_prediction = rf_prediction_df.drop(index=rf_correctly_predict_df.index)
# lr_prediction_df_exclude_correct_prediction = lr_prediction_df.drop(index=lr_correctly_predict_df.index)
# all_indices = set(x_test.index)
# correct_indices = set(rf_correctly_predict_df.index)
# remain_indices = all_indices-correct_indices
# print(len(all_indices))
# print(len(correct_indices), len(remain_indices))
# display(x_test.loc[rf_correctly_predict_df.index])
# display(y_test.loc[rf_correctly_predict_df.index])
Note: this step includes synthetic instance generation for both PyExplainer and LIME.
def create_every_explainer(global_model_name, df_indices):
    global_model_name = global_model_name.upper()
    if global_model_name not in ['RF', 'LR']:
        print('wrong global model name. the global model name must be RF or LR')
        return
    global_model = pickle.load(open(proj_name+'_'+global_model_name+'_global_model.pkl', 'rb'))
    indep = x_test.columns
    dep = 'defect'
    class_label = ['clean', 'defect']
    # for our approach
    pyExp = PyExplainer(x_train, y_train, indep, dep, global_model, class_label)
    # for baseline
    # note: 6 is index of 'self' feature
    lime_explainer = LimeTabularExplainer(x_train.values, categorical_features=[6],
                                          feature_names=indep, class_names=class_label,
                                          random_state=0)
    # df_indices may be a set; convert to a list so that .loc accepts it
    feature_df = x_test.loc[list(df_indices)]
    test_label = y_test.loc[list(df_indices)]
    save_dir = os.path.join(pyExp_dir, proj_name, global_model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for i in range(0, len(feature_df)):
        X_explain = feature_df.iloc[[i]]
        y_explain = test_label.iloc[[i]]
        row_index = str(X_explain.index[0])
        pyExp_obj = pyExp.explain(X_explain,
                                  y_explain,
                                  search_function='CrossoverInterpolation')
        # synt_pred = pyExp_obj['synthetic_predictions']
        pyExp_obj['commit_id'] = row_index
        # because I don't want to change the key name in another evaluation file
        pyExp_obj['local_model'] = pyExp_obj['local_rulefit_model']
        del pyExp_obj['local_rulefit_model']
        # print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)),
        #                                                  str(len(synt_pred))))
        # pickle.dump(pyExp_obj, open(pyExp_dir+proj_name+'_'+explainer+'_'+search_function.lower()+'_'+row_index+'_20_rules.pkl','wb'))
        X_explain = feature_df.iloc[i]  # to prevent an error in LIME
        exp, synt_inst, synt_inst_for_local_model, selected_feature_indices, local_model = lime_explainer.explain_instance(X_explain,
                                                                                                                           global_model.predict_proba,
                                                                                                                           num_samples=5000)
        lime_obj = {}
        lime_obj['rule'] = exp
        lime_obj['synthetic_instance_for_global_model'] = synt_inst
        lime_obj['synthetic_instance_for_lobal_model'] = synt_inst_for_local_model
        lime_obj['local_model'] = local_model
        lime_obj['selected_feature_indeces'] = selected_feature_indices
        lime_obj['commit_id'] = row_index
        # pickle.dump(lime_obj, open(pyExp_dir+proj_name+'_lime_'+row_index+'.pkl','wb'))
        all_explainer = {'pyExplainer': pyExp_obj, 'LIME': lime_obj}
        pickle.dump(all_explainer, open(save_dir+'/all_explainer_'+row_index+'.pkl', 'wb'))
        print('finished', row_index)
        # break
    # end = time.time()
all_indices = set(x_test.index)
rf_correct_indices = set(rf_correctly_predict_df.index)
rf_remain_indices = all_indices-rf_correct_indices
lr_correct_indices = set(lr_correctly_predict_df.index)
lr_remain_indices = all_indices-lr_correct_indices
create_every_explainer('RF',rf_correct_indices)
create_every_explainer('LR',lr_correct_indices)
# create_every_explainer('RF',rf_remain_indices)
# create_every_explainer('LR',lr_remain_indices)
# print(len(correct_indices))
# print(len(remain_indices))
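To verify what create_every_explainer wrote out, the saved objects can be reloaded from the per-project, per-model directory used above. A minimal sketch; file names follow the all_explainer_<commit_id>.pkl pattern from the function.
# reload one saved explainer pair and inspect its keys (sketch)
rf_save_dir = os.path.join(pyExp_dir, proj_name, 'RF')
saved_files = sorted(os.listdir(rf_save_dir))
print('{} explainer files found'.format(len(saved_files)))
sample_obj = pickle.load(open(os.path.join(rf_save_dir, saved_files[0]), 'rb'))
print(sample_obj.keys())  # expected: dict_keys(['pyExplainer', 'LIME'])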
os.path.join('dir1','dir2')+'/file.f'
'dir1/dir2/file.f'
obj = pickle.load(open(pyExp_dir+proj_name+'_'+'LR'+'_all_explainer_'+'99379d6ec00b7bdfe5a625877d8e680f9240efcc'+'.pkl','rb'))
print(obj.keys())
print(obj['pyExplainer'].keys())
dict_keys(['pyExplainer', 'LIME'])
dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'top_k_positive_rules', 'top_k_negative_rules', 'commit_id', 'local_model'])
print(correctly_predict_df.columns[:-3])
Index(['la', 'nd', 'ns', 'ent', 'nrev', 'rtime', 'self', 'ndev', 'age', 'app', 'rrexp', 'asawr', 'rsawr'], dtype='object')
# global_model = pickle.load(open(proj_name+'_global_model.pkl','rb'))
# load_prediction_from_file = True
# class_label = ['clean', 'defect']
# if load_prediction_from_file:
# correctly_predict_df = pd.read_csv(dump_dataframe_dir+proj_name+'_correctly_predict_as_defective.csv')
# correctly_predict_df = correctly_predict_df.set_index('commit_id')
# dep = 'defect'
# indep = correctly_predict_df.columns[:-3] # exclude the last 3 columns
# # print(correctly_predict_df.columns)
# # print(len(correctly_predict_df.columns))
# # print(indep)
# # display(correctly_predict_df)
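For the cells below to run as written, correctly_predict_df, indep, and dep need to be bound. A minimal sketch mirroring the commented-out cell above, here using the RF results.
# bind the names used below (sketch; mirrors the commented-out cell above)
correctly_predict_df = rf_correctly_predict_df
dep = 'defect'
indep = correctly_predict_df.columns[:-3]  # exclude pred, defective_prob, defect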
feature_df = correctly_predict_df.loc[:, indep]
test_label = correctly_predict_df.loc[:, dep]
problem_index = [] # store index that cannot build pyExplainer
create_every_explainer(x_test, y_test)
prepare data done...
[output truncated: 'finished <commit_id>' printed for each explained commit, from bdba16035dd03310259203f7baf576fc6fcb530b through f9cf84273bd4d89cdbe267b2a5ed9c9ed20492e9]
'''
search function: 'lime' or 'CrossoverInterpolation'
'''
def create_pyExplainer_obj(search_function, feature_df, test_label, explainer='rulefit'):
    if search_function not in ['lime', 'CrossoverInterpolation']:
        print('the search function must be "lime" or "CrossoverInterpolation"')
        return
    problem_index = []
    time_spent = []
    for i in range(0, len(feature_df)):
        X_explain = feature_df.iloc[[i]]
        y_explain = test_label.iloc[[i]]
        row_index = str(X_explain.index[0])
        start = time.time()
        try:
            if search_function == 'CrossoverInterpolation':
                # the returned object is a dictionary
                pyExp_obj = pyExp.explain(X_explain,
                                          y_explain,
                                          search_function='CrossoverInterpolation',
                                          top_k=15,
                                          max_rules=2000,
                                          max_iter=None,
                                          cv=5,
                                          debug=False)
                # synt_pred = pyExp_obj['synthetic_predictions']
                pyExp_obj['commit_id'] = row_index
                # because I don't want to change the key name in another evaluation file
                pyExp_obj['local_model'] = pyExp_obj['local_rulefit_model']
                del pyExp_obj['local_rulefit_model']
                # print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)),
                #                                                  str(len(synt_pred))))
                pickle.dump(pyExp_obj, open(pyExp_dir+proj_name+'_'+explainer+'_'+search_function.lower()+'_'+row_index+'_20_rules.pkl', 'wb'))
            else:
                X_explain = feature_df.iloc[i]  # to prevent an error in LIME
                exp, synt_inst, synt_inst_for_local_model, selected_feature_indices, local_model = lime_explainer.explain_instance(X_explain,
                                                                                                                                   global_model.predict_proba,
                                                                                                                                   num_samples=5000)
                lime_obj = {}
                lime_obj['rule'] = exp
                lime_obj['synthetic_instance_for_global_model'] = synt_inst
                lime_obj['synthetic_instance_for_lobal_model'] = synt_inst_for_local_model
                lime_obj['local_model'] = local_model
                lime_obj['selected_feature_indeces'] = selected_feature_indices
                lime_obj['commit_id'] = row_index
                pickle.dump(lime_obj, open(pyExp_dir+proj_name+'_lime_'+row_index+'.pkl', 'wb'))
            print('finished', row_index)
            # print(row_index)
            # print('just one rulefit is enough')
            # break
        except Exception as e:
            problem_index.append(row_index)
            print('-'*100)
            print(e)
            # print('found total {} problematic commit'.format(str(len(problem_index))))
            print('-'*100)
            # break
        end = time.time()
        time_spent.append(str(end-start))
        # print(row_index)
        # break
    print('from total {} commits, there are {} problematic commits'.format(len(feature_df), len(problem_index)))
    return time_spent, problem_index
time_spent_rand, problem_index_rand = create_pyExplainer_obj('lime', feature_df, test_label)
pickle.dump(time_spent_rand, open(other_object_dir+proj_name+'_train_time_lime_randompertubation.pkl','wb'))
pickle.dump(problem_index_rand, open(other_object_dir+proj_name+'_problem_index_lime_randompertubation.pkl','wb'))
# time_spent_rand, problem_index_rand = create_pyExplainer_obj('lime', feature_df, test_label,'rulefit')
# pickle.dump(time_spent_rand, open(other_object_dir+proj_name+'_train_time_lime.pkl','wb'))
# pickle.dump(problem_index_rand, open(other_object_dir+proj_name+'_problem_index_lime.pkl','wb'))
[output truncated: 'finished <commit_id>' printed for each explained commit, from f5dbc876378ae58a7bdfe1e9664fc81caca18dfb through 979a0406f0013560efbdcc486b32ba93ce8c946f]
from total 79 commits, there are 0 problematic commits
# time_spent_ci, problem_index_ci = create_pyExplainer_obj('crossoverinterpolation', feature_df, test_label)
# pickle.dump(time_spent_ci, open(other_object_dir+proj_name+'_train_time_LRR_crossoverinterpolation.pkl','wb'))
# pickle.dump(problem_index_ci, open(other_object_dir+proj_name+'_problem_index_LRR_crossoverinterpolation.pkl','wb'))
time_spent_ci, problem_index_ci = create_pyExplainer_obj('CrossoverInterpolation', feature_df, test_label,'rulefit')
pickle.dump(time_spent_ci, open(other_object_dir+proj_name+'_train_time_rulefit_crossoverinterpolation.pkl','wb'))
pickle.dump(problem_index_ci, open(other_object_dir+proj_name+'_problem_index_rulefit_crossoverinterpolation.pkl','wb'))
[output truncated: 'finished <commit_id>' printed for each explained commit, from 1e6973aee7137653c62dcef970b1e2527b50517d through f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8]
from total 198 commits, there are 0 problematic commits
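Since create_pyExplainer_obj stores each per-commit time as a string (str(end-start)), the returned lists can be summarized directly. A short sketch:
# summarize per-commit explanation time for crossover interpolation (sketch)
ci_secs = [float(t) for t in time_spent_ci]
print('crossover interpolation: mean {:.2f} s, median {:.2f} s over {} commits'.format(
    np.mean(ci_secs), np.median(ci_secs), len(ci_secs)))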
explainer = 'rulefit'
pyExp_random = pickle.load(open(pyExp_dir+proj_name+'_'+explainer+'_randompertubation_3126.pkl','rb'))
pyExp_crossover = pickle.load(open(pyExp_dir+proj_name+'_'+explainer+'_crossoverinterpolation_3126.pkl','rb'))
print(pyExp_random.keys())
dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'local_model', 'top_k_positive_rules', 'top_k_negative_rules'])
# euclid_dist = euclidean_distances(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]
dist_rand = euclidean_distances(pyExp_random['X_explain'].values.reshape(1,-1), pyExp_random['synthetic_data'].values)[0]
# plt.boxplot(dist)
dist_cross = euclidean_distances(pyExp_crossover['X_explain'].values.reshape(1,-1), pyExp_crossover['synthetic_data'].values)[0]
# plt.boxplot(dist)
data = [dist_rand, dist_cross]
plt.boxplot(data)
[boxplot comparing Euclidean distances between the explained instance and its synthetic instances: random perturbation vs. crossover interpolation]
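To complement the boxplot, the two distance distributions can also be summarized numerically. A short sketch over the arrays computed above:
# numeric summary of instance-to-synthetic-instance distances
for label, dist in [('random perturbation', dist_rand), ('crossover interpolation', dist_cross)]:
    print('{}: mean {:.2f}, median {:.2f}, max {:.2f}'.format(label, np.mean(dist), np.median(dist), np.max(dist)))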
# display(feature_df)
# for c in feature_df:
# print(c)
# for k in range(0,1):
# print(k)
# test feature binarizer
# from pyexplainer.features import *
# fb = FeatureBinarizer(negations=True)
# fb.fit(x_train)
# display(fb.transform(feature_df))
# search_function='randompertubation'
# i = 3
# X_explain = feature_df.iloc[[i]]
# y_explain = test_label.iloc[[i]]
# row_index = str(X_explain.index[0])
# start = time.time()
# pyExp_obj = pyExp.explain(X_explain,
# y_explain,
# search_function = search_function,
# top_k = 1000,
# max_rules=2000,
# max_iter =None,
# cv=5,
# explainer='rulefit',
# debug = False)
print(pyExp_obj.keys())
dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'local_model', 'top_k_positive_rules', 'top_k_negative_rules'])
# display(pyExp_obj['synthetic_data_fb'])
# local_model = pyExp_obj['local_model']
# print(local_model.predict(X_explain.values))
# print('------------------Explanation from local model-------------------------')
# print(local_model.explain())
[False]
# display(pyExp_obj['X_explain'])
# print(local_model.predict(pyExp_obj['X_explain_fb']))
# synthetic_instances = pyExp_obj['synthetic_data']
# sample_instance = pyExp_obj['X_explain']
# def agg_list(val):
# return np.mean(val), np.median(val), np.max(val)
# cos_sim = cosine_similarity(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]
# euclid_dist = euclidean_distances(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]
# '''get prediction from global model then compare with result obtained from model (but how to compare??)'''
# local_model = pyExp_obj['local_model']
# '''In case pyExp uses logistic rule regression'''
# fb_sample_instance = pyExp.feature_binarizer.transform(sample_instance)
# # print(pyExp.feature_binarizer.transform(sample_instance))
# local_pred = local_model.predict(fb_sample_instance)
# local_pred_prob = local_model.predict_proba(fb_sample_instance)
# '''In case pyExp uses RuleFit'''
# # local_pred = local_model.predict(sample_instance)
# # local_pred_prob = local_model.predict_proba(sample_instance)
# print(local_pred, local_pred_prob)
[False] [0.06562197]
# test LRR (logistic rule regression) with random perturbation
search_function = 'randompertubation'
print('------------------Prediction from local model-------------------------')
for i in [3, 5, 7, 20, 50, 100, 83, 25, 163, 127]:
    X_explain = feature_df.iloc[[i]]
    y_explain = test_label.iloc[[i]]
    row_index = str(X_explain.index[0])
    start = time.time()
    try:
        pyExp_obj = pyExp.explain(X_explain,
                                  y_explain,
                                  search_function=search_function,
                                  top_k=1000,
                                  max_rules=2000,
                                  max_iter=None,
                                  cv=5,
                                  explainer='LRR',
                                  debug=False)
        end = time.time()
        # print('time spent to train LRR:', str(end-start), 'secs')
        local_model = pyExp_obj['local_model']
        print(local_model.explain())
        # print(global_model.predict_proba(X_explain)[:,1], local_model.predict_proba(pyExp.scaler.transform(X_explain.values))[:,1])
        # print(local_model.explain())
        print('-'*100)
    except:
        print('-'*100)
        print('there is only 1 class in the generated instances')
        print('-'*100)
------------------Prediction from local model-------------------------
[sklearn ConvergenceWarning repeated for each run: "The max_iter was reached which means the coef_ did not converge"]
          rule     coefficient
0  (intercept)        0.926494
1  nrev <= 1.00     -11.265911
2  nd <= 1.00        -3.340535
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)       -4.822969
1  nrev <= 1.00     -21.790036
2  ent <= 0.90        11.04842
3  asawr <= 0.16       2.480303
4  nuc <= 3.00         2.480303
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)       -9.127939
1  nrev <= 2.00       12.185013
2  nrev <= 1.00       -7.495421
3  ent <= 0.63        -7.495421
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
there is only 1 class in the generated instances
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)        0.721183
1  nrev <= 1.00       -8.482928
2  asawr <= 0.05      -7.399822
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)       -3.748032
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)       -9.149672
1  nrev <= 2.00       14.773528
2  nrev <= 1.00        -9.91744
3  ent <= 0.00        -8.247625
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)      -10.434791
1  nrev <= 1.00     -15.862358
2  asawr <= 0.09      15.725896
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)       -2.933625
----------------------------------------------------------------------------------------------------
          rule     coefficient
0  (intercept)        1.111113
1  ld <= 0.00       -11.000013
2  nrev <= 1.00      -2.505401
3  asawr <= 0.00     -0.249322
4  asawr <= 0.00     -0.249322
----------------------------------------------------------------------------------------------------
# test rulefit
search_function='crossoverinterpolation'
print('------------------Prediction from local model-------------------------')
for i in [3,5,7,20,50,100,83,25,163,127]:
    X_explain = feature_df.iloc[[i]]
    y_explain = test_label.iloc[[i]]
    row_index = str(X_explain.index[0])
    start = time.time()
    try:
        pyExp_obj = pyExp.explain(X_explain,
                                  y_explain,
                                  search_function=search_function,
                                  top_k=1000,
                                  max_rules=2000,
                                  max_iter=None,
                                  cv=5,
                                  explainer='rulefit',
                                  debug=False)
        end = time.time()
        # print('time spent to train the local RuleFit model:', str(end-start), 'secs')
        local_model = pyExp_obj['local_model']
        print(global_model.predict_proba(X_explain)[:,1], local_model.predict_proba(X_explain.values)[:,1])
        # print(local_model.explain())
        print('-'*100)
    except:
        print('-'*100)
        print('there is only 1 class in the generated instances')
        print('-'*100)
------------------Prediction from local model-------------------------
[0.84] [0.96695341]
----------------------------------------------------------------------------------------------------
[0.73] [0.49372837]
----------------------------------------------------------------------------------------------------
[0.83] [0.99548978]
----------------------------------------------------------------------------------------------------
[0.67] [0.93530316]
----------------------------------------------------------------------------------------------------
[0.6] [0.94639136]
----------------------------------------------------------------------------------------------------
[0.66] [0.94685406]
----------------------------------------------------------------------------------------------------
[0.62] [0.77372941]
----------------------------------------------------------------------------------------------------
[0.75] [0.91593334]
----------------------------------------------------------------------------------------------------
[0.7] [0.66951567]
----------------------------------------------------------------------------------------------------
[0.58] [0.82852018]
----------------------------------------------------------------------------------------------------
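To go beyond eyeballing the pairs above, the global and local probabilities can be collected and summarised, for example with their mean absolute gap. A minimal sketch, assuming pyExp, feature_df, test_label and global_model are defined as in the cells above; the aggregation itself is illustrative and not part of the original notebook.
import numpy as np
# Illustrative only: re-run the RuleFit explanations and measure how far the local
# probabilities deviate from the global model's probabilities for the same commits.
global_probs, local_probs = [], []
for i in [3, 5, 7, 20, 50, 100, 83, 25, 163, 127]:
    if i >= len(feature_df):
        continue  # skip positions that are out of bounds for feature_df
    X_explain = feature_df.iloc[[i]]
    y_explain = test_label.iloc[[i]]
    try:
        obj = pyExp.explain(X_explain, y_explain,
                            search_function='crossoverinterpolation',
                            top_k=1000, max_rules=2000, max_iter=None, cv=5,
                            explainer='rulefit', debug=False)
        local_model = obj['local_model']
        global_probs.append(global_model.predict_proba(X_explain)[:, 1][0])
        local_probs.append(local_model.predict_proba(X_explain.values)[:, 1][0])
    except Exception:
        pass  # instances whose synthetic neighbourhood contains a single class
print('mean |global - local| probability gap:',
      np.mean(np.abs(np.array(global_probs) - np.array(local_probs))))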
import warnings
warnings.simplefilter("ignore")
# sample LRR when used with random perturbation
search_function='randompertubation'
for i in [3,5,7,20,50,100,83,25,163,127]:
    X_explain = feature_df.iloc[[i]]
    y_explain = test_label.iloc[[i]]
    row_index = str(X_explain.index[0])
    start = time.time()
    try:
        pyExp_obj = pyExp.explain(X_explain,
                                  y_explain,
                                  search_function=search_function,
                                  top_k=1000,
                                  max_rules=2000,
                                  max_iter=None,
                                  cv=5,
                                  explainer='LRR',
                                  debug=False)
        end = time.time()
        print('time spent to train LRR:', str(end-start), 'secs')
        # local_model = pyExp_obj['local_model']
        # print('------------------Explanation from local model-------------------------')
        # print(local_model.explain())
        print('-'*100)
    except:
        print('-'*100)
        print('there is only 1 class in the generated instances')
        print('-'*100)
time spent to train LRR: 1.4275202751159668 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 1.738269329071045 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 1.0873517990112305 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 0.9002327919006348 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 1.3496229648590088 secs
----------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexers.py in maybe_convert_indices(indices, n)
    264     if mask.any():
--> 265         raise IndexError("indices are out-of-bounds")
    266     return indices

IndexError: indices are out-of-bounds

The above exception was the direct cause of the following exception:

IndexError                                Traceback (most recent call last)
<ipython-input-30-73e92f53bfa7> in <module>
      5 for i in [3,5,7,20,50,100,83,25,163,127]:
----> 6     X_explain = feature_df.iloc[[i]]
      7     y_explain = test_label.iloc[[i]]

IndexError: positional indexers are out-of-bounds
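The traceback comes from hard-coded positional indices that exceed the number of rows in feature_df, so .iloc raises once the loop reaches a position past the end of the frame. A small guard, sketched here as a suggestion rather than a fix in the original notebook, avoids the error:
# Suggested guard (illustrative): keep only positions that exist in feature_df
# so .iloc does not raise "positional indexers are out-of-bounds".
candidate_positions = [3, 5, 7, 20, 50, 100, 83, 25, 163, 127]
valid_positions = [i for i in candidate_positions if i < len(feature_df)]
print('explaining positions:', valid_positions)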
# sample LRR when used with crossover interpolation
search_function='crossoverinterpolation'
for i in [3,5,7,20,50,100,83,25,163,127]:
    X_explain = feature_df.iloc[[i]]
    y_explain = test_label.iloc[[i]]
    row_index = str(X_explain.index[0])
    start = time.time()
    pyExp_obj = pyExp.explain(X_explain,
                              y_explain,
                              search_function=search_function,
                              top_k=1000,
                              max_rules=2000,
                              max_iter=None,
                              cv=5,
                              explainer='LRR',
                              debug=False)
    end = time.time()
    print('time spent to train LRR:', str(end-start), 'secs')
    # local_model = pyExp_obj['local_model']
    # print('------------------Explanation from local model-------------------------')
    # print(local_model.explain())
    print('-'*100)
time spent to train LRR: 28.87514853477478 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 26.213839769363403 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 25.021041870117188 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 29.421844244003296 secs
----------------------------------------------------------------------------------------------------
time spent to train LRR: 26.97343945503235 secs
----------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexers.py in maybe_convert_indices(indices, n)
    264     if mask.any():
--> 265         raise IndexError("indices are out-of-bounds")
    266     return indices

IndexError: indices are out-of-bounds

The above exception was the direct cause of the following exception:

IndexError                                Traceback (most recent call last)
<ipython-input-31-b3ee45065751> in <module>
      5 for i in [3,5,7,20,50,100,83,25,163,127]:
----> 6     X_explain = feature_df.iloc[[i]]
      7     y_explain = test_label.iloc[[i]]

IndexError: positional indexers are out-of-bounds
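The timings above suggest that building the crossover-interpolation neighbourhood is far more expensive than random perturbation for the LRR explainer (roughly 25-30 seconds versus 1-2 seconds per instance). A small sketch for collecting and comparing the runtimes directly, assuming pyExp, feature_df and test_label as above; the helper below is illustrative and not part of the original notebook.
import time
import numpy as np

def time_lrr_explanations(search_function, positions):
    # Illustrative helper: time pyExp.explain with the LRR explainer for each position.
    runtimes = []
    for i in positions:
        if i >= len(feature_df):
            continue  # skip out-of-bounds positions
        X_explain = feature_df.iloc[[i]]
        y_explain = test_label.iloc[[i]]
        start = time.time()
        try:
            pyExp.explain(X_explain, y_explain,
                          search_function=search_function,
                          top_k=1000, max_rules=2000, max_iter=None, cv=5,
                          explainer='LRR', debug=False)
            runtimes.append(time.time() - start)
        except Exception:
            pass  # skip instances whose synthetic neighbourhood has a single class
    return runtimes

for sf in ['randompertubation', 'crossoverinterpolation']:
    runtimes = time_lrr_explanations(sf, [3, 5, 7, 20, 50])
    if runtimes:
        print(sf, 'mean time: {:.2f} secs over {} instances'.format(np.mean(runtimes), len(runtimes)))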
print(pyExp_obj.keys())
dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'X_explain_fb', 'indep', 'dep', 'local_model'])
# display(pyExp_obj['synthetic_data'].columns)
# print(feature_df.index)
local_model = pyExp_obj['local_model']
print(local_model.explain(maxCoeffs=None))
# print(local_model.z)
          rule  coefficient
0  (intercept)      1.21719
1  la <= 44.00     -2.10262
2  la <= 76.00    -0.561556
generated_instance = pyExp_obj['synthetic_data']
print(generated_instance.columns)
display(generated_instance)
MultiIndex([(   'la', '<=',                  1.0),
            (   'la', '<=',                  2.0),
            (   'la', '<=',                  5.0),
            (   'la', '<=',                  9.0),
            (   'la', '<=',                 17.0),
            (   'la', '<=',                 27.0),
            (   'la', '<=',                 44.0),
            (   'la', '<=',                 76.0),
            (   'la', '<=',                163.0),
            (   'ld', '<=',                  0.0),
            ...
            ('asawr', '<=',   0.433364602876798),
            ('rsawr', '<=', 0.18055330452007923),
            ('rsawr', '<=',  0.2564102564102564),
            ('rsawr', '<=',  0.3196254791765793),
            ('rsawr', '<=',  0.3754889178617992),
            ('rsawr', '<=',  0.4287529047714299),
            ('rsawr', '<=',  0.4816326530612245),
            ('rsawr', '<=',  0.5758975125536251),
            ('rsawr', '<=',  0.7078384798099763),
            ('rsawr', '<=',  0.8487118531623176)],
           names=['feature', 'operation', 'value'], length=165)
feature | la | ld | ... | asawr | rsawr | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
operation | <= | <= | ... | <= | <= | ||||||||||||||||
value | 1.000000 | 2.000000 | 5.000000 | 9.000000 | 17.000000 | 27.000000 | 44.000000 | 76.000000 | 163.000000 | 0.000000 | ... | 0.433365 | 0.180553 | 0.256410 | 0.319625 | 0.375489 | 0.428753 | 0.481633 | 0.575898 | 0.707838 | 0.848712 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 |
3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
4 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2116 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
2117 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
2118 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
2119 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
2120 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
2121 rows × 165 columns
print(generated_instance.loc[:, (generated_instance.columns.get_level_values(0)=='la') &
(generated_instance.columns.get_level_values(1)=='<=') &
(generated_instance.columns.get_level_values(2)==44.0) ])
feature      la
operation    <=
value      44.0
0             0
1             0
2             0
3             0
4             1
...         ...
2116          1
2117          0
2118          1
2119          1
2120          0

[2121 rows x 1 columns]
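Because the synthetic data carries a three-level column MultiIndex (feature, operation, value), the same column can be pulled out more compactly with pandas' cross-section accessor. A small equivalent sketch (illustrative; it selects the same la <= 44.0 indicator column as the cell above):
# Equivalent selection via DataFrame.xs on the column MultiIndex (illustrative).
la_le_44 = generated_instance.xs(('la', '<=', 44.0), axis=1, drop_level=False)
print(la_le_44.sum())  # how many synthetic instances satisfy la <= 44.0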
# # search_function = 'crossoverinterpolation' # 'randompertubation' or 'crossoverinterpolation'
# search_functions = ['randompertubation', 'crossoverinterpolation']
# for i in range(0,len(feature_df)):
# X_explain = feature_df.iloc[[i]]
# y_explain = test_label.iloc[[i]]
# row_index = str(X_explain.index[0])
# try:
# pyExp_obj = pyExp.explain(X_explain,
# y_explain,
# search_function = search_function,
# top_k = 1000,
# max_rules=2000,
# max_iter =None,
# cv=5,
# debug = False)
# pickle.dump(pyExp_obj, open(pyExp_dir+search_function+'_'+row_index+'.pkl','wb'))
# synt_pred = pyExp_obj['synthetic_predictions']
# print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)),
# str(len(synt_pred))))
# # print('finished', row_index)
# except:
# problem_index.append(row_index)
# # print(row_index)
# break
# explain_index = 13
# X_explain = feature_df.iloc[[explain_index]]
# X_explain
# y_explain = test_label.iloc[[explain_index]]
# y_explain
# search_function = 'crossoverinterpolation' # 'randompertubation' or 'crossoverinterpolation'
# start = time.time()
# create_pyExp_rule_obj = pyExp.explain(X_explain,
# y_explain,
# search_function = search_function,
# top_k = 1000,
# max_rules=2000,
# max_iter =None,
# cv=5,
# debug = False)
# end = time.time()
# print('time spent {}'.format(str(end-start)))
# pickle.dump(create_pyExp_rule_obj, open(pyExp_dir+search_function+'_'+str(explain_index)+'.pkl','wb'))
# display(create_pyExp_rule_obj['synthetic_data'])
# # print(create_pyExp_rule_obj['synthetic_predictions'])
# # print(np.sum(create_pyExp_rule_obj['synthetic_predictions']))
# display(create_pyExp_rule_obj.keys())
# print(create_pyExp_rule_obj['synthetic_predictions'])
pyexp_obj = pickle.load(open(pyExp_dir+'openstack_rulefit_crossoverinterpolation_2a4e50caeaa271937a23ab7c052c7e9c47c1933f_200_rules.pkl','rb'))
print(pyexp_obj.keys())
dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'top_k_positive_rules', 'top_k_negative_rules', 'commit_id', 'local_model'])
local_model = pyexp_obj['local_model']
rule = local_model.get_rules()
# print(rule)
rule = rule[rule['type']=='rule']
print(rule)
                                                  rule  type      coef  \
13   ndev > -0.9049999713897705 & rsawr <= 0.075000...  rule -0.035452
14                              la <= 73.2599983215332  rule -0.005483
15   la > 104.8949966430664 & rrexp <= 425.71501159...  rule -0.034989
16   la > 104.8949966430664 & ns > 1.32500004768371...  rule -0.028039
17    ns > 1.4300000071525574 & la > 87.27999877929688  rule -0.008070
..                                                 ...   ...       ...
206  la > 49.97999954223633 & asawr <= 0.0049999998...  rule -0.044510
207  la > 67.8499984741211 & rrexp > 386.1300048828...  rule -0.076416
208  la > 55.260000228881836 & rsawr > 0.0899999998...  rule  0.103231
209                            la <= 55.14999961853027  rule -0.006513
210  age <= -0.10999999940395355 & ns <= 1.48000001...  rule -0.044921

      support  importance
13   0.029333    0.005982
14   0.157333    0.001996
15   0.120000    0.011370
16   0.029333    0.004731
17   0.074667    0.002121
..        ...         ...
206  0.226667    0.018635
207  0.168000    0.028570
208  0.754667    0.044419
209  0.136000    0.002233
210  0.090667    0.012898

[198 rows x 5 columns]
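The importance column above is consistent with the usual RuleFit definition for rules, importance = |coef| * sqrt(support * (1 - support)); for example, row 14 gives 0.005483 * sqrt(0.157333 * 0.842667) ≈ 0.001996. A quick sanity check (illustrative, using the rule DataFrame from the cell above):
import numpy as np
# Recompute rule importance as |coef| * sqrt(support * (1 - support)) and compare
# it with the column reported by get_rules().
recomputed = rule['coef'].abs() * np.sqrt(rule['support'] * (1 - rule['support']))
print(np.allclose(recomputed, rule['importance'], atol=1e-4))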
display(rule.sort_values(by='importance',ascending=False))
rule | type | coef | support | importance | |
---|---|---|---|---|---|
27 | ns <= 1.350000023841858 & ent <= 0.90500000119... | rule | 0.150145 | 0.634667 | 0.072299 |
125 | la > 23.53499984741211 & asawr > 0.00499999988... | rule | 0.116750 | 0.501333 | 0.058375 |
24 | asawr <= 0.13499999791383743 & la > 51.75 & ns... | rule | 0.121400 | 0.653333 | 0.057775 |
182 | ent <= 0.925000011920929 & asawr > 0.004999999... | rule | 0.112495 | 0.608000 | 0.054920 |
126 | app <= 3.9550000429153442 & ns <= 1.4550000429... | rule | 0.110274 | 0.594667 | 0.054140 |
... | ... | ... | ... | ... | ... |
134 | la > 46.94999885559082 & ns > 1.5849999785423279 | rule | -0.003994 | 0.066667 | 0.000996 |
204 | la <= 66.48500061035156 | rule | 0.002735 | 0.128000 | 0.000914 |
191 | la <= 56.47999954223633 | rule | -0.002193 | 0.160000 | 0.000804 |
132 | ns > 1.6449999809265137 & la > 73.2599983215332 | rule | 0.003284 | 0.045333 | 0.000683 |
77 | la <= 67.8499984741211 | rule | 0.000101 | 0.165333 | 0.000038 |
198 rows × 5 columns
display(rule.sort_values(by='coef',ascending=False))
rule | type | coef | support | importance | |
---|---|---|---|---|---|
540 | rtime > 40.69499969482422 & app > 2.0299999713... | rule | 0.977063 | 0.283333 | 0.440281 |
588 | asawr > 0.044999999925494194 & la <= 2245.4250... | rule | 0.948651 | 0.386667 | 0.461980 |
690 | app <= 3.975000023841858 & age <= 0.8499999940... | rule | 0.941952 | 0.006667 | 0.076653 |
882 | nd > 2.3850001096725464 & rrexp <= 2276.844970... | rule | 0.866176 | 0.700000 | 0.396932 |
527 | nrev <= 15.140000343322754 & nd > 4.2650001049... | rule | 0.853926 | 0.016667 | 0.109319 |
... | ... | ... | ... | ... | ... |
1054 | rsawr <= 0.3050000071525574 & rtime > 7.904999... | rule | -0.889239 | 0.096667 | 0.262773 |
980 | app <= 2.0049999952316284 & rrexp > 849.420013... | rule | -0.909646 | 0.110000 | 0.284619 |
1114 | la > 204.33499908447266 & la > 33.825000762939... | rule | -1.049840 | 0.263333 | 0.462393 |
331 | ndev <= 67.1349983215332 & age > 2.54999995231... | rule | -1.339971 | 0.380000 | 0.650404 |
347 | nd > 4.265000104904175 & nrev > 15.14000034332... | rule | -1.504175 | 0.213333 | 0.616202 |
1863 rows × 5 columns
display(rule.sort_values(by=['coef'],ascending=[False]))
# sort by importance then get coef > 0
rule | type | coef | support | importance | |
---|---|---|---|---|---|
27 | ns <= 1.350000023841858 & ent <= 0.90500000119... | rule | 0.150145 | 0.634667 | 0.072299 |
24 | asawr <= 0.13499999791383743 & la > 51.75 & ns... | rule | 0.121400 | 0.653333 | 0.057775 |
125 | la > 23.53499984741211 & asawr > 0.00499999988... | rule | 0.116750 | 0.501333 | 0.058375 |
182 | ent <= 0.925000011920929 & asawr > 0.004999999... | rule | 0.112495 | 0.608000 | 0.054920 |
126 | app <= 3.9550000429153442 & ns <= 1.4550000429... | rule | 0.110274 | 0.594667 | 0.054140 |
... | ... | ... | ... | ... | ... |
150 | asawr <= 0.004999999888241291 | rule | -0.082666 | 0.253333 | 0.035953 |
45 | la > 46.64999961853027 & app > 3.9550000429153442 | rule | -0.082695 | 0.149333 | 0.029474 |
22 | ns <= 1.4550000429153442 & app > 3.95500004291... | rule | -0.088113 | 0.157333 | 0.032083 |
177 | asawr > 0.13499999791383743 & la > 51.75 & ns ... | rule | -0.100196 | 0.141333 | 0.034905 |
136 | rtime > 96.99500274658203 & app > 2.9750000238... | rule | -0.127815 | 0.064000 | 0.031283 |
198 rows × 5 columns
display(list(rule[rule['coef']>0].sort_values(by=['coef','importance'],ascending=[False,False])['rule']))
['ns <= 1.350000023841858 & ent <= 0.9050000011920929 & asawr > 0.004999999888241291', 'asawr <= 0.13499999791383743 & la > 51.75 & ns <= 1.4300000071525574', 'la > 23.53499984741211 & asawr > 0.004999999888241291 & app <= 3.9000000953674316', 'ent <= 0.925000011920929 & asawr > 0.004999999888241291 & la > 26.139999389648438', 'app <= 3.9550000429153442 & ns <= 1.4550000429153442 & la > 82.54999923706055', 'ns <= 1.4749999642372131 & ndev > 3.1200000047683716 & la > 44.34499931335449', 'la > 87.41499710083008 & ent <= 0.9749999940395355 & la <= 1110.3800048828125', 'app <= 3.9550000429153442 & la > 46.64999961853027 & ns <= 1.4399999976158142', 'la > 55.260000228881836 & rsawr > 0.08999999985098839 & rtime <= 148.375', 'rsawr > 0.08500000089406967 & la > 88.8650016784668 & rrexp > -407.61500549316406', 'la > 85.48500061035156 & ent <= 0.9600000083446503 & la <= 975.3250122070312', 'la > 67.8499984741211 & rtime <= 72.5099983215332 & rrexp > 386.1300048828125', 'asawr > 0.004999999888241291 & la <= 917.9400024414062 & la > 55.760000228881836', 'ndev > -0.9049999713897705 & rsawr > 0.07500000111758709 & la > 56.35499954223633', 'la > 45.53499984741211 & app <= 3.975000023841858 & ns <= 1.4850000143051147', 'la > 13.130000591278076 & age > -2.5549999475479126 & ns <= 1.425000011920929', 'asawr > -0.014999999664723873 & la <= 1089.7150268554688 & la > 43.68000030517578', 'la > 104.8949966430664 & rrexp > 425.7150115966797 & ns <= 1.3250000476837158', 'la <= 1146.614990234375 & la > 46.77499961853027 & ent <= 0.9549999833106995', 'ns <= 1.3650000095367432 & asawr > 0.004999999888241291 & la > 15.87999963760376', 'la > 37.34499931335449 & nrev <= 46.760000228881836 & ns <= 1.4649999737739563', 'la > 12.25499963760376 & ns <= 1.4550000429153442 & ndev > -10.069999933242798', 'la > 84.95000076293945 & ent <= 0.9350000023841858 & asawr > -0.024999999441206455', 'la > 46.78999900817871 & ns <= 1.5 & ndev > -0.9549999833106995', 'ns <= 1.4449999928474426 & ent <= 0.9350000023841858 & la > -11.50499963760376', 'la > 44.26499938964844 & ns <= 1.4950000047683716 & rrexp > 423.87998962402344', 'la > 46.0049991607666 & rrexp > 426.10499572753906 & ns <= 1.4699999690055847', 'ent <= 0.925000011920929 & la > 83.50500106811523 & ns <= 1.4900000095367432', 'la > 87.27999877929688 & app <= 4.924999952316284 & ns <= 1.4300000071525574', 'ns <= 1.4800000190734863 & age > -0.10999999940395355 & la > 87.01499938964844', 'rtime <= 96.99500274658203 & la > 48.40999984741211', 'ent <= 0.9549999833106995 & ns <= 1.4800000190734863 & la > 83.02000045776367', 'ns <= 1.465000033378601 & age > -0.014999999664723873 & la > 55.06999969482422', 'asawr > 0.004999999888241291 & la > 49.97999954223633 & ns <= 1.465000033378601', 'la > 83.20499801635742 & ns <= 1.5049999952316284 & ent <= 0.9600000083446503', 'ns <= 1.4449999928474426 & asawr > 0.004999999888241291 & la > 51.55999946594238', 'ndev > -1.8149999976158142 & ns <= 1.5049999952316284 & la > 61.70000076293945', 'la > 46.709999084472656 & ndev > -7.425000190734863 & rtime <= 131.2300033569336', 'app <= 2.975000023841858 & rtime > 96.99500274658203 & la > 48.40999984741211', 'la > 55.13999938964844 & rtime <= 172.20000457763672 & ns <= 1.48499995470047', 'ns <= 1.39000004529953 & ent <= 0.9600000083446503 & la > 85.54500198364258', 'ent <= 0.925000011920929 & la > 51.55999946594238 & ns <= 1.4600000381469727', 'la > 46.54999923706055 & ent <= 0.8550000190734863 & ns <= 1.4399999976158142', 'la > 46.94999885559082 & app <= 4.974999904632568 & ns <= 1.5849999785423279', 
'ns <= 1.6449999809265137 & asawr > 0.004999999888241291 & la > 73.2599983215332', 'rrexp > 430.3350067138672 & ns <= 1.5449999570846558 & la > 56.47999954223633', 'la > 70.68000030517578 & ns <= 1.3949999809265137 & ent <= 0.8149999976158142', 'ent <= 0.9350000023841858 & la > 46.64999961853027 & ns <= 1.3450000286102295', 'la > 55.14999961853027 & ns <= 1.465000033378601 & nrev > 6.015000104904175', 'age > -7.080000162124634 & la > 98.27499771118164 & ns <= 1.175000011920929', 'asawr > -0.014999999664723873 & ns <= 1.5 & la > 66.48500061035156', 'ns > 1.175000011920929 & la > 98.27499771118164', 'la <= 61.70000076293945', 'ns > 1.6449999809265137 & la > 73.2599983215332', 'la <= 66.48500061035156', 'la <= 67.8499984741211']
len(rule[rule['coef']<0].sort_values(by=['importance'],ascending=False))
142
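So 142 of the 198 rules in this local model carry a negative coefficient. A one-line cross-check of the split (illustrative, using the same rule DataFrame):
# Count positive- and negative-coefficient rules; together with any exactly-zero
# coefficients the two counts should add up to len(rule).
print((rule['coef'] > 0).sum(), (rule['coef'] < 0).sum(), len(rule))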