from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
!pip install lime
Requirement already satisfied: lime in /usr/local/lib/python3.7/dist-packages (0.2.0.1) Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from lime) (1.21.5) Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from lime) (4.63.0) Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from lime) (3.2.2) Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.7/dist-packages (from lime) (1.0.2) Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from lime) (1.4.1) Requirement already satisfied: scikit-image>=0.12 in /usr/local/lib/python3.7/dist-packages (from lime) (0.18.3) Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.12->lime) (2021.11.2) Requirement already satisfied: networkx>=2.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.12->lime) (2.6.3) Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.12->lime) (1.3.0) Requirement already satisfied: pillow!=7.1.0,!=7.1.1,>=4.3.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.12->lime) (7.1.2) Requirement already satisfied: imageio>=2.3.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.12->lime) (2.4.1) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->lime) (2.8.2) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->lime) (0.11.0) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->lime) (1.4.0) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->lime) (3.0.7) Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->lime) (3.10.0.2) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->lime) (1.15.0) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18->lime) (3.1.0) Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18->lime) (1.1.0)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
import warnings
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import lime
from lime import lime_tabular
warnings.filterwarnings('ignore')
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Dataset/Epileptic Seizure Recognition.csv")
df
Unnamed | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | ... | X170 | X171 | X172 | X173 | X174 | X175 | X176 | X177 | X178 | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | X21.V1.791 | 135 | 190 | 229 | 223 | 192 | 125 | 55 | -9 | -33 | ... | -17 | -15 | -31 | -77 | -103 | -127 | -116 | -83 | -51 | 4 |
1 | X15.V1.924 | 386 | 382 | 356 | 331 | 320 | 315 | 307 | 272 | 244 | ... | 164 | 150 | 146 | 152 | 157 | 156 | 154 | 143 | 129 | 1 |
2 | X8.V1.1 | -32 | -39 | -47 | -37 | -32 | -36 | -57 | -73 | -85 | ... | 57 | 64 | 48 | 19 | -12 | -30 | -35 | -35 | -36 | 5 |
3 | X16.V1.60 | -105 | -101 | -96 | -92 | -89 | -95 | -102 | -100 | -87 | ... | -82 | -81 | -80 | -77 | -85 | -77 | -72 | -69 | -65 | 5 |
4 | X20.V1.54 | -9 | -65 | -98 | -102 | -78 | -48 | -16 | 0 | -21 | ... | 4 | 2 | -12 | -32 | -41 | -65 | -83 | -89 | -73 | 5 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
11495 | X22.V1.114 | -22 | -22 | -23 | -26 | -36 | -42 | -45 | -42 | -45 | ... | 15 | 16 | 12 | 5 | -1 | -18 | -37 | -47 | -48 | 2 |
11496 | X19.V1.354 | -47 | -11 | 28 | 77 | 141 | 211 | 246 | 240 | 193 | ... | -65 | -33 | -7 | 14 | 27 | 48 | 77 | 117 | 170 | 1 |
11497 | X8.V1.28 | 14 | 6 | -13 | -16 | 10 | 26 | 27 | -9 | 4 | ... | -65 | -48 | -61 | -62 | -67 | -30 | -2 | -1 | -8 | 5 |
11498 | X10.V1.932 | -40 | -25 | -9 | -12 | -2 | 12 | 7 | 19 | 22 | ... | 121 | 135 | 148 | 143 | 116 | 86 | 68 | 59 | 55 | 3 |
11499 | X16.V1.210 | 29 | 41 | 57 | 72 | 74 | 62 | 54 | 43 | 31 | ... | -59 | -25 | -4 | 2 | 5 | 4 | -2 | 2 | 20 | 4 |
11500 rows × 180 columns
X = df.iloc[:,1:-1]
y = df.iloc[:,-1:]
Feature
X
X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 | ... | X169 | X170 | X171 | X172 | X173 | X174 | X175 | X176 | X177 | X178 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 135 | 190 | 229 | 223 | 192 | 125 | 55 | -9 | -33 | -38 | ... | 8 | -17 | -15 | -31 | -77 | -103 | -127 | -116 | -83 | -51 |
1 | 386 | 382 | 356 | 331 | 320 | 315 | 307 | 272 | 244 | 232 | ... | 168 | 164 | 150 | 146 | 152 | 157 | 156 | 154 | 143 | 129 |
2 | -32 | -39 | -47 | -37 | -32 | -36 | -57 | -73 | -85 | -94 | ... | 29 | 57 | 64 | 48 | 19 | -12 | -30 | -35 | -35 | -36 |
3 | -105 | -101 | -96 | -92 | -89 | -95 | -102 | -100 | -87 | -79 | ... | -80 | -82 | -81 | -80 | -77 | -85 | -77 | -72 | -69 | -65 |
4 | -9 | -65 | -98 | -102 | -78 | -48 | -16 | 0 | -21 | -59 | ... | 10 | 4 | 2 | -12 | -32 | -41 | -65 | -83 | -89 | -73 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
11495 | -22 | -22 | -23 | -26 | -36 | -42 | -45 | -42 | -45 | -49 | ... | 20 | 15 | 16 | 12 | 5 | -1 | -18 | -37 | -47 | -48 |
11496 | -47 | -11 | 28 | 77 | 141 | 211 | 246 | 240 | 193 | 136 | ... | -94 | -65 | -33 | -7 | 14 | 27 | 48 | 77 | 117 | 170 |
11497 | 14 | 6 | -13 | -16 | 10 | 26 | 27 | -9 | 4 | 14 | ... | -42 | -65 | -48 | -61 | -62 | -67 | -30 | -2 | -1 | -8 |
11498 | -40 | -25 | -9 | -12 | -2 | 12 | 7 | 19 | 22 | 29 | ... | 114 | 121 | 135 | 148 | 143 | 116 | 86 | 68 | 59 | 55 |
11499 | 29 | 41 | 57 | 72 | 74 | 62 | 54 | 43 | 31 | 23 | ... | -94 | -59 | -25 | -4 | 2 | 5 | 4 | -2 | 2 | 20 |
11500 rows × 178 columns
Label
y
y | |
---|---|
0 | 4 |
1 | 1 |
2 | 5 |
3 | 5 |
4 | 5 |
... | ... |
11495 | 2 |
11496 | 1 |
11497 | 5 |
11498 | 3 |
11499 | 4 |
11500 rows × 1 columns
def toBinary(x):
if x != 1: return 0;
else: return 1;
y = y['y'].apply(toBinary)
y = pd.DataFrame(data=y)
y
y | |
---|---|
0 | 0 |
1 | 1 |
2 | 0 |
3 | 0 |
4 | 0 |
... | ... |
11495 | 0 |
11496 | 1 |
11497 | 0 |
11498 | 0 |
11499 | 0 |
11500 rows × 1 columns
# scaler = StandardScaler()
# X = scaler.fit_transform(X)
X
X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 | ... | X169 | X170 | X171 | X172 | X173 | X174 | X175 | X176 | X177 | X178 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 135 | 190 | 229 | 223 | 192 | 125 | 55 | -9 | -33 | -38 | ... | 8 | -17 | -15 | -31 | -77 | -103 | -127 | -116 | -83 | -51 |
1 | 386 | 382 | 356 | 331 | 320 | 315 | 307 | 272 | 244 | 232 | ... | 168 | 164 | 150 | 146 | 152 | 157 | 156 | 154 | 143 | 129 |
2 | -32 | -39 | -47 | -37 | -32 | -36 | -57 | -73 | -85 | -94 | ... | 29 | 57 | 64 | 48 | 19 | -12 | -30 | -35 | -35 | -36 |
3 | -105 | -101 | -96 | -92 | -89 | -95 | -102 | -100 | -87 | -79 | ... | -80 | -82 | -81 | -80 | -77 | -85 | -77 | -72 | -69 | -65 |
4 | -9 | -65 | -98 | -102 | -78 | -48 | -16 | 0 | -21 | -59 | ... | 10 | 4 | 2 | -12 | -32 | -41 | -65 | -83 | -89 | -73 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
11495 | -22 | -22 | -23 | -26 | -36 | -42 | -45 | -42 | -45 | -49 | ... | 20 | 15 | 16 | 12 | 5 | -1 | -18 | -37 | -47 | -48 |
11496 | -47 | -11 | 28 | 77 | 141 | 211 | 246 | 240 | 193 | 136 | ... | -94 | -65 | -33 | -7 | 14 | 27 | 48 | 77 | 117 | 170 |
11497 | 14 | 6 | -13 | -16 | 10 | 26 | 27 | -9 | 4 | 14 | ... | -42 | -65 | -48 | -61 | -62 | -67 | -30 | -2 | -1 | -8 |
11498 | -40 | -25 | -9 | -12 | -2 | 12 | 7 | 19 | 22 | 29 | ... | 114 | 121 | 135 | 148 | 143 | 116 | 86 | 68 | 59 | 55 |
11499 | 29 | 41 | 57 | 72 | 74 | 62 | 54 | 43 | 31 | 23 | ... | -94 | -59 | -25 | -4 | 2 | 5 | 4 | -2 | 2 | 20 |
11500 rows × 178 columns
y
y | |
---|---|
0 | 0 |
1 | 1 |
2 | 0 |
3 | 0 |
4 | 0 |
... | ... |
11495 | 0 |
11496 | 1 |
11497 | 0 |
11498 | 0 |
11499 | 0 |
11500 rows × 1 columns
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
x_test.iloc[1]
X1 -101 X2 -121 X3 -134 X4 -148 X5 -152 ... X174 -16 X175 -4 X176 9 X177 16 X178 19 Name: 4458, Length: 178, dtype: int64
Training data accuracy evaluation
clf = LogisticRegression() #initializing logistic regression
clf.fit(x_train, y_train) #training the model with train data(input, output)
acc_log_reg = clf.score(x_train, y_train) * 100
print(round(acc_log_reg,2), "%")
67.45 %
Test Data accuracy evaluation
y_pred_log_reg = clf.predict(x_test)
acc_log_reg2 = round(clf.score(x_test, y_test) * 100, 2)
print(acc_log_reg2, "%")
64.17 %
*Model Report*
predictions = clf.predict(x_test)
print(classification_report(y_test, predictions))
precision recall f1-score support 0 0.82 0.70 0.76 2740 1 0.26 0.41 0.32 710 accuracy 0.64 3450 macro avg 0.54 0.56 0.54 3450 weighted avg 0.71 0.64 0.67 3450
*Model interpretation (LIME)*
explainer = lime_tabular.LimeTabularExplainer(
training_data=np.array(x_train),
feature_names=x_train.columns,
class_names=[0, 1],
mode='classification'
)
exp = explainer.explain_instance(
data_row=x_test.iloc[0],
predict_fn=clf.predict_proba
)
exp.show_in_notebook(show_table=True)
Training data accuracy evaluation
clf = SVC(probability=True) #initializing svm classifier
clf.fit(x_train, y_train) #training the model with train data(input, output)
acc_svc1 = clf.score(x_train, y_train) * 100
print(round(acc_svc1,2), '%')
98.07 %
Test Data accuracy evaluation
y_pred_svc = clf.predict(x_test)
acc_svc2 = round(clf.score(x_test, y_test) * 100, 2)
print(acc_svc2, "%")
97.3 %
*Model Report*
predictions = clf.predict(x_test)
print(classification_report(y_test, predictions))
precision recall f1-score support 0 0.98 0.99 0.98 2740 1 0.96 0.90 0.93 710 accuracy 0.97 3450 macro avg 0.97 0.95 0.96 3450 weighted avg 0.97 0.97 0.97 3450
*Model interpretation (LIME)*
exp = explainer.explain_instance(
data_row=x_test.iloc[2222],
predict_fn=clf.predict_proba
)
exp.show_in_notebook(show_table=True)
Train Data accuracy evaluation
clf = LinearSVC() #initializing svm classifier
# clf = SVC(kernel='linear',probability=True)
clf.fit(x_train, y_train) #training the model with train data(input, output)
acc_linear_svc1 = clf.score(x_train, y_train) * 100
print(round(acc_linear_svc1,2), '%')
85.3 %
Test Data accuracy evaluation
y_pred_linear_svc = clf.predict(x_test)
acc_linear_svc2 = round(clf.score(x_test, y_test) * 100, 2)
print(acc_linear_svc2, "%")
84.55 %
*Model Report*
predictions = clf.predict(x_test)
print(classification_report(y_test, predictions))
precision recall f1-score support 0 0.84 1.00 0.91 2740 1 0.94 0.27 0.41 710 accuracy 0.85 3450 macro avg 0.89 0.63 0.66 3450 weighted avg 0.86 0.85 0.81 3450
Train Data accuracy evaluation
clf = KNeighborsClassifier() #initializing svm classifier
clf.fit(x_train, y_train) #training the model with train data(input, output)
acc_knn1 = clf.score(x_train, y_train) * 100
print(round(acc_knn1,2), '%')
93.7 %
Test Data accuracy evaluation
y_pred_knn = clf.predict(x_test)
acc_knn2 = round(clf.score(x_test, y_test) * 100, 2)
print(acc_knn2, "%")
91.8 %
*Model Report*
predictions = clf.predict(x_test)
print(classification_report(y_test, predictions))
precision recall f1-score support 0 0.91 1.00 0.95 2740 1 0.99 0.61 0.75 710 accuracy 0.92 3450 macro avg 0.95 0.80 0.85 3450 weighted avg 0.92 0.92 0.91 3450
*Model interpretation (LIME)*
exp = explainer.explain_instance(
data_row=x_test.iloc[2222],
predict_fn=clf.predict_proba
)
exp.show_in_notebook(show_table=True)
y = to_categorical(y)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
X_train = x_train
X_test = x_test
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_train = np.reshape(x_train, (x_train.shape[0],1,X.shape[1]))
x_test = scaler.fit_transform(x_test)
x_test = np.reshape(x_test, (x_test.shape[0],1,X.shape[1]))
print(type(x_train))
print(type(x_test))
print(type(X_train))
print(type(X_test))
print(type(y_train))
print(type(y_test))
<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
tf.keras.backend.clear_session()
model = Sequential()
model.add(LSTM(64, input_shape=(1,178),activation="relu",return_sequences=True))
model.add(LSTM(32,activation="sigmoid"))
model.add(Dense(2, activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer = "adam", metrics = ['accuracy'])
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm (LSTM) (None, 1, 64) 62208 lstm_1 (LSTM) (None, 32) 12416 dense (Dense) (None, 2) 66 ================================================================= Total params: 74,690 Trainable params: 74,690 Non-trainable params: 0 _________________________________________________________________
history = model.fit(x_train, y_train, epochs = 10)
Epoch 1/10 288/288 [==============================] - 3s 4ms/step - loss: 0.3133 - accuracy: 0.8862 Epoch 2/10 288/288 [==============================] - 1s 4ms/step - loss: 0.1058 - accuracy: 0.9708 Epoch 3/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0699 - accuracy: 0.9808 Epoch 4/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0518 - accuracy: 0.9849 Epoch 5/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0414 - accuracy: 0.9883 Epoch 6/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0328 - accuracy: 0.9905 Epoch 7/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0248 - accuracy: 0.9930 Epoch 8/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0187 - accuracy: 0.9948 Epoch 9/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0139 - accuracy: 0.9968 Epoch 10/10 288/288 [==============================] - 1s 4ms/step - loss: 0.0112 - accuracy: 0.9970
Train data
scoreTrain, accTrain = model.evaluate(x_train, y_train)
print(round(accTrain*100, 2), '%')
288/288 [==============================] - 1s 2ms/step - loss: 0.0098 - accuracy: 0.9973 99.73 %
Test Data
scoreTest, accTest = model.evaluate(x_test, y_test)
print(round(accTest*100, 2), '%')
72/72 [==============================] - 0s 2ms/step - loss: 0.0906 - accuracy: 0.9743 97.43 %
Individual check
print(x_test[22,:])
print(y_test[22,:])
[[-0.27394654 -0.32804627 -0.42904765 -0.48753605 -0.54755226 -0.58983591 -0.58537214 -0.55949834 -0.43968008 -0.37293693 -0.34310263 -0.31993666 -0.27848473 -0.2196931 -0.17781191 -0.12804085 -0.14068347 -0.07507794 -0.00790428 0.04495067 0.02981974 0.02817117 0.06060217 0.05034591 0.08619392 0.10422928 0.07772134 0.08832882 0.03174449 -0.01619464 -0.09960484 -0.13096325 -0.14681821 -0.1034075 -0.03104909 0.02851506 0.08955337 0.0473092 -0.04005323 -0.09391386 -0.18231731 -0.2486911 -0.23296083 -0.16163196 -0.06551701 -0.01223946 -0.00294793 -0.05713513 -0.09766821 -0.15842429 -0.19149529 -0.28554208 -0.3881192 -0.44921689 -0.51665123 -0.5920847 -0.63233076 -0.6463007 -0.60515044 -0.50495606 -0.40408235 -0.34114291 -0.30242893 -0.27671821 -0.24965612 -0.17529294 -0.11588091 -0.0099983 0.0346715 0.06507569 0.0726249 0.06318569 0.13827268 0.18620243 0.29304884 0.29172423 0.32491785 0.34922817 0.3499471 0.30923951 0.33703713 0.36986291 0.37389442 0.40242202 0.36420472 0.32141731 0.23435624 0.17261021 0.14177181 0.08849791 0.08802865 0.07502712 0.03273347 0.02470467 0.06094804 0.07442878 0.09438576 0.12220139 0.14663054 0.18559306 0.2373233 0.31841494 0.3954984 0.38146073 0.31815601 0.23530614 0.16327116 0.10122072 0.08959236 0.00218855 -0.04991878 -0.11931541 -0.16827824 -0.20825556 -0.27909706 -0.31712507 -0.41086265 -0.45586123 -0.45727768 -0.41683662 -0.3312998 -0.20453536 -0.10251652 0.01370532 0.08119537 0.13717106 0.15654614 0.11208445 0.04614337 -0.03447503 -0.07907902 -0.12033642 -0.18931062 -0.24672544 -0.32293709 -0.37215495 -0.39881472 -0.37276288 -0.33425137 -0.27855264 -0.27556763 -0.25519566 -0.20283903 -0.16155683 -0.118237 -0.13050738 -0.16296491 -0.16747638 -0.20614067 -0.25770189 -0.25909891 -0.22674276 -0.26381383 -0.2440509 -0.25590711 -0.22570517 -0.19770466 -0.15983697 -0.14697078 -0.10003764 -0.03386954 -0.04347877 -0.05469603 -0.12379429 -0.16004472 -0.22487 -0.2955943 -0.329563 -0.36461738 -0.36340312 -0.3303952 -0.27704948 -0.27589093 -0.2938621 -0.30298402 -0.30966416 -0.29840958 -0.30337485]] [1. 0.]
scoreTest, accTest = model.evaluate(x_test[[44],:], y_test[[44],:])
print(round(accTest*100, 2), '%')
1/1 [==============================] - 0s 21ms/step - loss: 1.0610e-05 - accuracy: 1.0000 100.0 %
print(model.predict(x_test[[44],:]))
[[9.9998939e-01 1.0614717e-05]]
*Model interpretation (LIME)*
explainer = lime_tabular.RecurrentTabularExplainer(training_data = x_train,
feature_names = X_train.columns,
class_names = [0, 1],
mode='classification'
)
exp = explainer.explain_instance(np.array(X_test.iloc[123]), model.predict)
exp.show_in_notebook(show_table=True)