"""Explore the diabetes dataset and train a small dense binary classifier.

Flat Colab script: mounts Google Drive, loads ``Diabetes.tsv``, draws a few
exploratory plots, trains a Keras ``Sequential`` network and reports
train/test accuracy, a confusion matrix and the training curves.
"""
import os

from google.colab import drive
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
# `keras.utils.vis_utils` is gone in recent Keras; `keras.utils` is the
# stable public location of `plot_model`.
from keras.utils import plot_model
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image, display
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
drive.mount('/content/drive')
# Plain Python replacement for the `%cd` notebook magic so the file is also
# valid outside IPython.
os.chdir('/content/drive/MyDrive/Datasets/')

dataset = pd.read_csv('Diabetes.tsv', sep='\t')
print(dataset.head())
dataset.info()
print(dataset.isnull().sum())

# ---------------------------------------------------------------------------
# Exploratory plots
# ---------------------------------------------------------------------------
palette = sns.color_palette()
class_colors = (palette[0], palette[2])

# Order the counts by class value rather than by frequency so the positional
# "No"/"Yes" labels cannot swap when the class balance changes.
# NOTE(review): assumes Outcome is encoded 0 = no diabetes, 1 = diabetes,
# consistent with the confusion-matrix labels further down.
outcome_counts = dataset.Outcome.value_counts().sort_index()

f, ax = plt.subplots(1, 2, figsize=(12, 6))
f.suptitle("Diabetes?", fontsize=18.)
outcome_counts.plot.bar(ax=ax[0], rot=0, color=class_colors).set(
    xticklabels=["No", "Yes"]
)
outcome_counts.plot.pie(
    labels=("No", "Yes"),
    autopct="%.2f%%",
    label="",
    fontsize=13.,
    ax=ax[1],
    colors=class_colors,
    wedgeprops={"linewidth": 1.5, "edgecolor": "#F7F7F7"},
)
# texts[1] and texts[3] are the two percentage annotations of the pie.
ax[1].texts[1].set_color("#F7F7F7")
ax[1].texts[3].set_color("#F7F7F7")

# Per-feature distributions. `distplot` is deprecated/removed in seaborn;
# a density-normalised `histplot` with a KDE overlay is its replacement.
feature_columns = ["Age", "Preg", "GLU", "BP", "ST", "INS", "BMI", "DPF"]
fig, ax = plt.subplots(4, 2, figsize=(16, 16))
for axis, column in zip(ax.ravel(), feature_columns):
    sns.histplot(dataset[column], bins=20, kde=True, stat="density", ax=axis)

corr = dataset.corr()
sns.set(font_scale=1.15)
plt.figure(figsize=(14, 10))
sns.heatmap(corr, vmax=.8, linewidths=0.01, square=True, annot=True,
            cmap='YlGnBu', linecolor="black")
plt.title('Correlation between features')

sns.pairplot(data=dataset, hue='Outcome')
plt.show()

# ---------------------------------------------------------------------------
# Features / target, split, scaling
# ---------------------------------------------------------------------------
# First eight columns are the inputs, the last column is the target.
x = dataset.iloc[:, 0:8].values
y = dataset.iloc[:, -1].values
print(x)
print(y)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=12
)

# Fit the scaler on the training split only and reuse its statistics for the
# test split; fitting on the full data leaks test information into training.
# NOTE: MinMaxScaler is a drop-in alternative here.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
print(x_train)
print(x_train.shape)
print(x_test.shape)
print(y_test.shape)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = Sequential()
# Hidden layers use ReLU; softmax belongs on a multi-class output layer, not
# on a hidden layer.
model.add(Dense(32, input_dim=8, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Binary cross-entropy is the matching loss for a single sigmoid output.
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()

plot_model(model, to_file='nn_model_plot.png', show_shapes=True,
           show_layer_names=True)
display(Image(retina=True, filename='nn_model_plot.png'))

# NOTE(review): the test split doubles as validation data here; it is only
# monitored, not used for early stopping or model selection.
hist = model.fit(x_train, y_train, epochs=100,
                 validation_data=(x_test, y_test))

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
# Train and Test accuracy
scores = model.evaluate(x_train, y_train)
print("Training Accuracy: %.2f%%\n" % (scores[1] * 100))
scores = model.evaluate(x_test, y_test)
print("Testing Accuracy: %.2f%%\n" % (scores[1] * 100))

# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D integer label
# vector, the shape the sklearn metrics expect.
y_prob = model.predict(x_test)
y_pred = (y_prob > 0.5).astype(int).ravel()
# Side-by-side view: predicted label | true label.
print(np.column_stack((y_pred, y_test)))

cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))

cm_ax = sns.heatmap(cm, annot=True,
                    xticklabels=["No Diabetes", 'Diabetes'],
                    yticklabels=['No Diabetes', 'Diabetes'],
                    cbar=False, cmap='Blues')
cm_ax.set_xlabel('Prediction')
cm_ax.set_ylabel('Actual')
plt.show()

# ---------------------------------------------------------------------------
# Training curves
# ---------------------------------------------------------------------------
acc = hist.history['accuracy']
loss = hist.history['loss']

# Save before showing: once `plt.show()` returns the figure may already be
# closed, and `savefig` would then write an empty image.
plt.figure()
plt.plot(acc)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.savefig('accuracy_curve.png')
plt.show()

plt.figure()
plt.plot(loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.savefig('loss_curve.png')
plt.show()