#!/usr/bin/env python
# coding: utf-8

# In[3]:

import numpy as np               # linear algebra
import pandas as pd              # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# In[4]:

dataset = pd.read_csv('/content/diabetes.csv')


# In[5]:

# Viewing the first rows of the dataset
dataset.head()


# In[6]:

dataset.info(verbose=True)


# In[7]:

# Shape of the dataset (rows, columns)
dataset.shape


# In[8]:

# Counting the null values in each column
dataset.isnull().sum()


# In[9]:

# Class balance of the target: bar chart and pie chart of Outcome
f, ax = plt.subplots(1, 2, figsize=(12, 6))
f.suptitle("Diabetes?", fontsize=18)
palette = (sns.color_palette()[0], sns.color_palette()[2])
dataset.Outcome.value_counts().plot.bar(
    ax=ax[0], rot=0, color=palette).set(xticklabels=["No", "Yes"])
dataset.Outcome.value_counts().plot.pie(
    labels=("No", "Yes"), autopct="%.2f%%", label="", fontsize=13, ax=ax[1],
    colors=palette, wedgeprops={"linewidth": 1.5, "edgecolor": "#F7F7F7"})
ax[1].texts[1].set_color("#F7F7F7")
ax[1].texts[3].set_color("#F7F7F7")


# In[10]:

# Summary statistics; another way to spot missing or implausible values
dataset.describe()


# In[11]:

dataset.dtypes


# In[12]:

plt.style.use('classic')
plot = dataset.hist(figsize=(20, 20))


# In[13]:

# Pair plot (scatter matrix) of all features
sns.pairplot(dataset)


# In[14]:

# Pair plot colored by Outcome, with KDE diagonals and regression fits
sns.pairplot(data=dataset, hue='Outcome', diag_kind='kde', kind='reg')
plt.show()


# In[15]:

# Correlation heatmap of all columns
plt.figure(figsize=[10, 10])
sns.heatmap(dataset.corr(), annot=True, fmt='.3f', cmap='vlag_r', center=0);


# In[16]:

# Raw values as a NumPy array
print(np.array(dataset))


# In[17]:

# Separate the features from the target column 'Outcome'
train_data_x = dataset.drop(columns='Outcome', axis=1)
train_data_y = dataset['Outcome']


# In[18]:

train_Data, test_Data, train_Out, test_Out = train_test_split(
    train_data_x, train_data_y, test_size=0.2, random_state=10)
print(train_Out)


# In[19]:

# Feed-forward network: 8 input features, four sigmoid hidden layers,
# and one sigmoid output unit for binary classification
data_model = Sequential()
data_model.add(Dense(100, input_dim=8, activation='sigmoid'))
data_model.add(Dense(50, activation='sigmoid'))
data_model.add(Dense(30, activation='sigmoid'))
data_model.add(Dense(10, activation='sigmoid'))
data_model.add(Dense(1, activation='sigmoid'))
print(data_model.summary())


# In[20]:

# Compile the model with the Adam optimizer and binary cross-entropy,
# the appropriate loss for a single sigmoid output
data_model.compile(optimizer='adam', loss='binary_crossentropy',
                   metrics=['accuracy'])


# In[21]:

hist = data_model.fit(train_Data, train_Out,
                      validation_data=(test_Data, test_Out),
                      epochs=1000, verbose=1)


# In[22]:

# Train and test accuracy
scores = data_model.evaluate(train_Data, train_Out)
print("Training Accuracy: %.2f%%\n" % (scores[1] * 100))
scores = data_model.evaluate(test_Data, test_Out)
print("Testing Accuracy: %.2f%%\n" % (scores[1] * 100))


# In[24]:

# Final prediction: threshold the sigmoid outputs at 0.5 to get class labels
y_pred_prob = data_model.predict(test_Data)
y_pred = (y_pred_prob > 0.5)
print(y_pred_prob)
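# In[ ]:

# Added sketch (not part of the original notebook): summarize the thresholded
# predictions `y_pred` from the cell above with scikit-learn's confusion matrix
# and classification report. The target_names assume Outcome == 1 means
# "diabetes", as in the class-balance charts earlier.
from sklearn.metrics import confusion_matrix, classification_report

y_pred_labels = y_pred.astype(int).ravel()  # flatten the (n, 1) boolean array to 0/1 labels
print(confusion_matrix(test_Out, y_pred_labels))
print(classification_report(test_Out, y_pred_labels,
                            target_names=['No diabetes', 'Diabetes']))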
# In[25]:

# Training accuracy curve; save the figure before plt.show(), which
# clears the current figure
acc = hist.history['accuracy']
plt.plot(acc)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.savefig('accuracy_curve.png')
plt.show()


# In[26]:

# Training loss curve
loss = hist.history['loss']
plt.plot(loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.savefig('loss_curve.png')
plt.show()
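# In[ ]:

# Added sketch: because fit() above received validation_data, hist.history
# also holds 'val_accuracy' and 'val_loss'. Overlaying them on the training
# curves makes overfitting visible (training keeps improving while validation
# stalls). The output filenames here are illustrative.
plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.title('train vs. validation accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig('train_val_accuracy.png')
plt.show()

plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('train vs. validation loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig('train_val_loss.png')
plt.show()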