#!/usr/bin/env python
# coding: utf-8

# # Manual Hyperparameter Tuning a Neural Network
#
# ## Introduction
# In Challenge 4, we built a simple neural network using Keras. In this challenge, we will manually experiment with hyperparameters.
#
# This notebook contains the same workflow. Step 2 shows the simple three-layer neural network from Challenge 4. Step 3 is for you to modify that same network by adding layers and neurons and changing the number of epochs, then running the plotting cell to see the results.
#
# NOTE: Be sure to re-run the beginning cells before working on Step 3.
#
# ## Dataset
# The advertising dataset captures the sales revenue generated with respect to advertising costs across several platforms: digital, TV, radio, and newspapers.
#
# ### Features:
#
# #### Digital: advertising dollars spent on the Internet.
# #### TV: advertising dollars spent on TV.
# #### Radio: advertising dollars spent on radio.
# #### Newspaper: advertising dollars spent on newspapers.
#
# ### Target (Label):
# #### Sales: the sales revenue generated.

# # Step 1: Data Preparation

# ### Import Libraries

# In[ ]:

# Import the necessary libraries

# For data loading, exploratory data analysis, and graphing
import pandas as pd                # Pandas for data processing
import numpy as np                 # NumPy for mathematical functions
import matplotlib.pyplot as plt    # Matplotlib for visualization tasks
import seaborn as sns              # Seaborn, a data visualization library based on Matplotlib
get_ipython().run_line_magic('matplotlib', 'inline')

import sklearn                                         # ML tasks
from sklearn.model_selection import train_test_split  # Split the dataset
from sklearn.metrics import mean_squared_error        # Calculate mean squared error

# Build the network
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# In[ ]:

# Next, read the dataset into a Pandas dataframe.
url = 'https://github.com/LinkedInLearning/artificial-intelligence-foundations-neural-networks-4381282/blob/main/Advertising_2023.csv?raw=true'
advertising_df = pd.read_csv(url, index_col=0)


# In[ ]:

# Pandas info() gives a concise summary of the dataframe.
advertising_df.info()


# In[ ]:

# Get summary statistics of the data
advertising_df.describe()


# In[ ]:

# Shape of the dataframe - 1199 rows, five columns
advertising_df.shape

# Let's check for any null values.

# In[ ]:

# The isnull() method is used to check for NULL values in a dataframe.
advertising_df.isnull().sum()
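# Optionally, we can turn those checks into assertions so the notebook fails fast
# if the data ever changes. This is a minimal sanity-check sketch; the expected
# shape (1199 rows, 5 columns) comes from the cells above.

# In[ ]:

# Fail fast if the data doesn't look the way the rest of the notebook assumes.
assert advertising_df.shape == (1199, 5), 'expected 1199 rows and 5 columns'
assert advertising_df.isnull().sum().sum() == 0, 'unexpected missing values'
print(advertising_df.dtypes)  # confirm all columns are numeric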
# ## Exploratory Data Analysis (EDA)
#
# Let's create some simple plots to check out the data!

# In[ ]:

# Plot the heatmap so that the correlation values are shown.
plt.figure(figsize=(10, 5))
sns.heatmap(advertising_df.corr(), annot=True, vmin=0, vmax=1, cmap='ocean')


# In[ ]:

# Create a correlation matrix, masking out the weaker correlations
corr = advertising_df.corr()
plt.figure(figsize=(10, 5))
sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.7)],
            cmap='viridis', vmax=1.0, vmin=-1.0, linewidths=0.1,
            annot=True, annot_kws={"size": 8}, square=True)
plt.tight_layout()
plt.show()


# In[ ]:

advertising_df.corr()


# In[ ]:

# Visualize the correlation matrix

# Generate a mask for the upper triangle
mask = np.zeros_like(advertising_df.corr(), dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(advertising_df.corr(), mask=mask, cmap=cmap, vmax=.9,
            square=True, linewidths=.5, ax=ax)

# Since Sales is our target variable, we should identify which variable correlates the most with Sales.
#
# As we can see, TV has the highest correlation with Sales.

# Let's visualize the relationships between variables using scatterplots. Rather than plot them separately, an efficient way to view the linear relationships is to use a for loop that plots all of the features at once.
#
# It seems there are no other clear linear relationships among the predictors.
#
# At this point, we know that the variable TV will most likely give the best prediction of Sales because of the high correlation and linearity of the two.

# In[ ]:

'''=== Show the linear relationship between each feature and sales.
This shows how scattered the points are and which features have the
most impact on the prediction of sales. ==='''

# Visualize all features against sales

# Create the figure
plt.figure(figsize=(18, 18))

for i, col in enumerate(advertising_df.columns[:-1]):  # iterate over all columns except sales (the last one)
    plt.subplot(2, 2, i + 1)          # two plots per row
    x = advertising_df[col]           # x-axis
    y = advertising_df['sales']       # y-axis
    plt.plot(x, y, 'o')

    # Create the regression line
    plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)), color='red')
    plt.xlabel(col)      # x-label
    plt.ylabel('sales')  # y-label

# Concluding results after observing the graphs:
# - The relationship between TV and Sales is strong and increases in a linear fashion.
# - The relationship between Radio and Sales is less strong.
# - The relationship between Newspaper and Sales is weak.
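# To complement the plots, we can rank the features numerically. This is a small
# sketch that reuses the dataframe defined above: it sorts each feature's
# correlation with sales by absolute value.

# In[ ]:

# Rank features by the strength of their correlation with sales
corr_with_sales = advertising_df.corr()['sales'].drop('sales')
print(corr_with_sales.abs().sort_values(ascending=False))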
# ## Training a Linear Regression Model
#
# Regression is a supervised machine learning process. It is similar to classification, but rather than predicting a label, you try to predict a continuous value. Linear regression defines the relationship between a target variable (y) and a set of predictive features (x). Simply stated, if you need to predict a number, use regression.
#
# Let's now begin to train the model! You first need to split up the data into an X array that contains the features to train on, and a y array with the target variable, in this case the sales column.

# #### Data Preprocessing

# ##### Split: X (features) and y (target)

# Next, let's define the features and label. Briefly, a feature is an input; a label is an output. This applies to both classification and regression problems.

# In[ ]:

X = advertising_df[['digital', 'TV', 'radio', 'newspaper']]
y = advertising_df['sales']

# ##### Scaling (Normalization)

# In[ ]:

'''=== Normalize the features. Since the features have different ranges,
it is best practice to normalize/standardize them before using them in
the model. ==='''

# Feature normalization
normalized_feature = keras.utils.normalize(X.values)

# ##### Train - Test - Split
#
# Now let's split the data into a training set and a test set. Note: best practice is to split into three sets - training, validation, and test.
#
# By default, train_test_split splits the data in a 75-25 ratio; here we pass test_size=0.4 to hold out 40% of the data for testing.

# In[ ]:

# Import the train_test_split function from sklearn.model_selection
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)


# In[ ]:

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# # Step 2: Build Network
#
# #### Build and Train the Network

# In[ ]:

# Build the model (a three-layer network with one hidden layer)
model = Sequential()
model.add(Dense(4, input_dim=4, activation='relu'))  # input layer; input_dim matches the four features
model.add(Dense(3, activation='relu'))               # hidden layer - after the first layer you don't have to specify the input size
model.add(Dense(1))                                  # output layer: a single continuous value

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Fit the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=32)

# ### Visualization
# You can add more 'flavor' to the graph by making it bigger and adding labels and names, as shown below.

# In[ ]:

# Plot a graph of model loss on the training and validation data
plt.figure(figsize=(15, 8))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss (MSE) on Training and Validation Data')
plt.ylabel('Loss - Mean Squared Error')
plt.xlabel('Epoch')
plt.legend(['Train Loss', 'Val Loss'], loc='upper right')
plt.show()

# # Step 3: Tune the Neural Network Hyperparameters

# ## CHALLENGE: Manual Hyperparameter Tuning
# Play with:
# 1. Adding additional layers
# 2. Adding additional neurons
# 3. Changing the number of epochs
#
# NOTE: After each change, run the plotting cell and review the loss curves.

# In[ ]:

# Build the model
model = Sequential()
model.add(Dense(4, input_dim=4, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Fit the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100)


# In[ ]:

plt.figure(figsize=(15, 8))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss (MSE) on Training and Validation Data')
plt.ylabel('Loss - Mean Squared Error')
plt.xlabel('Epoch')
plt.legend(['Train Loss', 'Val Loss'], loc='upper right')
plt.show()
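# Rather than hand-picking the number of epochs, you can also let Keras stop
# training once the validation loss stops improving. This is an optional sketch
# using Keras's built-in EarlyStopping callback; the patience of 10 epochs is an
# assumption you can tune, and the callback can be passed to any fit() call above.

# In[ ]:

# Stop when val_loss hasn't improved for 10 consecutive epochs (assumed value),
# and roll back to the best weights seen during training.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                           restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=100, callbacks=[early_stop])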
# ### The following model adds a learning rate and batch size. It also shows four nodes per layer.

# In[ ]:

# Build the model (a four-layer network with two hidden layers)
model = Sequential()
model.add(Dense(4, input_dim=4, activation='relu'))
model.add(Dense(4, activation='relu'))  # after the first layer you don't have to specify the input size
model.add(Dense(4, activation='relu'))
model.add(Dense(1))

# Compile the model with an explicit learning rate
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='mse', metrics=['mse'])

# Fit the model, iterating on the data in batches of 32 samples
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=32, batch_size=32, verbose=0)

# epochs     - the number of passes over the training data (32 here)
# batch_size - the amount of data each iteration within an epoch sees


# In[ ]:

plt.figure(figsize=(15, 8))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss (MSE) on Training and Validation Data')
plt.ylabel('Loss - Mean Squared Error')
plt.xlabel('Epoch')
plt.legend(['Train Loss', 'Val Loss'], loc='upper right')
plt.show()
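# Loss curves show training behavior, but it's also worth scoring the final model
# on the held-out test set. This is a minimal sketch that reuses the
# mean_squared_error function imported in Step 1.

# In[ ]:

# Predict on the test set and compute MSE / RMSE
y_pred = model.predict(X_test).flatten()
mse = mean_squared_error(y_test, y_pred)
print('Test MSE: ', mse)
print('Test RMSE:', np.sqrt(mse))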