#!/usr/bin/env python
# coding: utf-8
#
# # Week 3: Learning (Part II, Intro to Neural Nets)
#
# CSCI-UA 9473 - Introduction to Machine Learning
#
# Partial Solutions
# ### Part 1. A simple linearly separable dataset (gradient)
# In[4]:
# a simple neural network which takes as input a 1D latent variable
import numpy as np
import copy
import matplotlib.pyplot as plt
# Network architecture used for Part 1.
input_dim, output_dim = 2, 1

# number of neurons per layer (output layer excluded)
network_size = [1]

# Layer widths including the output layer; built as a new list so that
# network_size itself is left unchanged.
total_size = [*network_size, output_dim]
num_layers = len(network_size)
# In[8]:
# defining the activation function
def activation1(x):
    """Evaluate the logistic sigmoid and its derivative at ``x``.

    Args:
        x: NumPy scalar or array of pre-activation values.

    Returns:
        A ``(sigma, derivative)`` tuple with ``sigma = 1 / (1 + exp(-x))``
        and ``derivative = sigma * (1 - sigma)``, evaluated elementwise.
    """
    # exp(-log(1 + exp(-x))) equals 1 / (1 + exp(-x)), but logaddexp never
    # forms exp(-x) explicitly, so very negative inputs do not overflow.
    sigma = np.exp(-np.logaddexp(0, -x))
    derivative = sigma * (1 - sigma)
    return sigma, derivative
# In[13]:
# forward and backward propagation
# Random standard-normal column of length input_dim, used as a placeholder
# input (the script rebinds x_in to a data row further down).
x_in = np.random.normal(loc=0, scale=1, size=(input_dim, 1))
def SGD_neuralNet(x_in, target, weights, biases):
    """Run one forward and one backward pass of a sigmoid network on a sample.

    Every layer applies ``activation1`` and the loss is the binary
    cross-entropy between the network output and ``target``.

    Args:
        x_in: Input sample with ``weights[0].shape[1]`` entries.
        target: Label of the sample (scalar or size-1 array).
        weights: List of per-layer weight matrices of shape ``(n_out, n_in)``,
            ordered from the input layer to the output layer.
        biases: List of per-layer bias arrays with ``n_out`` entries each;
            they are reshaped to columns internally.

    Returns:
        A tuple ``(loss, current_output, grad, grad_biases)``:
        ``loss`` is the cross-entropy of the sample (array, one row per
        output unit), ``current_output`` is the output-layer activation as a
        column, and ``grad`` / ``grad_biases`` are lists of the loss
        gradients with respect to each layer's weights / biases, ordered
        like ``weights``. Gradients are squeezed, except the output-layer
        bias gradient, which is returned as a column.

    NOTE: the output-layer weight gradient is formed as an elementwise
    product, which matches the outer product only for a single output unit
    (the configuration used by the callers in this file).
    """
    # ---- forward propagation ----
    current_input = x_in
    preactivation = []
    postactivation = []
    for layer_weights, layer_bias in zip(weights, biases):
        tmp = (np.matmul(layer_weights, current_input).reshape(-1, 1)
               + layer_bias.reshape(-1, 1))
        tmp2 = activation1(tmp)[0]
        preactivation.append(tmp)
        postactivation.append(tmp2)
        current_input = tmp2
    current_output = current_input

    # ---- loss ----
    # Cross-entropy written in terms of the output pre-activation z:
    #   -t*log(s(z)) - (1 - t)*log(1 - s(z)) = log(1 + exp(z)) - t*z
    # This form stays finite when the sigmoid saturates to exactly 0 or 1,
    # where the direct formula evaluates 0 * inf and yields nan.
    z_out = preactivation[-1]
    loss = np.logaddexp(0, z_out) - target * z_out

    # ---- backpropagation ----
    # For a sigmoid output with cross-entropy loss the output delta is
    # simply (output - target).
    delta_out = current_output - target
    current_delta = delta_out

    # Layers are visited from the output back to the input.
    weight_backp = weights[::-1]
    preactivation_backp = preactivation[:-1][::-1]
    # Inputs seen by each layer, in reverse order. x_in is appended before
    # the first gradient is computed so that a network with a single weight
    # layer (no hidden activations) also has an entry at index 0.
    postactivation_backp = postactivation[:-1][::-1]
    postactivation_backp.append(x_in)

    grad = [np.squeeze(delta_out) * np.squeeze(postactivation_backp[0])]
    grad_biases = [delta_out]
    for l in range(len(weights) - 1):
        # Propagate the delta through the layer above, then through the
        # sigmoid derivative of the current layer.
        tmp = np.matmul(weight_backp[l].T, current_delta)
        sigma_prime = activation1(preactivation_backp[l])[1]
        current_delta = np.multiply(tmp.reshape(-1, 1),
                                    sigma_prime.reshape(-1, 1))
        # Weight gradient = delta (column) times layer input (row).
        layer_input = np.reshape(postactivation_backp[l + 1], (1, -1))
        grad.append(np.squeeze(np.matmul(current_delta, layer_input)))
        grad_biases.append(np.squeeze(current_delta))

    # Return the gradients in the same (input -> output) order as weights.
    return loss, current_output, grad[::-1], grad_biases[::-1]
# In[15]:
from sklearn.datasets import make_classification
# Generate a two-feature classification dataset (fixed random_state) and
# show it as a scatter plot coloured by label.
X1, Y1 = make_classification(
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=2,
    random_state=1,
)
plt.scatter(X1[:, 0], X1[:, 1], s=25, c=Y1, marker='o', edgecolor='k')

# The remainder of the script refers to the samples and labels by these names.
data, targets = X1, Y1

# First sample and its label.
x_in, target = data[0, :], targets[0]
# Random initialization of the parameters: one weight matrix and one bias
# vector per layer, all entries drawn from a standard normal distribution.
weights = []
biases = []
# Layer widths from the input, through total_size, to the output.
layer_sizes = [input_dim] + list(total_size)
for current_is, current_os in zip(layer_sizes[:-1], layer_sizes[1:]):
    # Each weight matrix maps current_is inputs to current_os outputs.
    weights.append(np.random.normal(0, 1, (current_os, current_is)))
    # Every bias, including the first layer's, is stored as a column of
    # shape (current_os, 1) so all layers share one convention.
    biases.append(np.random.normal(0, 1, (current_os, 1)))
# learning rate
eta = .01
num_epochs = 2000
# total_loss[e] is the loss summed over all samples during epoch e.
total_loss = np.zeros(num_epochs)
num_samples = np.shape(data)[0]
for e in range(num_epochs):
    # Visit the samples in a fresh random order each epoch.
    indices_epoch = np.random.permutation(num_samples)
    data_epoch = data[indices_epoch, :]
    target_epoch = targets[indices_epoch]

    # Per-layer gradient accumulators, shaped like the parameters.
    grad_weights_tmp = [np.zeros(np.shape(w_l)) for w_l in weights]
    grad_biases_tmp = [np.zeros(np.shape(b_l)) for b_l in biases]

    for i in range(num_samples):
        loss, f, g, b = SGD_neuralNet(data_epoch[i, :], target_epoch[i],
                                      weights, biases)
        # loss is returned as an array; reduce it to a scalar before
        # storing, because NumPy deprecates assigning an array with
        # ndim > 0 to a single element.
        total_loss[e] += np.sum(loss)
        # Reshape every returned gradient to its parameter's shape rather
        # than squeezing, so that layers with a singleton dimension cannot
        # broadcast into the wrong shape.
        for l in range(len(weights)):
            grad_weights_tmp[l] += np.reshape(g[l], np.shape(weights[l]))
            grad_biases_tmp[l] += np.reshape(b[l], np.shape(biases[l]))

    # One gradient step per epoch, using the gradient averaged over all
    # samples of the epoch.
    step = eta / num_samples
    for l in range(len(weights)):
        weights[l] = weights[l] - step * grad_weights_tmp[l]
        biases[l] = (biases[l].reshape(-1, 1)
                     - step * grad_biases_tmp[l].reshape(-1, 1))
# Evaluate the trained network on a 100 x 100 grid covering the range of the
# data and shade the regions where the output exceeds 0.5.
x1min = np.min(data[:, 0])
x1max = np.max(data[:, 0])
x2min = np.min(data[:, 1])
x2max = np.max(data[:, 1])
xx1 = np.linspace(x1min, x1max, 100)
xx2 = np.linspace(x2min, x2max, 100)
xx1, xx2 = np.meshgrid(xx1, xx2)
# One grid point per row: (x1, x2).
data_grid = np.vstack((xx1.flatten(), xx2.flatten())).T
prediction = np.zeros((np.shape(data_grid)[0], 1))
for sample in np.arange(np.shape(data_grid)[0]):
    # Element 1 of the returned tuple is the network output; element 0 is
    # the loss, and thresholding that at 0.5 would misplace the boundary.
    # The dummy target 0 only affects the loss and gradients, which are
    # discarded here.
    prediction[sample, :] = np.ravel(
        SGD_neuralNet(data_grid[sample, :], 0, weights, biases)[1])
plt.scatter(data[:, 0], data[:, 1], c=targets)
plt.contourf(xx1, xx2, np.reshape(prediction > 0.5, np.shape(xx1)),
             levels=1, alpha=.1)
plt.show()
# In[16]:
import matplotlib.pyplot as plt
# Show the per-epoch total loss on a logarithmic y-axis.
loss_axes = plt.gca()
loss_axes.semilogy(total_loss)
plt.show()
# ### Part 2. The XOR Gate (gradient)
# In[17]:
#
from scipy.io import loadmat
# Load the samples of the two classes; each .mat file stores its array under
# a variable named after the file.
data1 = loadmat('neural_net_class1.mat')['neural_net_class1']
data2 = loadmat('neural_net_class2.mat')['neural_net_class2']

# Rows of data1 are labelled 1 and rows of data2 are labelled 0, with the
# labels kept as column vectors.
targets1 = np.ones((data1.shape[0], 1))
targets0 = np.zeros((data2.shape[0], 1))

# Stack samples and labels in the same class order so the rows stay aligned.
data = np.vstack((data1, data2))
targets = np.vstack((targets1, targets0))
# applying the SGD step
# Network architecture used for the XOR problem.
input_dim = 2
output_dim = 1
# number of neurons per layer (output layer excluded)
network_size = [20, 20]
# Layer widths including the output layer.
total_size = network_size + [output_dim]
num_layers = len(network_size)

# Random initialization of the parameters: one weight matrix and one bias
# vector per layer, all entries drawn from a standard normal distribution.
weights = []
biases = []
# Layer widths from the input, through total_size, to the output.
layer_sizes = [input_dim] + total_size
for current_is, current_os in zip(layer_sizes[:-1], layer_sizes[1:]):
    # Each weight matrix maps current_is inputs to current_os outputs.
    weights.append(np.random.normal(0, 1, (current_os, current_is)))
    # Every bias, including the first layer's, is stored as a column of
    # shape (current_os, 1) so all layers share one convention.
    biases.append(np.random.normal(0, 1, (current_os, 1)))
# learning rate
eta = .01
num_epochs = 40000
# total_loss[e] is the loss summed over all samples during epoch e.
total_loss = np.zeros(num_epochs)
num_samples = np.shape(data)[0]
for e in range(num_epochs):
    # Visit the samples in a fresh random order each epoch.
    indices_epoch = np.random.permutation(num_samples)
    data_epoch = data[indices_epoch, :]
    target_epoch = targets[indices_epoch]

    # Per-layer gradient accumulators, shaped like the parameters.
    grad_weights_tmp = [np.zeros(np.shape(w_l)) for w_l in weights]
    grad_biases_tmp = [np.zeros(np.shape(b_l)) for b_l in biases]

    for i in range(num_samples):
        loss, f, g, b = SGD_neuralNet(data_epoch[i, :], target_epoch[i],
                                      weights, biases)
        # loss is returned as an array; reduce it to a scalar before
        # storing, because NumPy deprecates assigning an array with
        # ndim > 0 to a single element.
        total_loss[e] += np.sum(loss)
        # Reshape every returned gradient to its parameter's shape rather
        # than squeezing, so that layers with a singleton dimension cannot
        # broadcast into the wrong shape.
        for l in range(len(weights)):
            grad_weights_tmp[l] += np.reshape(g[l], np.shape(weights[l]))
            grad_biases_tmp[l] += np.reshape(b[l], np.shape(biases[l]))

    # One gradient step per epoch, using the gradient averaged over all
    # samples of the epoch.
    step = eta / num_samples
    for l in range(len(weights)):
        weights[l] = weights[l] - step * grad_weights_tmp[l]
        biases[l] = (biases[l].reshape(-1, 1)
                     - step * grad_biases_tmp[l].reshape(-1, 1))
# Evaluate the trained network on a 100 x 100 grid covering the range of the
# data and shade the regions where the output exceeds 0.5.
x1min = np.min(data[:, 0])
x1max = np.max(data[:, 0])
x2min = np.min(data[:, 1])
x2max = np.max(data[:, 1])
from matplotlib.colors import ListedColormap
# Two-colour map (blue, red) for the shaded class regions.
cm_bright = ListedColormap(['#0000FF', '#FF0000'])
xx1 = np.linspace(x1min, x1max, 100)
xx2 = np.linspace(x2min, x2max, 100)
xx1, xx2 = np.meshgrid(xx1, xx2)
# One grid point per row: (x1, x2).
data_grid = np.vstack((xx1.flatten(), xx2.flatten())).T
prediction = np.zeros((np.shape(data_grid)[0], 1))
for sample in np.arange(np.shape(data_grid)[0]):
    # Element 1 of the returned tuple is the network output; element 0 is
    # the loss, and thresholding that at 0.5 would misplace the boundary.
    # The dummy target 0 only affects the loss and gradients, which are
    # discarded here.
    prediction[sample, :] = np.ravel(
        SGD_neuralNet(data_grid[sample, :], 0, weights, biases)[1])
plt.scatter(data1[:, 0], data1[:, 1], c='r')
plt.scatter(data2[:, 0], data2[:, 1], c='b')
plt.contourf(xx1, xx2, np.reshape(prediction > 0.5, np.shape(xx1)),
             levels=2, alpha=0.2, cmap=cm_bright)
plt.show()
# In[18]:
# Show the per-epoch total loss on a logarithmic y-axis.
loss_axes = plt.gca()
loss_axes.semilogy(total_loss)
plt.show()