#!/usr/bin/env python
# coding: utf-8

# # Network Initializer
#
# ### What is a neuron?
#
# Feed-forward neural networks are inspired by the information processing of one or more neural cells, called neurons. A neuron accepts input signals via its dendrites, which pass the electrical signal down to the cell body. The axon carries the signal out to synapses, which are the connections of a cell's axon to other cells' dendrites.

# In[3]:


from random import random, seed


def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # Create the hidden layer: n_hidden neurons, each with one weight per input plus a bias
    hidden_layer = [{'weights': [random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    # Create the output layer: n_outputs neurons, each with one weight per hidden neuron plus a bias
    output_layer = [{'weights': [random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network


# In[4]:


# It is good practice to initialize the network weights to small random numbers.
# In this case, we will use random numbers in the range of 0 to 1.
# Seeding the random number generator with 1 makes the run reproducible.
seed(1)


# In[6]:


# 2 input units, 1 hidden unit and 2 output units
network = initialize_network(2, 1, 2)

# You can see the hidden layer has one neuron with 2 input weights plus the bias.
# The output layer has 2 neurons, each with 1 weight plus the bias.
for layer in network:
    print(layer)


# # Forward propagate
#
# We can calculate an output from a neural network by propagating an input signal through each layer until the output layer outputs its values.
#
# We can break forward propagation down into three parts:
#
# 1. Neuron Activation.
#
# 2. Neuron Transfer.
#
# 3. Forward Propagation.

# # 1. Neuron Activation
#
# The first step is to calculate the activation of one neuron given an input.
#
# Neuron activation is calculated as the weighted sum of the inputs, much like linear regression:
#
# activation = sum(weight_i * input_i) + bias
#
# where weight_i is a network weight, input_i is an input, i is the index of a weight or an input, and bias is a special weight that has no input to multiply with (or you can think of its input as always being 1.0).

# In[7]:


# Calculate the activation of one neuron: the bias plus the weighted sum of the inputs
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation


# # 2. Neuron Transfer
#
# Once a neuron is activated, we need to transfer the activation to see what the neuron output actually is.
#
# Different transfer functions can be used. It is traditional to use the *sigmoid activation function*, but you can also use the *tanh* (hyperbolic tangent) function to transfer outputs (a small sketch of that alternative appears after the forward-propagation example below). More recently, the *rectifier transfer function* has been popular with large deep learning networks.
#
# Sigmoid formula:
#
# output = 1 / (1 + e^(-activation))

# In[11]:


from math import exp


# Squash the activation into the range (0, 1) with the sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))


# # 3. Forward Propagation

# In[12]:


# Propagate an input row through the network, layer by layer:
# the outputs of one layer become the inputs to the next,
# and the outputs of the last layer are returned.
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs


# In[13]:


# The trailing None is a placeholder the network never reads;
# only the first two values are used as inputs.
inputs = [1, 0, None]
output = forward_propagate(network, inputs)


# In[15]:


# Running the example propagates the input pattern [1, 0] and displays the resulting output values.
# Because the output layer has two neurons, we get a list of two numbers.
output
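# The next cell is a small aside, not part of the network built in this notebook: a minimal
# sketch of the *tanh* transfer function mentioned in the Neuron Transfer section, together
# with its derivative, in case you want to experiment with it in place of the sigmoid.

# In[ ]:


from math import tanh


# Squash the activation into the range (-1, 1)
def transfer_tanh(activation):
    return tanh(activation)


# Derivative of tanh, expressed in terms of the neuron's output: 1 - output^2
def transfer_derivative_tanh(output):
    return 1.0 - output ** 2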
# # Backpropagation
#
# ### What is it?
#
# 1. The error is calculated between the expected outputs and the outputs forward propagated from the network.
#
# 2. These errors are then propagated backward through the network, from the output layer to the hidden layer, assigning blame for the error and updating weights as they go.
#
# ### This part is broken down into two sections:
#
# - Transfer Derivative
# - Error Backpropagation

# ## Transfer Derivative
#
# Given an output value from a neuron, we need to calculate its *slope*:
#
# derivative = output * (1.0 - output)

# In[19]:


# Calculate the derivative (slope) of a neuron's output, assuming the sigmoid transfer function
def transfer_derivative(output):
    return output * (1.0 - output)


# # Error Backpropagation
#
# 1. Calculate the error for each output neuron; this gives us the error signal (input) to propagate backwards through the network:
#
# error = (expected - output) * transfer_derivative(output)
#
# expected: the expected output value for the neuron
#
# output: the output value for the neuron; transfer_derivative() calculates the slope of that output value
#
# ----
#
# For a neuron in the hidden layer, the back-propagated error signal is accumulated from the neurons of the next layer and then used to determine its error, as follows:
#
# error = (weight_k * error_j) * transfer_derivative(output)
#
# error_j: the error signal from the jth neuron in the output layer
#
# weight_k: the weight that connects the kth neuron to the current neuron
#
# output: the output of the current neuron

# In[20]:


def backward_propagate_error(network, expected):
    # Walk the layers in reverse, from the output layer back to the hidden layer
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:
            # Hidden layer: accumulate the weighted deltas from the next layer
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            # Output layer: error is the difference between expected and actual output
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        # Store each neuron's error signal as its 'delta'
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])


# In[22]:


expected = [0, 1]
backward_propagate_error(network, expected)

# Each neuron now also stores a 'delta': its error value
for layer in network:
    print(layer)


# # Train Network
#
# Two parts:
#
# - Update Weights
#
# - Train Network

# ### Update weights
#
# Once errors are calculated for each neuron in the network via the backpropagation method above, they can be used to update the weights:
#
# weight = weight + learning_rate * error * input
#
# weight: a given network weight
#
# error: the error (delta) calculated by backpropagation for the neuron
#
# input: the input value that the weight is applied to
#
# The learning_rate parameter is explicitly specified by the programmer and controls how much the weights are updated at each step.
# A very large learning rate may sound appealing, as the weights are updated more dramatically, which could lead to faster learning.
# However, it makes the learning process unstable. Ideally, we want a learning rate that is steady and reliable, yet finds a solution in a reasonable amount of time.

# In[ ]:
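# The cell above is left empty in the notebook. What follows is a minimal sketch, not the
# author's implementation, of how the update rule described above could be applied to the
# network structure used so far. The function name update_weights and the parameter l_rate
# are illustrative choices. It assumes each neuron already holds the 'delta' set by
# backward_propagate_error, and that the last element of row is a placeholder for the
# expected value (as in the forward-propagation example), so it is excluded from the inputs.


def update_weights(network, row, l_rate):
    for i in range(len(network)):
        # The first layer is fed by the input row (minus the expected value);
        # later layers are fed by the outputs of the previous layer.
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                # weight = weight + learning_rate * error * input
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            # The bias weight has an implicit input of 1.0
            neuron['weights'][-1] += l_rate * neuron['delta']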