#!/usr/bin/env python
# coding: utf-8

# # 1. Importing necessary packages

# ```numpy``` to handle matrices, ```matplotlib.pyplot``` to plot graphics and ```accuracy_score``` to measure the accuracy of the model. Then ```sklearn.datasets``` to generate random data for simulating a dataset; more precisely, we use ```make_blobs```, which generates isotropic Gaussian blobs for clustering.

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_blobs


# # 2. Simulating a Dataset

# $X$ represents the input data; it is an $n\times 2$ matrix containing $n$ observations of pairs $x_i=(x_{1,i},x_{2,i})\in \mathbb{R}^2$. $y_i \in \left\{0,1\right\}$ is the binary response variable, and $Y$ is the output vector containing the $n$ observed responses $y_i$.

# In[2]:


X, y = make_blobs(n_samples=100, n_features=2, center_box=(-8.0, 8.0), centers=2, random_state=7)
y = y.reshape((y.shape[0], 1))

print('dimensions of X:', X.shape)
print('dimensions of y:', y.shape)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap="copper")
plt.show()


# Next, we split the artificial neuron into several pieces, where each piece is defined as a function, as follows:

# # 3. Defining model functions

# ## 3.1 Initialization

# Random values are assigned to the parameters in order to run the first forward propagation, before the backward step that updates their values.

# In[3]:


def initialization(X):
    W = np.random.randn(X.shape[1], 1)  # draw the weight matrix W from the standard normal distribution
    b = np.random.randn(1)              # draw the bias b the same way
    return (W, b)


# ## 3.2 Sigmoid (logistic) function

# $Z$ is the linear combination of the inputs $X$ with the coefficients $W$, plus a bias term, and $A$ is the value of the sigmoid function evaluated at $Z$.

# In[4]:


def model(X, W, b):
    Z = X.dot(W) + b          # Z is a linear combination of X and W plus a bias term
    A = 1 / (1 + np.exp(-Z))  # A is the sigmoid (logistic) function applied to Z
    return A


# ## 3.3 Calculating the loss function: negative log-likelihood

# We can express the likelihood as follows:
#
# $$L=\prod_{i=1}^{n} a_{i}^{y_{i}}\left(1-a_{i}\right)^{1-y_{i}}$$
#
# Instead of maximising the log-likelihood, we prefer to minimise its negative version, averaged over the $n$ observations, and treat it as the loss function:
#
# $$\mathcal{L}=-\frac{1}{n}\sum_{i=1}^{n}\left[y_{i}\log\left(a_{i}\right)+\left(1-y_{i}\right)\log\left(1-a_{i}\right)\right]$$

# In[5]:


def log_loss(A, y):
    return 1 / len(y) * np.sum(-y * np.log(A) - (1 - y) * np.log(1 - A))  # negative log-likelihood


# ## 3.4 Obtaining the gradient values and updating

# Using the sigmoid as the activation function leads to the expressions below; they would change if a different activation function were used:
#
# $$\frac{\partial\mathcal{L}}{\partial W}=\frac{1}{n}X^{T}\left(A-Y\right),\qquad \frac{\partial\mathcal{L}}{\partial b}=\frac{1}{n}\sum_{i=1}^{n}\left(a_{i}-y_{i}\right)$$

# In[6]:


def gradients(A, X, y):
    dW = 1 / len(y) * np.dot(X.T, A - y)  # gradient of the loss with respect to the weights
    db = 1 / len(y) * np.sum(A - y)       # gradient of the loss with respect to the bias
    return (dW, db)


# In[7]:


def update(dW, db, W, b, learning_rate):
    """Gradient-descent update of the parameters."""
    W = W - learning_rate * dW
    b = b - learning_rate * db
    return (W, b)


# The ```predict``` function calculates the value of the sigmoid at the point $X$ with the current values of the parameters $W$ and $b$, then returns a binary value: 1 if the value exceeds some threshold and 0 otherwise. By default, the threshold is fixed at 0.5.

# In[8]:


def predict(X, W, b, border=0.5):
    """The decision border is arbitrarily set at 0.5.
    It might be better to treat the border as a hyperparameter to calibrate."""
    A = model(X, W, b)
    # print(A)
    return A >= border
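

# Before assembling everything in the next section, here is a quick sanity check (this cell is not part of the original pipeline): it chains the pieces defined above for a single gradient step, reusing only ```initialization```, ```model```, ```log_loss```, ```gradients``` and ```update``` on the simulated X and y. The names ```W0```, ```b0```, ```A0``` are purely illustrative, and the printed values will vary with the random initialisation.

# In[ ]:


W0, b0 = initialization(X)                                    # random starting parameters
A0 = model(X, W0, b0)                                         # forward pass: sigmoid activations
print('initial loss:', log_loss(A0, y))                       # loss before any update
dW0, db0 = gradients(A0, X, y)                                 # backward pass: gradients of the loss
W0, b0 = update(dW0, db0, W0, b0, learning_rate=0.1)           # one gradient-descent step
print('loss after one step:', log_loss(model(X, W0, b0), y))   # loss after the update
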
# # 4. The model

# We have now defined all the needed functions. We can put all these pieces inside a single function called ```artificial_single_neuron``` that takes X and y as inputs, together with additional hyperparameters such as the learning_rate and the number of iterations, and, after running, returns the updated values of the parameters that minimise the loss function.

# In[9]:


def artificial_single_neuron(X, y, learning_rate=0.1, n_iter=100):
    # initialise W, b
    W, b = initialization(X)

    Loss = []

    # gradient-descent loop: forward pass, record the loss, backward pass, update
    for i in range(n_iter):
        A = model(X, W, b)
        Loss.append(log_loss(A, y))
        dW, db = gradients(A, X, y)
        W, b = update(dW, db, W, b, learning_rate)

    y_pred = predict(X, W, b)
    print(accuracy_score(y, y_pred))

    plt.plot(Loss)
    plt.show()

    return (W, b)


# In[10]:


W, b = artificial_single_neuron(X, y)


# # 5. Decision boundary

# In[11]:


plt.style.use('Solarize_Light2')


# In[12]:


fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(X[:, 0], X[:, 1], c=y, cmap='copper')

# the decision boundary is the line where W[0]*x1 + W[1]*x2 + b = 0
x1 = np.linspace(-7, 0, 100)
x2 = (-W[0] * x1 - b) / W[1]

ax.plot(x1, x2, c='orange', lw=3)
plt.show()


# # 6. Additional examples

# In[13]:


X, y = make_blobs(n_samples=1000, n_features=2, centers=2, random_state=40)
y = y.reshape((y.shape[0], 1))

print('dimensions of X:', X.shape)
print('dimensions of y:', y.shape)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap="copper")
plt.show()

W, b = artificial_single_neuron(X, y)

fig, ax = plt.subplots(figsize=(9, 6))
# plt.style.use('dark_background')
# plt.style.use('Solarize_Light2')
# fig.suptitle("Graphic")
ax.scatter(X[:, 0], X[:, 1], c=y, cmap="copper")

x1 = np.linspace(-1, 5, 100)
x2 = (-W[0] * x1 - b) / W[1]

ax.plot(x1, x2, c='red', lw=3)
plt.show()


# # Conclusion

# With only a few lines of code, we have been able to build a program that runs forward and backward propagation, updates the parameters on the data with gradient descent, and then outputs the weights that minimise the loss function, i.e. the negative log-likelihood.
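

# As an optional cross-check (this cell is not part of the original notebook), we can compare the single neuron with scikit-learn's ```LogisticRegression``` on the last dataset; since both models fit the same kind of linear decision boundary, the two accuracies should be close. This is only a sketch using the default settings of ```LogisticRegression```.

# In[ ]:


from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()                                   # scikit-learn's logistic regression, default settings
clf.fit(X, y.ravel())                                        # y flattened to 1-D as scikit-learn expects
print('scikit-learn accuracy:', clf.score(X, y.ravel()))     # mean accuracy on the training data
print('single-neuron accuracy:', accuracy_score(y, predict(X, W, b)))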