#!/usr/bin/env python
# coding: utf-8

# # Activity 4 - Linear Regression
#
# In this example, we see how a system can perform regression analysis.
# Regression is a statistical approach for learning, such that we can learn
# what the output Y should be when we know x. We use the linear regression
# equations (see link below for details) to identify the slope/gradient of the
# line and the y-intercept, and then express a line that generalises our data
# in the form Y = mx + c.
#
# Formulas: http://www.statisticshowto.com/how-to-find-a-linear-regression-equation/

import numpy as np


def generate_data(m=0.7, c=3.0, noise=20.0, n=100, x_max=50.0):
    """Generate `n` noisy samples along the line y = m*x + c.

    Parameters
    ----------
    m : float
        True slope of the underlying line (default 0.7).
    c : float
        True y-intercept of the underlying line (default 3.0).
    noise : float
        Width of the uniform noise band added to each y value; the noise is
        zero-mean, drawn from [-noise/2, +noise/2).
    n : int
        Number of samples to generate.
    x_max : float
        x values are drawn uniformly from [0, x_max).

    Returns
    -------
    numpy.ndarray of shape (n, 2)
        Column 0 holds x values, column 1 holds the noisy y values.
    """
    samples = []
    for _ in range(n):
        x = np.random.random() * x_max
        # Zero-mean uniform noise in [-noise/2, +noise/2).
        # NOTE: the original used "+ noise/2", which made the noise strictly
        # positive (mean = `noise`), biasing the fitted intercept upward by
        # ~20 instead of recovering c ≈ 3.
        y = (m * x + c) + (np.random.random() * noise - noise / 2)
        samples.append([x, y])
    return np.array(samples)


def fit_line(data):
    """Ordinary least-squares fit of a line to (x, y) samples.

    Uses the closed-form simple-linear-regression equations.

    Parameters
    ----------
    data : numpy.ndarray of shape (N, 2)
        Column 0 is x, column 1 is y.

    Returns
    -------
    (m, c) : tuple of float
        Slope and y-intercept of the best-fit line y = m*x + c.
    """
    N = data.shape[0]
    X = data[:, 0]
    Y = data[:, 1]
    XY = X * Y
    X2 = X ** 2
    # Shared denominator of both closed-form estimators.
    denom = N * np.sum(X2) - np.sum(X) ** 2
    c = ((np.sum(Y) * np.sum(X2)) - (np.sum(X) * np.sum(XY))) / denom
    m = ((N * np.sum(XY)) - (np.sum(X) * np.sum(Y))) / denom
    return m, c


def main():
    # Plotting is confined to script execution so that generate_data/fit_line
    # can be imported without a matplotlib backend.
    import matplotlib.pyplot as plt

    data = generate_data()

    # Raw scatter of the generated samples.
    plt.scatter(data[:, 0], data[:, 1], color='k')
    plt.show()

    m, c = fit_line(data)
    print(c, m)

    # Draw the fitted line across the x range [0, 50].
    x_1, x_2 = 0, 50
    y_1 = (m * x_1) + c
    y_2 = (m * x_2) + c
    plt.scatter(data[:, 0], data[:, 1], color='k')
    plt.plot([x_1, x_2], [y_1, y_2])
    plt.show()

    # Predict y for some previously-unseen x values and overlay them.
    some_new_data = np.array([13, 26, 35, 43])
    output_predictions = (m * some_new_data) + c
    plt.scatter(data[:, 0], data[:, 1], color='k', alpha=0.2)
    plt.scatter(some_new_data, output_predictions, color='red')
    plt.show()


if __name__ == "__main__":
    main()