import time print('Last updated: %s' %time.strftime('%d/%m/%Y')) import numpy as np def matrix_lstsqr(x, y): """ Computes the least-squares solution to a linear matrix equation. """ X = np.vstack([x, np.ones(len(x))]).T return (np.linalg.inv(X.T.dot(X)).dot(X.T)).dot(y) def classic_lstsqr(x_list, y_list): """ Computes the least-squares solution to a linear matrix equation. """ N = len(x_list) x_avg = sum(x_list)/N y_avg = sum(y_list)/N var_x, cov_xy = 0, 0 for x,y in zip(x_list, y_list): temp = x - x_avg var_x += temp**2 cov_xy += temp * (y - y_avg) slope = cov_xy / var_x y_interc = y_avg - slope*x_avg return (slope, y_interc) import random import numpy as np random.seed(12345) x = [x_i*random.randrange(8,12)/10 for x_i in range(500)] y = [y_i*random.randrange(8,12)/10 for y_i in range(100,600)] np.testing.assert_almost_equal( classic_lstsqr(x, y), matrix_lstsqr(x, y), decimal=5) print('ok') %matplotlib inline from matplotlib import pyplot as plt import random random.seed(12345) x = [x_i*random.randrange(8,12)/10 for x_i in range(500)] y = [y_i*random.randrange(8,12)/10 for y_i in range(100,600)] slope, intercept = matrix_lstsqr(x, y) line_x = [round(min(x)) - 1, round(max(x)) + 1] line_y = [slope*x_i + intercept for x_i in line_x] plt.figure(figsize=(8,8)) plt.scatter(x,y) plt.plot(line_x, line_y, color='red', lw='2') plt.ylabel('y') plt.xlabel('x') plt.title('Linear regression via least squares fit') ftext = 'y = ax + b = {:.3f} + {:.3f}x'\ .format(slope, intercept) plt.figtext(.15,.8, ftext, fontsize=11, ha='left') plt.show() import timeit import random random.seed(12345) funcs = ['classic_lstsqr', 'matrix_lstsqr'] orders_n = [10**n for n in range(1,5)] timings = {f:[] for f in funcs} for n in orders_n: x_list = ([x_i*np.random.randint(8,12)/10 for x_i in range(n)]) y_list = ([y_i*np.random.randint(10,14)/10 for y_i in range(n)]) x_ary = np.asarray(x_list) y_ary = np.asarray(y_list) timings['classic_lstsqr'].append(min(timeit.Timer('classic_lstsqr(x_list, y_list)', 'from __main__ import classic_lstsqr, x_list, y_list')\ .repeat(repeat=3, number=1000))) timings['matrix_lstsqr'].append(min(timeit.Timer('matrix_lstsqr(x_ary, y_ary)', 'from __main__ import matrix_lstsqr, x_ary, y_ary')\ .repeat(repeat=3, number=1000))) import platform import multiprocessing def print_sysinfo(): print('\nPython version :', platform.python_version()) print('compiler :', platform.python_compiler()) print('NumPy version :', np.__version__) print('\nsystem :', platform.system()) print('release :', platform.release()) print('machine :', platform.machine()) print('processor :', platform.processor()) print('CPU count :', multiprocessing.cpu_count()) print('interpreter:', platform.architecture()[0]) print('\n\n') import matplotlib.pyplot as plt def plot(timings, title, labels, orders_n): plt.rcParams.update({'font.size': 12}) fig = plt.figure(figsize=(11,10)) for lb in labels: plt.plot(orders_n, timings[lb], alpha=0.5, label=labels[lb], marker='o', lw=3) plt.xlabel('sample size n') plt.ylabel('time per computation in milliseconds') #plt.xlim([min(orders_n) / 10, max(orders_n)* 10]) plt.legend(loc=2) plt.grid() plt.xscale('log') plt.yscale('log') plt.title(title) plt.show() title = 'Performance of Linear Regression Least Squares Fits' labels = {'classic_lstsqr': '"classic" least squares in (C)Python', 'matrix_lstsqr': '"matrix" least squares in in (C)Python + NumPy', } print_sysinfo() plot(timings, title, labels, orders_n)