import numpy as np
# From cost_function.ipynb
def costFunction(X, y, theta):
    """Least-squares cost: J(theta) = (1 / (2m)) * sum of squared residuals."""
    m = len(y)
    hypothesis = X @ theta   # predictions, shape (m, 1)
    err = hypothesis - y     # residuals, shape (m, 1)
    return (1 / (2 * m)) * (err.T @ err).item()

def gradientDescent(X, y, theta, alpha, num_iters):
    """Batch gradient descent; returns fitted theta and the cost after each iteration."""
    m = len(y)
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        hypothesis = X @ theta
        err = hypothesis - y
        # Vectorized update: theta := theta - (alpha / m) * X^T (X theta - y)
        theta = theta - alpha * (1 / m) * (X.T @ err)
        J_history[i, 0] = costFunction(X, y, theta)
    return (theta, J_history)
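For reference, the two functions above implement the standard vectorized least-squares cost and batch update rule (the symbols correspond to the code's X, y, theta, alpha, and m):

J(\theta) = \frac{1}{2m} (X\theta - y)^\top (X\theta - y)

\theta := \theta - \frac{\alpha}{m} X^\top (X\theta - y)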
Let's run it against some data generated from a known theta, so we can check that gradient descent recovers it:
# Ground-truth parameters used to generate the targets.
actual_theta = np.array([
    [100],
    [40],
])
# Design matrix: a column of ones for the intercept plus one feature.
X = np.array([
    [1, 0.8],
    [1, 2.3],
    [1, 1.6],
])
# Noise-free targets, so gradient descent should recover actual_theta (up to convergence error).
y = X @ actual_theta
# Start the search from theta = 0.
theta = np.array([
    [0],
    [0],
])
alpha = 0.5      # learning rate
num_iters = 200  # number of gradient descent iterations
(theta, history) = gradientDescent(X, y, theta, alpha, num_iters)
print("Actual theta_0:", actual_theta.item(0, 0), " Gradient descent theta_0:", theta.item(0, 0))
print("Actual theta_1:", actual_theta.item(1, 0), " Gradient descent theta_1:", theta.item(1, 0))
Actual theta_0: 100 Gradient descent theta_0: 99.99811396616462
Actual theta_1: 40 Gradient descent theta_1: 40.00108261853554
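As an extra sanity check, here's a minimal sketch comparing the fitted parameters to the generating ones; the 1e-2 tolerance is an arbitrary choice for illustration, not something from the original notebook:

# The fitted theta should be within a small (arbitrarily chosen) tolerance of actual_theta.
assert np.allclose(theta, actual_theta, atol=1e-2)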
Just to ensure that the cost really is decreasing with each iteration, let's plot the cost history.
from matplotlib import pyplot as plt

# Plot the cost after each iteration; the curve should decrease toward zero.
plt.figure()
plt.plot(np.arange(num_iters), history, 'r')
plt.xlabel('iteration')
plt.ylabel('cost')
plt.show()
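If we want a programmatic check rather than just an eyeball check, one option is the sketch below; it assumes the learning rate is small enough that the cost never increases between iterations, which holds for this data and this alpha:

# Check that the recorded cost is non-increasing from one iteration to the next.
costs = history[:, 0]
print("Cost non-increasing at every iteration:", bool(np.all(np.diff(costs) <= 0)))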