import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
def plot_function(f,xmin,xmax,**kwargs):
    ts = np.linspace(xmin,xmax,1000)
    plt.plot(ts,[f(t) for t in ts],**kwargs)
def scalar_field_heatmap(f,xmin,xmax,ymin,ymax,xsteps=100,ysteps=100):
    fig, ax = plt.subplots()
    fig.set_size_inches(7,7)
    fv = np.vectorize(f)
    X = np.linspace(xmin, xmax, xsteps)
    Y = np.linspace(ymin, ymax, ysteps)
    X, Y = np.meshgrid(X, Y)
    z = fv(X,Y)
    c = ax.pcolormesh(X,Y,z, cmap='plasma')
    ax.axis([X.min(), X.max(), Y.min(), Y.max()])
    fig.colorbar(c, ax=ax)
def plot_scalar_field(f,xmin,xmax,ymin,ymax,xsteps=100,ysteps=100,c=None,cmap=cm.coolwarm,alpha=1,antialiased=False,zorder=0):
    fig = plt.gcf()
    fig.set_size_inches(7,7)
    # fig.gca(projection='3d') was removed in newer Matplotlib; add_subplot works in both
    ax = fig.add_subplot(projection='3d')
    fv = np.vectorize(f)
    # Make data.
    X = np.linspace(xmin, xmax, xsteps)
    Y = np.linspace(ymin, ymax, ysteps)
    X, Y = np.meshgrid(X, Y)
    Z = fv(X,Y)
    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cmap, color=c, alpha=alpha,
                           linewidth=0, antialiased=antialiased, zorder=zorder)
def bmw_finder(mileage,price):
    if price > 25000:
        return 1
    else:
        return 0
from car_data import bmws, priuses
all_car_data = []
for bmw in bmws:
    all_car_data.append((bmw.mileage,bmw.price,1))
for prius in priuses:
    all_car_data.append((prius.mileage,prius.price,0))
all_car_data
[(93404.0, 13999.0, 1), (110890.0, 13995.0, 1), (94133.0, 13982.0, 1), (46778.0, 14599.0, 1), (53106.0, 22500.0, 1), (58761.0, 24998.0, 1), (108816.0, 24947.0, 1), (81100.0, 13995.0, 1), (90000.0, 8400.0, 1), (68613.0, 14995.0, 1), (94000.0, 11995.0, 1), (92500.0, 10995.0, 1), (112081.0, 11995.0, 1), (105121.0, 11500.0, 1), (92000.0, 2013.0, 1), (107953.0, 12999.0, 1), (56000.0, 18995.0, 1), (101191.0, 18900.0, 1), (64365.0, 16998.0, 1), (66000.0, 10000.0, 1), (76675.0, 12500.0, 1), (93015.0, 15995.0, 1), (80917.0, 14970.0, 1), (96000.0, 16795.0, 1), (70000.0, 12999.0, 1), (107000.0, 11950.0, 1), (78000.0, 18995.0, 1), (78000.0, 15000.0, 1), (92000.0, 2013.0, 1), (57624.0, 21963.0, 1), (77854.0, 16995.0, 1), (48310.0, 22998.0, 1), (51656.0, 20998.0, 1), (62410.0, 19991.0, 1), (39332.0, 29995.0, 1), (31420.0, 21000.0, 1), (41267.0, 22450.0, 1), (73000.0, 19999.0, 1), (94608.0, 11995.0, 1), (67000.0, 24964.0, 1), (50000.0, 18985.0, 1), (73601.0, 16999.0, 1), (50000.0, 18985.0, 1), (95229.0, 15980.0, 1), (69000.0, 27247.0, 1), (73309.0, 13790.0, 1), (41573.0, 21970.0, 1), (42172.0, 25842.0, 1), (41085.0, 18750.0, 1), (81625.0, 13990.0, 1), (38000.0, 24900.0, 1), (65000.0, 12000.0, 1), (103000.0, 16495.0, 1), (76000.0, 18500.0, 1), (34000.0, 19950.0, 1), (66000.0, 29500.0, 1), (36756.0, 18999.0, 1), (42681.0, 26697.0, 1), (29000.0, 29997.0, 1), (51000.0, 26968.0, 1), (26000.0, 25980.0, 1), (88262.0, 18338.0, 1), (48321.0, 21322.0, 1), (93762.0, 16991.0, 1), (79972.0, 16499.0, 1), (54177.0, 20774.0, 1), (43294.0, 21800.0, 1), (55736.0, 24985.0, 1), (74896.0, 22599.0, 1), (72282.0, 20770.0, 1), (58328.0, 22000.0, 1), (49856.0, 20998.0, 1), (42229.0, 21998.0, 1), (38126.0, 25998.0, 1), (46000.0, 21900.0, 1), (95000.0, 18500.0, 1), (30000.0, 19899.0, 1), (63000.0, 21500.0, 1), (39835.0, 27629.0, 1), (51043.0, 25964.0, 1), (27000.0, 30998.0, 1), (39591.0, 25000.0, 1), (37703.0, 35990.0, 1), (41467.0, 30254.0, 1), (83000.0, 15900.0, 1), (58623.0, 21998.0, 1), (57276.0, 21998.0, 1), (27620.0, 23998.0, 1), (40237.0, 21998.0, 1), (52961.0, 22998.0, 1), (36707.0, 34888.0, 1), (42363.0, 29891.0, 1), (44943.0, 23697.0, 1), (90000.0, 11000.0, 1), (90000.0, 13000.0, 1), (42470.0, 30977.0, 1), (27000.0, 20999.0, 1), (37000.0, 31990.0, 1), (63000.0, 2016.0, 1), (51000.0, 2016.0, 1), (59014.0, 13995.0, 0), (70000.0, 9900.0, 0), (84507.0, 14998.0, 0), (76000.0, 8300.0, 0), (41868.0, 15998.0, 0), (40631.0, 13991.0, 0), (74000.0, 10532.0, 0), (69703.0, 12900.0, 0), (37000.0, 15850.0, 0), (33000.0, 13995.0, 0), (75000.0, 9500.0, 0), (35000.0, 11100.0, 0), (97109.0, 11991.0, 0), (128882.0, 8359.0, 0), (39000.0, 13759.0, 0), (65000.0, 13990.0, 0), (37000.0, 12950.0, 0), (130000.0, 9500.0, 0), (63430.0, 12995.0, 0), (120000.0, 7600.0, 0), (74000.0, 9000.0, 0), (46000.0, 13982.0, 0), (73428.0, 14991.0, 0), (200000.0, 7950.0, 0), (88180.0, 12998.0, 0), (89466.0, 14599.0, 0), (76985.0, 11990.0, 0), (65000.0, 13990.0, 0), (63621.0, 12395.0, 0), (96000.0, 7900.0, 0), (74000.0, 9000.0, 0), (75000.0, 11000.0, 0), (143000.0, 8999.0, 0), (112000.0, 8900.0, 0), (74000.0, 9000.0, 0), (185000.0, 5950.0, 0), (80424.0, 13000.0, 0), (48655.0, 17998.0, 0), (102000.0, 9997.0, 0), (67508.0, 14991.0, 0), (39395.0, 18998.0, 0), (44531.0, 16577.0, 0), (68000.0, 12495.0, 0), (35000.0, 11999.0, 0), (68000.0, 12495.0, 0), (115000.0, 10900.0, 0), (66293.0, 14472.0, 0), (141000.0, 10000.0, 0), (109496.0, 9677.0, 0), (82000.0, 7500.0, 0), (106361.0, 10953.0, 0), (187000.0, 8800.0, 0), (76912.0, 11993.0, 0), (117000.0, 9450.0, 0), (113963.0, 
11494.0, 0), (72000.0, 12490.0, 0), (71218.0, 11994.0, 0), (48924.0, 13786.0, 0), (100000.0, 7800.0, 0), (98531.0, 10979.0, 0), (43015.0, 16977.0, 0), (29695.0, 16599.0, 0), (44640.0, 16900.0, 0), (56478.0, 13990.0, 0), (114959.0, 11060.0, 0), (83861.0, 13000.0, 0), (91314.0, 11911.0, 0), (37000.0, 16375.0, 0), (46000.0, 14500.0, 0), (45358.0, 16991.0, 0), (68432.0, 16998.0, 0), (37739.0, 19998.0, 0), (89352.0, 12995.0, 0), (65000.0, 13500.0, 0), (61000.0, 13000.0, 0), (35385.0, 13988.0, 0), (65000.0, 13500.0, 0), (57000.0, 16791.0, 0), (90019.0, 14790.0, 0), (136095.0, 11595.0, 0), (27000.0, 16235.0, 0), (54480.0, 17910.0, 0), (100000.0, 16000.0, 0), (65899.0, 16482.0, 0), (27000.0, 17734.0, 0), (144000.0, 9800.0, 0), (26989.0, 17925.0, 0), (67662.0, 13400.0, 0), (38401.0, 16998.0, 0), (80982.0, 13999.0, 0), (43000.0, 17492.0, 0), (87428.0, 11794.0, 0), (34028.0, 17491.0, 0), (37000.0, 15800.0, 0), (32000.0, 20995.0, 0), (89000.0, 12999.0, 0), (64000.0, 15000.0, 0), (45000.0, 16900.0, 0), (38000.0, 13500.0, 0), (71000.0, 12500.0, 0)]
def test_classifier(classifier, data):
    trues = 0
    falses = 0
    for mileage, price, is_bmw in data:
        if classifier(mileage, price) == is_bmw: #1
            trues += 1
        else:
            falses += 1 #1
    return trues / (trues + falses)
test_classifier(bmw_finder, all_car_data)
0.59
Exercise: Update the test_classifier function to print the number of true positives, true negatives, false positives, and false negatives. Printing these for the bmw_finder classifier, what can you tell about the performance of the classifier?
def test_classifier(classifier, data, verbose=False): #1
    true_positives = 0 #2
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    for mileage, price, is_bmw in data:
        predicted = classifier(mileage,price)
        if predicted and is_bmw: #3
            true_positives += 1
        elif predicted:
            false_positives += 1
        elif is_bmw:
            false_negatives += 1
        else:
            true_negatives += 1
    if verbose:
        print("true positives %f" % true_positives) #4
        print("true negatives %f" % true_negatives)
        print("false positives %f" % false_positives)
        print("false negatives %f" % false_negatives)
    return (true_positives + true_negatives) / len(data) #5
test_classifier(bmw_finder,all_car_data,verbose=True)
true positives 18.000000
true negatives 100.000000
false positives 0.000000
false negatives 82.000000
0.59
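With no false positives but 82 false negatives, bmw_finder almost never mislabels a Prius, but it misses most of the BMWs: the $25,000 cutoff is too high, since many BMWs in the data set sell for less than that.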
Exercise: Find a way to update the bmw_finder function slightly to improve its performance, and use the test_classifier function to confirm that your improved function has better than 59% accuracy.
def bmw_finder2(mileage,price):
    if price > 20000:
        return 1
    else:
        return 0
test_classifier(bmw_finder2, all_car_data)
0.735
def plot_data(ds):
    plt.scatter([d[0] for d in ds if d[2]==0],[d[1] for d in ds if d[2]==0],c='C1')
    plt.scatter([d[0] for d in ds if d[2]==1],[d[1] for d in ds if d[2]==1],c='C0',marker='x')
    plt.ylabel("Price ($)",fontsize=16)
    plt.xlabel("Odometer (mi)",fontsize=16)
plot_data(all_car_data)
plot_data(all_car_data)
plot_function(lambda x: 25000, 0, 200000, c='k')
plot_data(all_car_data)
plot_function(lambda x: 21000 - 0.07 * x, 0, 200000, c='k')
def decision_boundary_classify(mileage,price):
    if price > 21000 - 0.07 * mileage:
        return 1
    else:
        return 0
test_classifier(decision_boundary_classify, all_car_data)
0.805
Mini-project: What is the decision boundary of the form $p = constant$ giving the best classification accuracy on the test data set?
def constant_price_classifier(cutoff_price):
    def c(x,p):
        if p > cutoff_price:
            return 1
        else:
            return 0
    return c

def cutoff_accuracy(cutoff_price):
    c = constant_price_classifier(cutoff_price)
    return test_classifier(c,all_car_data)
all_prices = [price for (mileage,price,is_bmw) in all_car_data]
max(all_prices,key=cutoff_accuracy)
17998.0
test_classifier(constant_price_classifier(17998.0), all_car_data)
0.795
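The best constant cutoff, $17,998, gives 79.5% accuracy, slightly worse than the 80.5% achieved by the sloped decision boundary above.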
def make_scale(data):
    min_val = min(data) #1
    max_val = max(data)
    def scale(x): #2
        return (x-min_val) / (max_val - min_val)
    def unscale(y): #3
        return y * (max_val - min_val) + min_val
    return scale, unscale #4

price_scale, price_unscale = make_scale([x[1] for x in all_car_data]) #5
mileage_scale, mileage_unscale = make_scale([x[0] for x in all_car_data])
scaled_car_data = [(mileage_scale(mileage), price_scale(price), is_bmw)
                   for mileage,price,is_bmw in all_car_data]
plot_data(scaled_car_data)
plot_data(scaled_car_data)
plot_function(lambda x: 0.56 - 0.35*x,0,1,c='k')
scalar_field_heatmap(lambda x,p: p + 0.35*x - 0.56, 0, 1,0,1)
plt.ylabel('Price', fontsize=16)
plt.xlabel('Mileage', fontsize=16)
plot_function(lambda x: 0.56-0.35*x,0,1,c='k')
from math import exp
def sigmoid(x):
    return 1 / (1+exp(-x))
plot_function(sigmoid,-5,5)
def f(x,p):
    return p + 0.35 * x - 0.56

def l(x,p):
    return sigmoid(f(x,p))
scalar_field_heatmap(l, 0, 1,0,1)
# plot_data(scaled_car_data,white=True)
plt.ylabel('Price', fontsize=16)
plt.xlabel('Mileage', fontsize=16)
plot_function(lambda x: 0.56-0.35*x,0,1,c='k')
plot_scalar_field(l, -5, 5, -5, 5)
plot_scalar_field(f,-5,5,-5,5)
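A quick sanity check (mine, not from the chapter): on the decision boundary $p = 0.56 - 0.35x$ the value of $f$ is zero, so the logistic function $l$ returns exactly 0.5 there; above the line $l$ is greater than 0.5 and below it less.
f(0, 0.56)   # 0.0, since (0, 0.56) lies on the line
l(0, 0.56)   # sigmoid(0) = 0.5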
Exercise: Find a function $h(x)$ such that large positive values of $x$ cause $h(x)$ to be very close to $0$, large negative values of $x$ cause $h(x)$ to be very close to $1$, and $h(3) = 0.5$.
def h(x):
    return sigmoid(3-x)
plot_function(h,-5,10)
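Since $h(3) = \sigma(3-3) = \sigma(0) = 0.5$, and $\sigma(3-x)$ approaches 0 as $x$ grows large and 1 as $x$ becomes very negative, this function meets all three requirements.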
def make_logistic(a,b,c):
    def l(x,p):
        return sigmoid(a*x + b*p - c)
    return l

def simple_logistic_cost(a,b,c):
    l = make_logistic(a,b,c)
    errors = [abs(is_bmw-l(x,p))
              for x,p,is_bmw in scaled_car_data]
    return sum(errors)
from math import log
plot_function(lambda x: -log(x),0.01,1)
-log(0.01)
4.605170185988091
-log(0.001)
6.907755278982137
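Because $-\log(y)$ grows without bound as $y$ approaches 0, using it as a cost heavily penalizes a logistic function that assigns a value near zero to the correct class; this is the idea behind point_cost below.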
def point_cost(l,x,p,is_bmw): #1
    wrong = 1 - is_bmw
    return -log(abs(wrong - l(x,p)))

def logistic_cost(a,b,c):
    l = make_logistic(a,b,c)
    errors = [point_cost(l,x,p,is_bmw) #2
              for x,p,is_bmw in scaled_car_data]
    return sum(errors)
def plot_line(acoeff,bcoeff,ccoeff,**kwargs):
    a,b,c = acoeff, bcoeff, ccoeff
    # black by default
    if 'c' not in kwargs:
        kwargs['c'] = 'k'
    if b == 0:
        # vertical line x = c/a
        plt.plot([c/a,c/a],[0,1],**kwargs)
    else:
        def y(x):
            return (c-a*x)/b
        plt.plot([0,1],[y(0),y(1)],**kwargs)
plot_data(scaled_car_data)
plot_line(0.35,1,0.56)
plot_line(1,1,1)
logistic_cost(0.35,1,0.56)
130.92490748700456
logistic_cost(1,1,1)
135.56446830870456
Exercise: Implement the function plot_line(a,b,c) referenced in the chapter that plots the line $ax + by = c$ where $0 \leq x \leq 1$ and $0 \leq y \leq 1$.
Solution: (see the implementation of plot_line above)
Mini-project: What does the graph of $k(x,y) = \sigma(x^2 + y^2 - 1)$ look like? What does the decision boundary look like, meaning the set of points where $k(x,y) = 0.5$?
def k(x,y):
    return sigmoid(x**2 + y**2 - 1)
plot_scalar_field(k,-3,3,-3,3)
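On the circle $x^2 + y^2 = 1$ the argument of the sigmoid is zero, so $k(x,y) = 0.5$ there: the decision boundary is the unit circle. The surface takes its minimum value $\sigma(-1) \approx 0.27$ at the origin and levels off toward 1 far from it. A quick check of my own (not from the chapter):
k(1, 0)    # sigmoid(0) = 0.5
k(0, -1)   # sigmoid(0) = 0.5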
Mini-project: Two equations $2x + y = 1$ and $4x + 2y = 2$ define the same line, and therefore the same decision boundary. Are the logistic functions $\sigma(2x + y - 1)$ and $\sigma(4x + 2y - 2)$ the same?
plot_scalar_field(lambda x,y:sigmoid(2*x+y-1),-3,3,-3,3)
plot_scalar_field(lambda x,y:sigmoid(4*x+2*y-2),-3,3,-3,3)
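The two surfaces share the same decision boundary, since both equal 0.5 exactly on the line $2x + y = 1$, but they are not the same function: doubling the coefficients makes the sigmoid twice as steep near the boundary. A quick check of my own at the point $(1,1)$:
sigmoid(2*1 + 1*1 - 1)    # sigmoid(2), about 0.88
sigmoid(4*1 + 2*1 - 2)    # sigmoid(4), about 0.98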
from math import sqrt
def length(v):
    return sqrt(sum([vi*vi for vi in v]))

def secant_slope(f,xmin,xmax):
    return (f(xmax) - f(xmin)) / (xmax - xmin)

def approx_derivative(f,x,dx=1e-6):
    return secant_slope(f,x-dx,x+dx)

def approx_gradient(f,x0,y0,dx=1e-6):
    partial_x = approx_derivative(lambda x:f(x,y0),x0,dx=dx)
    partial_y = approx_derivative(lambda y:f(x0,y),y0,dx=dx)
    return (partial_x,partial_y)
def approx_gradient3(f,x0,y0,z0,dx=1e-6):
    partial_x = approx_derivative(lambda x:f(x,y0,z0),x0,dx=dx)
    partial_y = approx_derivative(lambda y:f(x0,y,z0),y0,dx=dx)
    partial_z = approx_derivative(lambda z:f(x0,y0,z),z0,dx=dx)
    return (partial_x,partial_y,partial_z)
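A quick sanity check of my own (not part of the chapter): for functions with known gradients, the numerical approximations should agree with the exact values to several decimal places.
approx_gradient(lambda x,y: x**2 + y**2, 1, 2)      # exact gradient is (2, 4)
approx_gradient3(lambda x,y,z: x*y*z, 1, 2, 3)      # exact gradient is (6, 3, 2)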
def gradient_descent3(f,xstart,ystart,zstart,
                      tolerance=1e-6,max_steps=1000):
    x = xstart
    y = ystart
    z = zstart
    grad = approx_gradient3(f,x,y,z)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        x -= 0.01 * grad[0]
        y -= 0.01 * grad[1]
        z -= 0.01 * grad[2]
        grad = approx_gradient3(f,x,y,z)
        steps += 1
    return x,y,z
gradient_descent3(logistic_cost,1,1,1,max_steps=100)
(0.21114493148496014, 5.045439728207488, 2.1260122566471376)
gradient_descent3(logistic_cost,1,1,1,max_steps=200)
(0.8845715265377516, 6.65754318727634, 2.9550572849988455)
To make a cool graph of the decision boundary moving as gradient descent runs:
plot_data(scaled_car_data)
for i in range(0,1000,100):
    a,b,c = gradient_descent3(logistic_cost,1,1,1,max_steps=i)
    plot_line(a,b,c,alpha=i/1000,c='k')
gradient_descent3(logistic_cost,1,1,1,max_steps=8000)
(3.7167003088210118, 11.422062406779267, 5.596878365032182)
plot_data(scaled_car_data)
plot_line(0.35,1,0.56)
plot_line(3.7167003153580045, 11.422062409195114, 5.596878367305919)
def best_logistic_classifier(x,p):
    l = make_logistic(3.7167003153580045, 11.422062409195114, 5.596878367305919)
    if l(x,p) > 0.5:
        return 1
    else:
        return 0
test_classifier(best_logistic_classifier,scaled_car_data)
0.8
plot_scalar_field(make_logistic(0.35,1,0.56), -2, 2, -2, 2)
plot_scalar_field(make_logistic(3.7167003153580045, 11.422062409195114, 5.596878367305919), -2, 2, -2, 2)
Exercise: Modify the gradient_descent3 function to print out the total number of steps taken before it returns its result. How many steps does the gradient descent take to converge for logistic_cost?
def gradient_descent3(f,xstart,ystart,zstart,tolerance=1e-6,max_steps=1000):
    x = xstart
    y = ystart
    z = zstart
    grad = approx_gradient3(f,x,y,z)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        x -= 0.01 * grad[0]
        y -= 0.01 * grad[1]
        z -= 0.01 * grad[2]
        grad = approx_gradient3(f,x,y,z)
        steps += 1
    print(steps)
    return x,y,z
gradient_descent3(logistic_cost,1,1,1,max_steps=8000)
7243
(3.7167003088210118, 11.422062406779267, 5.596878365032182)
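The descent converges after 7,243 steps, well under the 8,000-step cap.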
Mini-project: Write an approx_gradient function that can calculate the gradient of a function in any number of dimensions. Then write a gradient_descent function that works in any number of dimensions. To test your gradient_descent on an $n$-dimensional function, you can try a function like $f(x_1, x_2, \ldots, x_n) = (x_1 - 1)^2 + (x_2 - 1)^2 + \ldots + (x_n - 1)^2$ where $x_1, x_2, \ldots, x_n$ are the $n$ input variables to the function $f$. The minimum of this function should be $(1, 1, \ldots, 1)$, an $n$-dimensional vector with 1 in every entry.
def partial_derivative(f,i,v,**kwargs):
    def cross_section(x):
        arg = [(vj if j != i else x) for j,vj in enumerate(v)]
        return f(*arg)
    return approx_derivative(cross_section, v[i], **kwargs)
def approx_gradient(f,v,dx=1e-6):
    return [partial_derivative(f,i,v,dx=dx) for i in range(0,len(v))]
def gradient_descent(f,vstart,tolerance=1e-6,max_steps=1000):
    v = vstart
    grad = approx_gradient(f,v)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        v = [(vi - 0.01 * dvi) for vi,dvi in zip(v,grad)]
        grad = approx_gradient(f,v)
        steps += 1
    return v

def sum_squares(*v):
    return sum([(x-1)**2 for x in v])
v = [2,2,2,2,2]
gradient_descent(sum_squares,v)
[1.0000002235452137, 1.0000002235452137, 1.0000002235452137, 1.0000002235452137, 1.0000002235452137]
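As an extra check (my own, not from the chapter), the general-purpose gradient_descent should also be able to reproduce the earlier three-dimensional result by minimizing logistic_cost directly, provided it is allowed enough steps, as with gradient_descent3 above:
gradient_descent(logistic_cost, [1, 1, 1], max_steps=8000)
# expect a result close to the gradient_descent3 output above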