import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
digits = datasets.load_digits()
digits.images[0]
array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])
plt.imshow(digits.images[0], cmap=plt.cm.gray_r)
<matplotlib.image.AxesImage at 0x2b926a3da20>
FYI, here's a way to overlay numbers on the pixels, showing their brightness values:
plt.imshow(digits.images[0], cmap=plt.cm.gray_r)
for i in range(0,8):
    for j in range(0,8):
        plt.gca().text(j-0.15, i, int(digits.images[0][i][j])) # pixel at row i, column j is drawn at x=j, y=i
np.matrix.flatten(digits.images[0])
array([ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10., 15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4., 12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8., 0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5., 10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.])
np.matrix.flatten(digits.images[0]) / 15
array([0. , 0. , 0.33333333, 0.86666667, 0.6 , 0.06666667, 0. , 0. , 0. , 0. , 0.86666667, 1. , 0.66666667, 1. , 0.33333333, 0. , 0. , 0.2 , 1. , 0.13333333, 0. , 0.73333333, 0.53333333, 0. , 0. , 0.26666667, 0.8 , 0. , 0. , 0.53333333, 0.53333333, 0. , 0. , 0.33333333, 0.53333333, 0. , 0. , 0.6 , 0.53333333, 0. , 0. , 0.26666667, 0.73333333, 0. , 0.06666667, 0.8 , 0.46666667, 0. , 0. , 0.13333333, 0.93333333, 0.33333333, 0.66666667, 0.8 , 0. , 0. , 0. , 0. , 0.4 , 0.86666667, 0.66666667, 0. , 0. , 0. ])
def random_classifier(input_vector):
    return np.random.rand(10)
v = np.matrix.flatten(digits.images[0]) / 15.
result = random_classifier(v)
result
array([0.09609494, 0.75660412, 0.78242862, 0.43881297, 0.33470396, 0.92745083, 0.9695724 , 0.12471611, 0.01307655, 0.61261849])
NOTE: because this classifier is random, you will get a different digit result each time you re-run the code.
list(result).index(max(result))
6
digits.target[0]
0
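As the NOTE above says, random_classifier produces different output on every call. If you want reproducible numbers while experimenting, one option (not part of the original listing) is to seed NumPy's random number generator before calling it:
np.random.seed(0) # any fixed seed makes np.random.rand return the same values on each run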
def test_digit_classify(classifier,test_count=1000):
    correct = 0 #<1>
    for img, target in zip(digits.images[:test_count], digits.target[:test_count]): #<2>
        v = np.matrix.flatten(img) / 15. #<3>
        output = classifier(v) #<4>
        answer = list(output).index(max(output)) #<5>
        if answer == target:
            correct += 1 #<6>
    return (correct/test_count) #<7>
test_digit_classify(random_classifier)
0.089
Exercise: Suppose a digit classifier function outputs the following NumPy array. What digit does it think the image represents?
np.array([5.00512567e-06, 3.94168539e-05, 5.57124430e-09, 9.31981207e-09,
          9.98060276e-01, 9.10328786e-07, 1.56262695e-03, 1.82976466e-04,
          1.48519455e-04, 2.54354113e-07])
array([5.00512567e-06, 3.94168539e-05, 5.57124430e-09, 9.31981207e-09, 9.98060276e-01, 9.10328786e-07, 1.56262695e-03, 1.82976466e-04, 1.48519455e-04, 2.54354113e-07])
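The classifier's prediction is the index of the largest output entry. The largest value here is about 0.998, sitting at index 4, so it thinks the image is a 4. Reusing the argmax idiom from above (with the array stored in a hypothetical variable named output):
output = np.array([5.00512567e-06, 3.94168539e-05, 5.57124430e-09, 9.31981207e-09,
                   9.98060276e-01, 9.10328786e-07, 1.56262695e-03, 1.82976466e-04,
                   1.48519455e-04, 2.54354113e-07])
list(output).index(max(output))
4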
Mini project: Find the average of all the images of nines in the data set, in the same way we took averages of images in Chapter 6. Plot the resulting image. What does it look like?
def average_img(i):
    imgs = [img for img,target in zip(digits.images[1000:], digits.target[1000:]) if target==i]
    return sum(imgs) / len(imgs)
plt.imshow(average_img(9), cmap=plt.cm.gray_r)
<matplotlib.image.AxesImage at 0x2b926891c50>
Mini project: Build a better classifier than a random one by finding the average image of each kind of digit in the test data set, and comparing a target image with all of the averages. Specifically, return a vector of the dot products of the target image with each average digit image.
avg_digits = [np.matrix.flatten(average_img(i)) for i in range(10)]
def compare_to_avg(v):
    return [np.dot(v,avg_digits[i]) for i in range(10)]
test_digit_classify(compare_to_avg)
0.853
class MLP():
    def __init__(self,layer_sizes): #<1>
        self.layer_sizes = layer_sizes
        self.weights = [
            np.random.rand(n,m) #<2>
            for m,n in zip(layer_sizes[:-1],layer_sizes[1:]) #<3>
        ]
        self.biases = [np.random.rand(n) for n in layer_sizes[1:]] #<4>
nn = MLP([2,3])
NOTE: these numbers are randomly initialized, so your results below will vary.
nn.weights
[array([[0.04098182, 0.61121605],
        [0.99402839, 0.43278994],
        [0.14826355, 0.39370373]])]
nn.biases
[array([0.62871743, 0.55151304, 0.91445791])]
from math import exp
def sigmoid(x):
    return 1 / (1+exp(-x))
class MLP():
    def __init__(self,layer_sizes): #<1>
        self.layer_sizes = layer_sizes
        self.weights = [
            np.random.rand(n,m) #<2>
            for m,n in zip(layer_sizes[:-1],layer_sizes[1:]) #<3>
        ]
        self.biases = [np.random.rand(n) for n in layer_sizes[1:]] #<4>
    def feedforward(self,v):
        activations = [] #<1>
        a = v
        activations.append(a) #<2>
        for w,b in zip(self.weights, self.biases): #<3>
            z = w @ a + b #<4>
            a = [sigmoid(x) for x in z] #<5>
            activations.append(a) #<6>
        return activations
    def evaluate(self,v):
        return np.array(self.feedforward(v)[-1])
nn = MLP([64,16,10])
v = np.matrix.flatten(digits.images[0]) / 15.
nn.evaluate(v)
array([0.99996588, 0.99829151, 0.99979785, 0.9998958 , 0.99991507, 0.99982444, 0.9999176 , 0.99863889, 0.99977906, 0.99887847])
test_digit_classify(nn.evaluate)
0.099
x = np.array([np.matrix.flatten(img) for img in digits.images[:1000]]) / 15.0
y = digits.target[:1000]
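As a quick sanity check (not in the original listing), x should hold 1,000 flattened 64-pixel vectors and y the corresponding 1,000 digit labels:
x.shape, y.shape
((1000, 64), (1000,))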
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(16,), #<1>
                    activation='logistic', #<2>
                    max_iter=100, #<3>
                    verbose=10, #<4>
                    random_state=1, #<5>
                    learning_rate_init=.1) #<6>
mlp.fit(x,y)
Iteration 1, loss = 2.21958598
Iteration 2, loss = 1.56912978
Iteration 3, loss = 0.98970277
Iteration 4, loss = 0.57473464
Iteration 5, loss = 0.34048448
Iteration 6, loss = 0.21495855
Iteration 7, loss = 0.14366771
Iteration 8, loss = 0.11077020
Iteration 9, loss = 0.08764273
Iteration 10, loss = 0.07193546
Iteration 11, loss = 0.06020348
Iteration 12, loss = 0.04961899
Iteration 13, loss = 0.03979645
Iteration 14, loss = 0.03334502
Iteration 15, loss = 0.02996006
Iteration 16, loss = 0.02603968
Iteration 17, loss = 0.02355514
Iteration 18, loss = 0.02137348
Iteration 19, loss = 0.01967878
Iteration 20, loss = 0.01751214
Iteration 21, loss = 0.01617330
Iteration 22, loss = 0.01460386
Iteration 23, loss = 0.01408517
Iteration 24, loss = 0.01270504
Iteration 25, loss = 0.01191634
Iteration 26, loss = 0.01114222
Iteration 27, loss = 0.01045989
Iteration 28, loss = 0.00983648
Iteration 29, loss = 0.00920912
Iteration 30, loss = 0.00890851
Iteration 31, loss = 0.00843426
Iteration 32, loss = 0.00796039
Iteration 33, loss = 0.00749839
Iteration 34, loss = 0.00726271
Iteration 35, loss = 0.00673963
Iteration 36, loss = 0.00655405
Iteration 37, loss = 0.00626207
Iteration 38, loss = 0.00600639
Iteration 39, loss = 0.00581857
Iteration 40, loss = 0.00557529
Iteration 41, loss = 0.00533573
Iteration 42, loss = 0.00519479
Iteration 43, loss = 0.00505128
Iteration 44, loss = 0.00490121
Iteration 45, loss = 0.00469161
Iteration 46, loss = 0.00459590
Iteration 47, loss = 0.00464844
Iteration 48, loss = 0.00445157
Iteration 49, loss = 0.00425515
Iteration 50, loss = 0.00424934
Iteration 51, loss = 0.00397800
Iteration 52, loss = 0.00399927
Iteration 53, loss = 0.00383932
Iteration 54, loss = 0.00372439
Iteration 55, loss = 0.00361744
Iteration 56, loss = 0.00356447
Iteration 57, loss = 0.00345899
Iteration 58, loss = 0.00336792
Iteration 59, loss = 0.00330330
Iteration 60, loss = 0.00321734
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(16,), learning_rate='constant',
              learning_rate_init=0.1, max_iter=100, momentum=0.9,
              nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
              solver='adam', tol=0.0001, validation_fraction=0.1, verbose=10,
              warm_start=False)
mlp._predict(x)[0]
array([9.99766643e-01, 8.43331208e-11, 3.47867059e-06, 1.49956270e-07, 1.88677660e-06, 3.44652605e-05, 6.23829017e-06, 1.09043503e-04, 1.11195821e-07, 7.79837557e-05])
def sklearn_trained_classify(v):
    return mlp._predict([v])[0]
test_digit_classify(sklearn_trained_classify)
1.0
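Note that _predict is a private scikit-learn method, so it may change or disappear between versions. A safer variant (a small substitution, not in the original) is the public predict_proba method, which returns the same kind of per-class score vector, so a classifier built on it should score the same:
def sklearn_trained_classify(v):
    return mlp.predict_proba([v])[0]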
Exercise: Modify the test_digit_classify function to work on a custom range of examples in the test set. How does it do on the next 500 examples after the 1,000 training examples?
def test_digit_classify(classifier,start=0,test_count=1000):
    correct = 0
    end = start + test_count #<1>
    for img, target in zip(digits.images[start:end], digits.target[start:end]): #<2>
        v = np.matrix.flatten(img) / 15.
        output = classifier(v)
        answer = list(output).index(max(output))
        if answer == target:
            correct += 1
    return (correct/test_count)
test_digit_classify(sklearn_trained_classify,start=1000,test_count=500)
0.962
Exercise: Using the squared distance cost function, what is the cost of your randomly generated MLP for the first 1,000 training examples? What is the cost of the scikit-learn MLP?
def y_vec(digit):
    return np.array([1 if i == digit else 0 for i in range(0,10)])
def cost_one(classifier,x,i):
    return sum([(classifier(x)[j] - y_vec(i)[j])**2 for j in range(10)])
def total_cost(classifier):
    return sum([cost_one(classifier,x[j],y[j]) for j in range(1000)])/1000.
total_cost(nn.evaluate)
8.990834701722013
total_cost(sklearn_trained_classify)
5.670512721637246e-05
Mini-project: Extract the MLPClassifier weights and biases using its properties called coefs_ and intercepts_, respectively. Plug these weights and biases into the MLP class we built from scratch, and show that your resulting MLP performs well on digit classification.
nn = MLP([64,16,10])
nn.weights = [w.T for w in mlp.coefs_]
nn.biases = mlp.intercepts_
test_digit_classify(nn.evaluate,start=1000,test_count=500)
0.962
Mini-project: Use SymPy or your own code from Chapter 10 to automatically find the derivative of the sigmoid function:
$$\sigma(x) = \frac{1}{1+e^{-x}}$$
Show that the answer you get is equal to $\sigma(x)(1-\sigma(x))$.
from sympy import *
X = symbols('x')
diff(1 / (1+exp(-X)),X)
exp(-x)/(1 + exp(-x))**2
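SymPy doesn't automatically present this result in the form $\sigma(x)(1-\sigma(x))$, but you can confirm the two expressions are equal by asking it to simplify their difference, which reduces to zero:
s = 1 / (1 + exp(-X))
simplify(diff(s, X) - s * (1 - s))
0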