Notebook

Deep Learning¶

1. Linear Regression¶

https://d2l.ai/chapter_linear-regression/

1.1. Linear regression from scratch in NumPy¶

In [1]:

import numpy as np

# Define the true weights and bias of the model
w_true = np.array([2, -3.4])
b_true = 4.2

# Construct a random generator, seeded for reproducibility
rng = np.random.default_rng(seed=0)

# Generate the inputs (from a standard normal distribution) and outputs (with some Gaussian noise)
number_examples = 1000
input_size = len(w_true)
X = rng.normal(0, 1, (number_examples, input_size))
y = np.matmul(X, w_true) + b_true + rng.normal(0, 0.01, number_examples)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
learning_rate = 0.03

# Initialize the weights and bias to recover
w = rng.normal(0, 1, input_size)
b = 0

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = np.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):
    
    # Generate random indices for all the examples
    example_indices = np.arange(number_examples)
    rng.shuffle(example_indices)
    
    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []
    
    # Loop over the examples in minibatches
    for j in np.arange(0, number_examples, batch_size):
        
        # Get the indices of the examples for one minibatch
        batch_indices = example_indices[j:min(j+batch_size, number_examples)]
        
        # Get the inputs and outputs for the current minibatch
        X_batch = X[batch_indices, :]
        y_batch = y[batch_indices]
        
        # Compute the predicted outputs
        y_hat = np.matmul(X_batch, w) + b
        
        # Compute the loss between the predicted and true outputs
        l = 0.5*np.power(y_hat-y_batch, 2)
        
        # Save the mean loss for the current minibatch
        batch_loss.append(np.mean(l))
        
        # Update the weights and bias using stochastic gradient descent (SGD)
        w = w - learning_rate*np.mean(X_batch*(y_hat-y_batch)[:, None], axis=0)
        b = b - learning_rate*np.mean(y_hat-y_batch, axis=0)
        
    # Save the mean loss for the current epoch
    epoch_loss[i] = np.mean(batch_loss)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {w}')
print(f'b = {b}')

1/3: 2.8028630762446722
2/3: 0.005944852663986381
3/3: 6.104357869966443e-05

w = [ 1.99956731 -3.39973415]
b = 4.198903523007357

1.2. Linear regression from scratch in PyTorch¶

In [4]:

import torch

# Define the true weights and bias of the model
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Generate inputs and outputs
number_examples = 1000
input_size = len(w_true)
X = torch.normal(0, 1, (number_examples, input_size))
y = torch.matmul(X, w_true) + b_true + torch.normal(0, 0.01, [number_examples])

# Define a function to read the dataset in random minibatches
def batch(X, y, batch_size):
    
    # Generate random indices for all the examples
    number_examples = X.shape[0]
    example_indices = torch.randperm(number_examples)
    
    # Loop over the examples in minibatches
    for i in range(0, number_examples, batch_size):
        
        # Get the indices of the examples for one minibatch
        batch_indices = example_indices[i:min(i+batch_size, number_examples)]
        
        # Return the input and output for the current minibatch and continue the iteration in the function
        yield X[batch_indices], y[batch_indices]

# Define the parameters for the training
number_epochs = 3
batch_size = 10
learning_rate = 0.03

# Initialize the weights and bias to recover, requiring the gradients to be computed
w = torch.normal(0, 1, [input_size], requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = torch.zeros(number_epochs)
        
# Loop over the epochs
for i in range(number_epochs):
    
    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []
    
    # Loop over the examples in minibatches
    for X_batch, y_batch in batch(X, y, batch_size):
        
        # Compute the predicted outputs
        y_hat = torch.matmul(X_batch, w) + b
        
        # Compute the loss between the predicted and true outputs
        l = 0.5*(y_hat-y_batch)**2
        
        # Compute the gradient on l wrt w and b
        # (sum and not mean as the gradients will be divided by the batch size during SGD)
        l.sum().backward()
        
        # Save the mean loss for the current minibatch
        batch_loss.append(l.mean())
        
        # Temporarily sets all the requires_grad flags to false
        with torch.no_grad():
            
            # Update the weights and bias using SGD
            # (use augmented assignments to avoid modifying existing variables)
            w -= learning_rate*w.grad/len(l)
            b -= learning_rate*b.grad/len(l)
            
            # Set the gradients to zeros to avoid accumulating gradients
            w.grad.zero_()
            b.grad.zero_()
            
    # Save the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {w}')
print(f'b = {b}')

1/3: 3.0891919136047363
2/3: 0.006769159343093634
3/3: 6.386495078913867e-05

w = tensor([ 1.9992, -3.3997], requires_grad=True)
b = tensor([4.1992], requires_grad=True)

1.3. Linear regression using APIs in PyTorch¶

In [1]:

import torch
from torch.utils import data
from torch import nn

# Define the true weights and bias of the model
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Generate inputs and outputs
number_examples = 1000
input_size = len(w_true)
X = torch.normal(0, 1, (number_examples, input_size))
y = torch.matmul(X, w_true) + b_true + torch.normal(0, 0.01, [number_examples])

# Define a function to read the dataset in random minibatches by using data iterator
def batch(X, y, batch_size):
    data_set = data.TensorDataset(*(X, y))
    return data.DataLoader(data_set, batch_size, shuffle=True)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
learning_rate = 0.03

# Define the model with a fully-connected layer
model = nn.Sequential(nn.Linear(input_size, 1))

# Initialize the parameters
model[0].weight.data.normal_(0, 0.01)
model[0].bias.data.fill_(0)

# Define the loss function (mean squared error, without the 0.5 factor)
loss = nn.MSELoss()

# Define the optimization algorithm (SGD)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = torch.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):
    
    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []
    
    # Loop over the examples in minibatches
    for X_batch, y_batch in batch(X, y, batch_size):
        
        # Compute the predicted outputs
        y_hat = model(X_batch)
        
        # Compute the loss between the predicted and true outputs
        l = loss(y_hat, y_batch[:, None])
        
        # Save the loss for the current minibatch
        batch_loss.append(l)
        
        # Set the gradients to zero
        optimizer.zero_grad()
        
        # Computes the gradient
        l.backward()
        
        # Performs a single parameter update
        optimizer.step()
        
    # Save the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)
        
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {model[0].weight.data}')
print(f'b = {model[0].bias.data}')

1/3: 2.8517115116119385
2/3: 0.0001156603466370143
3/3: 0.00010338309220969677

w = tensor([[ 2.0001, -3.3987]])
b = tensor([4.1996])

1.4. Linear regression using higher-level APIs in Keras¶

In [45]:

import tensorflow as tf

# Define the true weights and bias of the model
w_true = tf.constant([2, -3.4], shape=[2, 1])
b_true = tf.constant(4.2)

# Generate inputs and outputs
number_examples = 1000
input_size = len(w_true)
tf.random.set_seed(0)
X = tf.random.normal([number_examples, input_size], 0, 1)
y = tf.matmul(X, w_true) + b_true + tf.random.normal([number_examples], 0, 0.01)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
learning_rate = 0.03

# Define the model with a densely-connected NN layer with initialized parameters
model = tf.keras.Sequential([tf.keras.layers.Dense(1, \
                                                   kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), \
                                                   bias_initializer='zeros')])

# Configure the model for training with SGD optimizer and MSE loss
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), \
              loss=tf.keras.losses.MeanSquaredError())

# Train the model given the batch size and number of epochs
model.fit(x=X, y=y, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Print the predicted weights and bias
print('')
print(f'w = {model.get_weights()[0]}')
print(f'b = {model.get_weights()[1]}')

Epoch 1/3
100/100 [==============================] - 0s 469us/step - loss: 2.8948
Epoch 2/3
100/100 [==============================] - 0s 413us/step - loss: 1.1406e-04
Epoch 3/3
100/100 [==============================] - 0s 449us/step - loss: 1.0830e-04

w = [[ 2. ]
 [-3.4]]
b = [4.199775]

2. Softmax Regression¶

https://d2l.ai/chapter_linear-classification/

2.1. Fashion-MNIST dataset¶

In [2]:

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Get the Fashion-MNIST dataset, with train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Normalize the inputs
X_train = X_train/255
X_test = X_test/255

# Translate the outputs into labels
label_list = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
label_train = [label_list[i] for i in y_train]
label_test = [label_list[i] for i in y_test]

# Show a single example for the different classes
number_classes = len(label_list)
plt.figure(figsize=(18, 2))
for i in range(number_classes):
    j = np.where(y_train==i)[0][0]
    plt.subplot(1, number_classes, i+1)
    plt.imshow(X_train[j, :, :], cmap='binary')
    plt.title(label_list[i])
    plt.xticks([])
    plt.yticks([])
plt.show()

2.2. Softmax regression from scratch in NumPy¶

In [1]:

import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf

# Get the train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
number_train = len(X_train)
number_test = len(X_test)

# Normalize and flatten the inputs
input_size = np.size(X_train[0])
X_train = np.reshape(X_train/255, (number_train, input_size))
X_test = np.reshape(X_test/255, (number_test, input_size))

# Derive one-hot versions of the train outputs
output_size = 10
Y_train = np.zeros((number_train, output_size))
Y_train[np.arange(number_train), y_train] = 1

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Initialize the weights and bias to recover
W = np.random.default_rng().normal(0, 0.01, size=(input_size, output_size))
b = np.zeros(output_size)

# Initialize lists for the mean train loss and accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]

# Initialize a list for the test accuracy overall for every epoch
test_accuracy = [None]*number_epochs

# Loop over the epochs
for i in range(number_epochs):
    
    # Generate random indices for all the train examples
    train_indices = np.arange(number_train)
    random.shuffle(train_indices)
    
    # Loop over the train examples in minibatches
    for j in np.arange(0, number_train, batch_size):
        
        # Get the indices of the train examples for one minibatch
        batch_indices = train_indices[j:min(j+batch_size, number_train)]
        
        # Get the train inputs and outputs for the minibatch
        X = X_train[batch_indices, :]
        y = y_train[batch_indices]
        Y = Y_train[batch_indices]
        
        # Compute the predicted outputs (logits)
        O = np.matmul(X, W) + b
        
        # Compute the softmax of the logits (indirectly to avoid numerical stability issues)
        O = O-np.max(O, axis=1)[:, None]
        O_exp = np.exp(O)
        Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
        
        # Compute the mean cross-entropy loss for the minibatch and save it
        l = np.mean(np.log(np.sum(O_exp, axis=1)-np.sum(Y*O, axis=1)))
        train_loss[i].append(l)
        
        # Compute the mean accuracy for the minibatch and save it
        a = np.mean(np.argmax(Y_hat, axis=1)==y)
        train_accuracy[i].append(a)
        
        # Update the weights and bias using SGD
        dl = Y_hat-Y
        W = W-learning_rate*np.matmul(X.T, dl)/np.shape(X)[0]
        b = b-learning_rate*np.mean(dl, axis=0)
        
    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = np.mean(train_loss[i])
    train_accuracy[i] = np.mean(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for the current epoch
    O = np.matmul(X_test, W) + b
    O = O-np.max(O, axis=1)[:, None]
    O_exp = np.exp(O)
    Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
    test_accuracy[i] = np.mean(np.argmax(Y_hat, axis=1)==y_test)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
number_examples = 10
O = np.matmul(X_test[:number_examples, :], W) + b
O = O-np.max(O, axis=1)[:, None]
O_exp = np.exp(O)
Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
y_hat = np.argmax(Y_hat, axis=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(np.reshape(X_test[i, :], (28, 28))*255, cmap='binary')
    plt.title(f'True: {label_list[y_test[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=0.646; train_accuracy=0.751; test_accuracy=0.784
2/10: train_loss=0.441; train_accuracy=0.814; test_accuracy=0.799
3/10: train_loss=0.398; train_accuracy=0.826; test_accuracy=0.810
4/10: train_loss=0.375; train_accuracy=0.831; test_accuracy=0.811
5/10: train_loss=0.360; train_accuracy=0.836; test_accuracy=0.813
6/10: train_loss=0.348; train_accuracy=0.841; test_accuracy=0.821
7/10: train_loss=0.340; train_accuracy=0.843; test_accuracy=0.818
8/10: train_loss=0.334; train_accuracy=0.845; test_accuracy=0.829
9/10: train_loss=0.329; train_accuracy=0.846; test_accuracy=0.831
10/10: train_loss=0.324; train_accuracy=0.848; test_accuracy=0.834

2.3. Softmax regression from scratch in PyTorch¶

In [2]:

import matplotlib.pyplot as plt
import torch
import torchvision
from torch.utils import data

# Get the dataset (transform the image data from PIL type to normalized 32-bit floating point tensors)
fmnist_train = torchvision.datasets.FashionMNIST(root='data', train=True, download=True, 
                                                 transform=torchvision.transforms.ToTensor())
fmnist_test = torchvision.datasets.FashionMNIST(root='data', train=False, download=True,
                                                transform=torchvision.transforms.ToTensor())

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Use data iterators to read a minibatch at each iteration, shuffling the examples for the train set and using 4 processes
train_iter = data.DataLoader(fmnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(fmnist_test, batch_size, shuffle=False, num_workers=4)

# Initialize the parameters to recover, requiring the gradients to be computed
input_size = fmnist_train[0][0].nelement()
output_size = 10
W = torch.normal(0, 0.01, size=(input_size, output_size), requires_grad=True)
b = torch.zeros(output_size, requires_grad=True)

# Initialize lists for the mean train loss, train and test accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]
test_accuracy = [[] for _ in range(number_epochs)]

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the train examples in minibatches
    for X, y in train_iter:
        
        # Compute the logits, after flattening the images
        O = torch.matmul(torch.reshape(X, (-1, input_size)), W) + b

        # Compute the softmax of the logits
        O_exp = torch.exp(O)
        Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
        
        # Compute the cross-entropy loss (use the indices of the true classes in y_batch 
        # to get the corresponding probabilities in y_batch, for all the examples)
        l = -torch.log(Y_hat[range(Y_hat.shape[0]), y])
        
        # Save the mean loss for the current minibatch
        train_loss[i].append(torch.mean(l).item())
        
        # Compute the mean accuracy for the current minibatch and save it
        a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
        train_accuracy[i].append(a)
        
        # Compute the gradient on l with respect to W and b
        # (sum and not mean as the gradients will be divided by the batch size during SGD)
        torch.sum(l).backward()
        
        # Disable gradient calculation for the following operations not to be differentiable
        with torch.no_grad():
            
            # Update the weights and bias using SGD
            # (use augmented assignments to avoid modifying existing variables)
            W -= learning_rate*W.grad/len(l)
            b -= learning_rate*b.grad/len(l)
            
            # Set the gradients to zeros to avoid accumulating gradients
            W.grad.zero_()
            b.grad.zero_()
    
    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = sum(train_loss[i])/len(train_loss[i])
    train_accuracy[i] = sum(train_accuracy[i])/len(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for every epoch, in minibatches
    with torch.no_grad():
        for X, y in test_iter:
            O = torch.matmul(torch.reshape(X, (-1, input_size)), W) + b
            O_exp = torch.exp(O)
            Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
            a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
            test_accuracy[i].append(a)
    test_accuracy[i] = sum(test_accuracy[i])/len(test_accuracy[i])
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
for X, y in test_iter:
    break
number_examples = 10
O = torch.matmul(torch.reshape(X[:number_examples], (-1, input_size)), W) + b
O_exp = torch.exp(O)
Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
y_hat = torch.argmax(Y_hat, dim=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X[i][0], cmap='binary')
    plt.title(f'True: {label_list[y[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=0.785; train_accuracy=0.750; test_accuracy=0.791
2/10: train_loss=0.570; train_accuracy=0.813; test_accuracy=0.802
3/10: train_loss=0.524; train_accuracy=0.826; test_accuracy=0.820
4/10: train_loss=0.501; train_accuracy=0.831; test_accuracy=0.825
5/10: train_loss=0.486; train_accuracy=0.838; test_accuracy=0.823
6/10: train_loss=0.475; train_accuracy=0.839; test_accuracy=0.825
7/10: train_loss=0.466; train_accuracy=0.842; test_accuracy=0.832
8/10: train_loss=0.457; train_accuracy=0.845; test_accuracy=0.832
9/10: train_loss=0.451; train_accuracy=0.847; test_accuracy=0.825
10/10: train_loss=0.447; train_accuracy=0.848; test_accuracy=0.828

2.4. Softmax regression using APIs in PyTorch¶

In [3]:

import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils import data
import torchvision

# Get the dataset (transform the image data from PIL type to normalized 32-bit floating point tensors)
fmnist_train = torchvision.datasets.FashionMNIST(root='data', train=True, download=True, 
                                                 transform=torchvision.transforms.ToTensor())
fmnist_test = torchvision.datasets.FashionMNIST(root='data', train=False, download=True,
                                                transform=torchvision.transforms.ToTensor())

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Use data iterators to read a minibatch at each iteration, shuffling the examples for the train set and using 4 processes
train_iter = data.DataLoader(fmnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(fmnist_test, batch_size, shuffle=False, num_workers=4)

# Define the model, with a flatten layer to reshape the inputs before the fully-connected layer
input_size = fmnist_train[0][0].nelement()
output_size = 10
model = nn.Sequential(nn.Flatten(), nn.Linear(input_size, output_size))

# Initialize the parameters by applying a function recursively to every submodule
def init(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
model.apply(init);

# Define the loss function (with no reduction applied to the output, no mean, no sum, none)
loss = nn.CrossEntropyLoss(reduction='none')

# Define the optimization algorithm
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Initialize lists for the mean train loss, train and test accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]
test_accuracy = [[] for _ in range(number_epochs)]

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the train examples in minibatches
    for X, y in train_iter:
        
        # Compute the predicted outputs
        Y_hat = model(X)
        
        # Compute the loss
        l = loss(Y_hat, y)
        
        # Save the mean loss for the current minibatch
        train_loss[i].append(torch.mean(l).item())
        
        # Compute the mean accuracy for the current minibatch and save it
        a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
        train_accuracy[i].append(a)
        
        # Set the gradients to zero
        optimizer.zero_grad()
        
        # Compute the gradient
        l.mean().backward()
        
        # Performs a single parameter update
        optimizer.step()
        
    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = sum(train_loss[i])/len(train_loss[i])
    train_accuracy[i] = sum(train_accuracy[i])/len(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for every epoch, in minibatches
    with torch.no_grad():
        for X, y in test_iter:
            Y_hat = model(X)
            a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
            test_accuracy[i].append(a)
    test_accuracy[i] = sum(test_accuracy[i])/len(test_accuracy[i])
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
for X, y in test_iter:
    break
number_examples = 10
Y_hat = model(X[:number_examples])
y_hat = torch.argmax(Y_hat, dim=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X[i][0], cmap='binary')
    plt.title(f'True: {label_list[y[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=0.783; train_accuracy=0.752; test_accuracy=0.793
2/10: train_loss=0.570; train_accuracy=0.813; test_accuracy=0.810
3/10: train_loss=0.525; train_accuracy=0.827; test_accuracy=0.814
4/10: train_loss=0.500; train_accuracy=0.834; test_accuracy=0.827
5/10: train_loss=0.486; train_accuracy=0.837; test_accuracy=0.822
6/10: train_loss=0.473; train_accuracy=0.840; test_accuracy=0.824
7/10: train_loss=0.465; train_accuracy=0.843; test_accuracy=0.832
8/10: train_loss=0.458; train_accuracy=0.844; test_accuracy=0.833
9/10: train_loss=0.453; train_accuracy=0.847; test_accuracy=0.829
10/10: train_loss=0.447; train_accuracy=0.848; test_accuracy=0.822

2.5. Softmax regression using higher-level APIs in Keras¶

In [2]:

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Get the train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train = X_train/255
X_test = X_test/255
input_size = X_train[0, :, :].shape
output_size = 10

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Define a model with flattened inputs and a densely-connected NN layer
model = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=input_size),
                             tf.keras.layers.Dense(output_size,
                                                   activation=None,
                                                   kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), 
                                                   bias_initializer='zeros')])

# Configure the model with SGD optimizer, cross-entropy loss (with integers, not one-hot), and accuracy metrics
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), \
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \
              metrics=['accuracy'])

# Train the model
model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Show some predictions
number_examples = 10
Y_hat = model.predict(X_test[:number_examples, :, :])
y_hat = np.argmax(Y_hat, axis=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X_test[i, :, :], cmap='binary')
    plt.title(f'True: {label_list[y_test[i]]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

Epoch 1/10
235/235 [==============================] - 1s 765us/step - loss: 0.7853 - accuracy: 0.7506
Epoch 2/10
235/235 [==============================] - 0s 755us/step - loss: 0.5703 - accuracy: 0.8134
Epoch 3/10
235/235 [==============================] - 0s 737us/step - loss: 0.5254 - accuracy: 0.8254
Epoch 4/10
235/235 [==============================] - 0s 733us/step - loss: 0.5009 - accuracy: 0.8323
Epoch 5/10
235/235 [==============================] - 0s 747us/step - loss: 0.4847 - accuracy: 0.8368
Epoch 6/10
235/235 [==============================] - 0s 751us/step - loss: 0.4743 - accuracy: 0.8402
Epoch 7/10
235/235 [==============================] - 0s 788us/step - loss: 0.4651 - accuracy: 0.8421
Epoch 8/10
235/235 [==============================] - 0s 802us/step - loss: 0.4582 - accuracy: 0.8449
Epoch 9/10
235/235 [==============================] - 0s 785us/step - loss: 0.4523 - accuracy: 0.8462
Epoch 10/10
235/235 [==============================] - 0s 757us/step - loss: 0.4466 - accuracy: 0.8479

3. Multilayer Perceptron¶

https://d2l.ai/chapter_multilayer-perceptrons

3.1. MLP from scratch in NumPy¶

In [5]:

import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf

# Get the train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
number_train = len(X_train)
number_test = len(X_test)

# Normalize and flatten the inputs
input_size = np.size(X_train[0])
X_train = np.reshape(X_train/255, (number_train, input_size))
X_test = np.reshape(X_test/255, (number_test, input_size))

# Derive one-hot versions of the train outputs
output_size = 10
Y_train = np.zeros((number_train, output_size))
Y_train[np.arange(number_train), y_train] = 1

# Initialize the weights and biases to recover
hidden_size = 256
W0 = np.random.default_rng().normal(0, 0.01, size=(input_size, hidden_size))
b0 = np.zeros(hidden_size)
W1 = np.random.default_rng().normal(0, 0.01, size=(hidden_size, output_size))
b1 = np.zeros(output_size)

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Initialize lists for the mean train loss and accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]

# Initialize a list for the test accuracy overall for every epoch
test_accuracy = [None]*number_epochs

# Loop over the epochs
for i in range(number_epochs):
    
    # Generate random indices for all the train examples
    train_indices = np.arange(number_train)
    random.shuffle(train_indices)
    
    # Loop over the train examples in minibatches
    for j in np.arange(0, number_train, batch_size):
        
        # Get the indices of the train examples for one minibatch
        batch_indices = train_indices[j:min(j+batch_size, number_train)]
        
        # Get the train inputs and outputs for the minibatch
        X = X_train[batch_indices, :]
        y = y_train[batch_indices]
        Y = Y_train[batch_indices]
        
        # Compute the outputs of the model (with ReLU)
        H = np.matmul(X, W0) + b0
        H[H<0] = 0
        O = np.matmul(H, W1) + b1
        
        # Compute the softmax of the logits (indirectly to avoid numerical stability issues)
        O = O-np.max(O, axis=1)[:, None]
        O_exp = np.exp(O)
        Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
        
        # Compute the mean cross-entropy loss for the minibatch and save it
        l = np.mean(np.log(np.sum(O_exp, axis=1)-np.sum(Y*O, axis=1)))
        train_loss[i].append(l)
        
        # Compute the mean accuracy for the minibatch and save it
        a = np.mean(np.argmax(Y_hat, axis=1)==y)
        train_accuracy[i].append(a)
        
        # Compute the derivative of the loss wrt the output of the output layer
        dl1 = Y_hat-Y
        
        # Derive the derivative of the loss wrt the output of the hidden layer (using the chain rule)
        dl0 = np.matmul(dl1, W1.T)
               
        # Update the weights and biases of the output layer using SGD
        W1 = W1-learning_rate*np.matmul(H.T, dl1)/np.shape(H)[0]
        b1 = b1-learning_rate*np.mean(dl1, axis=0)
        
        # Update the weights and biases of the hidden layer using SGD
        W0 = W0-learning_rate*np.matmul(X.T, dl0)/np.shape(X)[0]
        b0 = b0-learning_rate*np.mean(dl0, axis=0)

    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = np.mean(train_loss[i])
    train_accuracy[i] = np.mean(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for the current epoch
    H = np.matmul(X_test, W0) + b0
    H[H<0] = 0
    O = np.matmul(H, W1) + b1
    O = O-np.max(O, axis=1)[:, None]
    O_exp = np.exp(O)
    Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
    test_accuracy[i] = np.mean(np.argmax(Y_hat, axis=1)==y_test)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
number_examples = 10
H = np.matmul(X_test[:number_examples, :], W0) + b0
H[H<0] = 0
O = np.matmul(H, W1) + b1
O = O-np.max(O, axis=1)[:, None]
O_exp = np.exp(O)
Y_hat = O_exp/np.sum(O_exp, axis=1)[:, None]
y_hat = np.argmax(Y_hat, axis=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(np.reshape(X_test[i, :], (28, 28))*255, cmap='binary')
    plt.title(f'True: {label_list[y_test[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=0.928; train_accuracy=0.625; test_accuracy=0.748
2/10: train_loss=0.462; train_accuracy=0.790; test_accuracy=0.786
3/10: train_loss=0.389; train_accuracy=0.820; test_accuracy=0.814
4/10: train_loss=0.357; train_accuracy=0.830; test_accuracy=0.820
5/10: train_loss=0.338; train_accuracy=0.838; test_accuracy=0.822
6/10: train_loss=0.327; train_accuracy=0.842; test_accuracy=0.835
7/10: train_loss=0.317; train_accuracy=0.845; test_accuracy=0.827
8/10: train_loss=0.313; train_accuracy=0.847; test_accuracy=0.834
9/10: train_loss=0.305; train_accuracy=0.851; test_accuracy=0.821
10/10: train_loss=0.301; train_accuracy=0.852; test_accuracy=0.822

3.2. MLP from scratch in PyTorch¶

In [3]:

import matplotlib.pyplot as plt
import torch
import torchvision
from torch.utils import data

# Get the dataset (transform the image data from PIL type to normalized 32-bit floating point tensors)
fmnist_train = torchvision.datasets.FashionMNIST(root='data', train=True, download=True, 
                                                 transform=torchvision.transforms.ToTensor())
fmnist_test = torchvision.datasets.FashionMNIST(root='data', train=False, download=True,
                                                transform=torchvision.transforms.ToTensor())

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Use data iterators to read a minibatch at each iteration, shuffling the examples for the train set and using 4 processes
train_iter = data.DataLoader(fmnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(fmnist_test, batch_size, shuffle=False, num_workers=4)

# Initialize the parameters to recover, requiring the gradients to be computed
input_size = fmnist_train[0][0].nelement()
output_size = 10
hidden_size = 256
W0 = torch.normal(0, 0.01, size=(input_size, hidden_size), requires_grad=True)
b0 = torch.zeros(hidden_size, requires_grad=True)
W1 = torch.normal(0, 0.01, size=(hidden_size, output_size), requires_grad=True)
b1 = torch.zeros(output_size, requires_grad=True)

# Initialize lists for the mean train loss, train and test accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]
test_accuracy = [[] for _ in range(number_epochs)]

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the train examples in minibatches
    for X, y in train_iter:
        
        # Compute the outputs of the model (with ReLU), after flattening the images
        H = torch.matmul(torch.reshape(X, (-1, input_size)), W0) + b0
        H[H<0] = 0
        O = torch.matmul(H, W1) + b1

        # Compute the softmax of the logits
        O_exp = torch.exp(O)
        Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
        
        # Compute the cross-entropy loss (use the indices of the true classes in y_batch 
        # to get the corresponding probabilities in y_batch, for all the examples)
        l = -torch.log(Y_hat[range(Y_hat.shape[0]), y])
        
        # Save the mean loss for the current minibatch
        train_loss[i].append(torch.mean(l).item())
        
        # Compute the mean accuracy for the current minibatch and save it
        a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
        train_accuracy[i].append(a)
        
        # Compute the gradient on l with respect to W and b
        # (sum and not mean as the gradients will be divided by the batch size during SGD)
        torch.sum(l).backward()
        
        # Disable gradient calculation for the following operations not to be differentiable
        with torch.no_grad():
            
            # Update the weights and biases using SGD
            # (use augmented assignments to avoid modifying existing variables)
            W1 -= learning_rate*W1.grad/len(l)
            b1 -= learning_rate*b1.grad/len(l)
            W0 -= learning_rate*W0.grad/len(l)
            b0 -= learning_rate*b0.grad/len(l)
            
            # Set the gradients to zeros to avoid accumulating gradients
            W1.grad.zero_()
            b1.grad.zero_()
            W0.grad.zero_()
            b0.grad.zero_()
    
    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = sum(train_loss[i])/len(train_loss[i])
    train_accuracy[i] = sum(train_accuracy[i])/len(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for every epoch, in minibatches
    with torch.no_grad():
        for X, y in test_iter:
            H = torch.matmul(torch.reshape(X, (-1, input_size)), W0) + b0
            H[H<0] = 0
            O = torch.matmul(H, W1) + b1
            O_exp = torch.exp(O)
            Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
            a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
            test_accuracy[i].append(a)
    test_accuracy[i] = sum(test_accuracy[i])/len(test_accuracy[i])
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
for X, y in test_iter:
    break
number_examples = 10
H = torch.matmul(torch.reshape(X[:number_examples], (-1, input_size)), W0) + b0
H[H<0] = 0
O = torch.matmul(H, W1) + b1
O_exp = torch.exp(O)
Y_hat = O_exp/torch.sum(O_exp, 1, keepdim=True)
y_hat = torch.argmax(Y_hat, dim=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X[i][0], cmap='binary')
    plt.title(f'True: {label_list[y[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=1.041; train_accuracy=0.646; test_accuracy=0.736
2/10: train_loss=0.605; train_accuracy=0.786; test_accuracy=0.790
3/10: train_loss=0.520; train_accuracy=0.818; test_accuracy=0.770
4/10: train_loss=0.482; train_accuracy=0.831; test_accuracy=0.822
5/10: train_loss=0.455; train_accuracy=0.841; test_accuracy=0.806
6/10: train_loss=0.432; train_accuracy=0.847; test_accuracy=0.834
7/10: train_loss=0.419; train_accuracy=0.851; test_accuracy=0.838
8/10: train_loss=0.403; train_accuracy=0.858; test_accuracy=0.839
9/10: train_loss=0.393; train_accuracy=0.861; test_accuracy=0.830
10/10: train_loss=0.383; train_accuracy=0.864; test_accuracy=0.849

3.3. MLP using APIs in PyTorch¶

In [1]:

import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils import data
import torchvision

# Get the dataset (transform the image data from PIL type to normalized 32-bit floating point tensors)
fmnist_train = torchvision.datasets.FashionMNIST(root='data', train=True, download=True, 
                                                 transform=torchvision.transforms.ToTensor())
fmnist_test = torchvision.datasets.FashionMNIST(root='data', train=False, download=True,
                                                transform=torchvision.transforms.ToTensor())

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Use data iterators to read a minibatch at each iteration, shuffling the examples for the train set and using 4 processes
train_iter = data.DataLoader(fmnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(fmnist_test, batch_size, shuffle=False, num_workers=4)

# Define the model, with a flatten layer to reshape the inputs, two fully-connected layer, and a ReLU in-between
input_size = fmnist_train[0][0].nelement()
hidden_size = 256
output_size = 10
model = nn.Sequential(nn.Flatten(), 
                      nn.Linear(input_size, hidden_size), 
                      nn.ReLU(), 
                      nn.Linear(hidden_size, output_size))

# Initialize the parameters by applying a function recursively to every submodule
def init(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
model.apply(init);

# Define the loss function (with no reduction applied to the output, no mean, no sum, none)
loss = nn.CrossEntropyLoss(reduction='none')

# Define the optimization algorithm
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Initialize lists for the mean train loss, train and test accuracy over the minibatches for every epoch
train_loss = [[] for _ in range(number_epochs)]
train_accuracy = [[] for _ in range(number_epochs)]
test_accuracy = [[] for _ in range(number_epochs)]

# Loop over the epochs
for i in range(number_epochs):
    
    # Loop over the train examples in minibatches
    for X, y in train_iter:
        
        # Compute the predicted outputs
        Y_hat = model(X)
        
        # Compute the loss
        l = loss(Y_hat, y)
        
        # Save the mean loss for the current minibatch
        train_loss[i].append(torch.mean(l).item())
        
        # Compute the mean accuracy for the current minibatch and save it
        a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
        train_accuracy[i].append(a)
        
        # Set the gradients to zero
        optimizer.zero_grad()
        
        # Compute the gradient
        l.mean().backward()
        
        # Performs a single parameter update
        optimizer.step()
        
    # Derive the mean train loss and accuracy for the current epoch
    train_loss[i] = sum(train_loss[i])/len(train_loss[i])
    train_accuracy[i] = sum(train_accuracy[i])/len(train_accuracy[i])
    
    # Compute the test outputs and derive the test accuracy for every epoch, in minibatches
    with torch.no_grad():
        for X, y in test_iter:
            Y_hat = model(X)
            a = torch.mean((torch.argmax(Y_hat, dim=1)==y)*1.0).item()
            test_accuracy[i].append(a)
    test_accuracy[i] = sum(test_accuracy[i])/len(test_accuracy[i])
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: train_loss={train_loss[i]:.3f}; train_accuracy={train_accuracy[i]:.3f}; test_accuracy={test_accuracy[i]:.3f}')
    
# Show some predictions
for X, y in test_iter:
    break
number_examples = 10
Y_hat = model(X[:number_examples])
y_hat = torch.argmax(Y_hat, dim=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X[i][0], cmap='binary')
    plt.title(f'True: {label_list[y[i].item()]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

1/10: train_loss=1.040; train_accuracy=0.645; test_accuracy=0.712
2/10: train_loss=0.598; train_accuracy=0.790; test_accuracy=0.799
3/10: train_loss=0.518; train_accuracy=0.819; test_accuracy=0.810
4/10: train_loss=0.478; train_accuracy=0.833; test_accuracy=0.825
5/10: train_loss=0.454; train_accuracy=0.839; test_accuracy=0.833
6/10: train_loss=0.431; train_accuracy=0.848; test_accuracy=0.837
7/10: train_loss=0.415; train_accuracy=0.854; test_accuracy=0.843
8/10: train_loss=0.405; train_accuracy=0.858; test_accuracy=0.849
9/10: train_loss=0.391; train_accuracy=0.862; test_accuracy=0.839
10/10: train_loss=0.382; train_accuracy=0.864; test_accuracy=0.853

3.4. MLP using higher-level APIs in Keras¶

In [4]:

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Get the train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train = X_train/255
X_test = X_test/255
input_size = X_train[0, :, :].shape
hidden_size = 256
output_size = 10

# Define the parameters for the training
number_epochs = 10
batch_size = 256
learning_rate = 0.1

# Define a model with flattened inputs, a densely-connected NN layer with a ReLU, and another one without activation
model = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=input_size), 
                             tf.keras.layers.Dense(hidden_size, 
                                                   activation='relu', 
                                                   kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), 
                                                   bias_initializer='zeros'), 
                             tf.keras.layers.Dense(output_size, 
                                                   activation=None, 
                                                   kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), 
                                                   bias_initializer='zeros')])

# Configure the model with SGD optimizer, cross-entropy loss (with integers, not one-hot), and accuracy metrics
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), \
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \
              metrics=['accuracy'])

# Train the model
model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Show some predictions
number_examples = 10
Y_hat = model.predict(X_test[:number_examples, :, :])
y_hat = np.argmax(Y_hat, axis=1)
label_list = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
plt.figure(figsize=(18, 2))
for i in range(number_examples):
    plt.subplot(1, number_examples, i+1)
    plt.imshow(X_test[i, :, :], cmap='binary')
    plt.title(f'True: {label_list[y_test[i]]}\n Pred: {label_list[y_hat[i]]}')
    plt.xticks([])
    plt.yticks([])
plt.show()

Epoch 1/10
235/235 [==============================] - 1s 2ms/step - loss: 1.0377 - accuracy: 0.6388
Epoch 2/10
235/235 [==============================] - 0s 2ms/step - loss: 0.5989 - accuracy: 0.7903
Epoch 3/10
235/235 [==============================] - 0s 2ms/step - loss: 0.5175 - accuracy: 0.8191
Epoch 4/10
235/235 [==============================] - 0s 2ms/step - loss: 0.4774 - accuracy: 0.8320
Epoch 5/10
235/235 [==============================] - 0s 2ms/step - loss: 0.4517 - accuracy: 0.8424
Epoch 6/10
235/235 [==============================] - 0s 2ms/step - loss: 0.4316 - accuracy: 0.8482
Epoch 7/10
235/235 [==============================] - 0s 2ms/step - loss: 0.4165 - accuracy: 0.8530
Epoch 8/10
235/235 [==============================] - 0s 2ms/step - loss: 0.4021 - accuracy: 0.8579
Epoch 9/10
235/235 [==============================] - 0s 2ms/step - loss: 0.3896 - accuracy: 0.8623
Epoch 10/10
235/235 [==============================] - 0s 2ms/step - loss: 0.3800 - accuracy: 0.8653

In [ ]: