In this exercise we're going to train a neural net using PyTorch.
I've already put together a cell to do the necessary imports and fetch the training and test data.
# JUST RUN THIS
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Training split of MNIST, stored under ./data (downloaded if requested and
# not already present). ToTensor() turns each image into a float tensor.
train_data = datasets.MNIST("data", train=True, transform=ToTensor(), download=True)

# Test split of MNIST, using the same folder and the same transform.
test_data = datasets.MNIST("data", train=False, transform=ToTensor(), download=True)
PyTorch has a special object called a DataLoader, which we can use to iterate through batches of X and y for both the training and test data.
In neural nets, you need to choose a batch size. You calculate the loss for one batch before doing the backpropagation for that batch.
# Number of samples handed to the model per batch.
# NOTE: `...` is a placeholder -- replace it with an integer of your choice
# before running, otherwise the DataLoader calls below cannot work.
batch_size = ...
# Create data loaders.
# Each loader wraps one dataset and yields it in chunks of `batch_size`.
train_dataloader = DataLoader(train_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
You pick your own batch size here, but I recommend something more than 10 and less than 200.
# YOUR CODE HERE
In PyTorch, we define a class to make a model. This should be largely copy-paste because the first layer must take 784 inputs (28x28 pixels) and the last layer must produce 10 outputs (one per digit), but feel free to change the hidden layers' size.
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier mapping a 28x28 image to 10 class logits.

    The network flattens each input to 784 values and passes it through
    three linear layers separated by ReLU activations.

    Args:
        hidden_size: Width of the two hidden layers. Defaults to 512, which
            reproduces the original fixed architecture.
    """

    def __init__(self, hidden_size: int = 512) -> None:
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 10),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-normalised) scores for the 10 classes.

        Args:
            x: Batch of inputs whose non-batch dimensions multiply to 784.

        Returns:
            Tensor of shape (batch, 10) holding one logit per class.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# YOUR CODE HERE
Your model needs to be initialized. Do that. Print it to inspect it.
# Instantiate the network, place it on the CPU, then print its layer layout.
model = NeuralNetwork()
model = model.to("cpu")
print(model)
You'll also need to initialize objects for your loss function (which says how badly a batch performed) and your optimizer (which moves the weights based on the loss).
# Cross-entropy compares the model's 10 logits with the true class index.
loss_fn = nn.CrossEntropyLoss()
# Stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)
# YOUR CODE HERE
One epoch is when you train on your entire training dataset and then score your test dataset. You're going to do multiple epochs. It's your pick as to how many (but I recommend fewer than 10 because it can take a while).
Make sure you initialize your model before we begin.
This is the rough scaffolding for the for-loop. We'll fill in the Train and Test sections in the next steps.
# Scaffold only: every `...` below is a placeholder to be filled in.
# Replace this one with the integer number of epochs to run.
num_epochs = ...
for epoch in range(num_epochs):
    # Train model for epoch
    # (one full pass over the training batches goes here)
    ...
    # Test model for epoch
    # (one full pass over the test batches goes here)
    ...
# Printed once, after the last epoch has finished.
print("Model is done!")
Here's some code for training our model for one epoch. Read through it and try and make sense of it. You should be doing this exactly once per epoch inside the for-loop defined in step 3.
# Train the model (loop over batches of training examples)
model.train()  # switch the module to training mode
# Total number of training samples. len(train_dataloader) is the number of
# batches, which would make the "[seen/total]" progress log misleading.
num_training_samples = len(train_dataloader.dataset)
for i, (X, y) in enumerate(train_dataloader):
    X = X.to("cpu")
    y = y.to("cpu")

    # Compute prediction error for the batch
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation: compute gradients, update the weights, then clear the
    # gradients so they do not accumulate into the next batch.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Log our progress every 100 batches
    if i % 100 == 0:
        print(f"loss: {loss.item():>7f} [{(i+1)*len(X):>5d}/{num_training_samples:>5d}]")
Again, we test every epoch. Here's the code.
# Test the epoch (loop over batches of testing examples)
model.eval()  # switch the module to evaluation mode
num_test_samples = len(test_dataloader.dataset)
total_loss = 0
num_correct = 0
with torch.no_grad():  # no gradients are needed while scoring
    for X, y in test_dataloader:
        X = X.to("cpu")
        y = y.to("cpu")
        pred = model(X)
        # nn.CrossEntropyLoss averages over the batch by default, so weight
        # the batch mean by the batch size; dividing the sum by the sample
        # count below then yields a true per-sample average.
        total_loss += loss_fn(pred, y).item() * len(y)
        # argmax(1) picks the highest-scoring class for every sample.
        num_correct += (pred.argmax(1) == y).sum().item()

# Evaluate
avg_loss = total_loss / num_test_samples
accuracy = num_correct / num_test_samples
print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>7f}")
Train your Neural Network and track the accuracy and the loss function.
# I FILLED THIS IN FOR YOU
# Runs `num_epochs` rounds; each round trains on every training batch and
# then scores the model on every test batch.
num_epochs = 3  # Set this to the number of your choice
for epoch in range(num_epochs):
    print(f"Train/Test Epoch Round {epoch + 1}")
    print("------------------------")

    # Train the model (loop over batches of training examples)
    model.train()
    # Total number of training samples. len(train_dataloader) is the number
    # of batches, which would make the "[seen/total]" log misleading.
    num_training_samples = len(train_dataloader.dataset)
    for i, (X, y) in enumerate(train_dataloader):
        X = X.to("cpu")
        y = y.to("cpu")

        # Compute prediction error for the batch
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation: compute gradients, update the weights, then clear
        # the gradients so they do not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Log our progress every 100 batches
        if i % 100 == 0:
            print(f"loss: {loss.item():>7f} [{(i+1)*len(X):>5d}/{num_training_samples:>5d}]")

    # Test the epoch (loop over batches of testing examples)
    model.eval()
    num_test_samples = len(test_dataloader.dataset)
    total_loss = 0
    num_correct = 0
    with torch.no_grad():  # no gradients are needed while scoring
        for X, y in test_dataloader:
            X = X.to("cpu")
            y = y.to("cpu")
            pred = model(X)
            # nn.CrossEntropyLoss averages over the batch by default, so
            # weight the batch mean by the batch size; dividing the sum by
            # the sample count below yields a true per-sample average.
            total_loss += loss_fn(pred, y).item() * len(y)
            # argmax(1) picks the highest-scoring class for every sample.
            num_correct += (pred.argmax(1) == y).sum().item()

    # Evaluate
    avg_loss = total_loss / num_test_samples
    accuracy = num_correct / num_test_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>7f}")

print("Model is done!")
It only feels real if you can test it on real images.
I pulled some of the MNIST images into our datasets/ folder. Here's some code to test your model on them and show both the image and the model's prediction.
from torchvision import transforms
from PIL import Image
from google.colab import drive
import matplotlib.pyplot as plt
# Make the Google Drive contents available under /content/gdrive.
drive.mount('/content/gdrive')
# Define the image preprocessing steps
# These must match how the training data was prepared. The MNIST datasets
# were loaded with ToTensor() only (values in [0, 1]), so no extra
# normalisation is applied here; shifting inputs to [-1, 1] at prediction
# time would feed the model a value range it never saw during training.
preprocess = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.Resize((28, 28)),  # Resize to 28x28 pixels
    transforms.ToTensor(),  # Convert to a float tensor in [0, 1]
])
# Predictions only from here on, so put the model in evaluation mode.
model.eval()

# Walk through the sample files img_1.jpg ... img_10.jpg
for sample_idx in range(1, 11):
    # Load the sample from the mounted Drive folder
    img_path = f"/content/gdrive/MyDrive/datasets/mnist_test_sample/img_{sample_idx}.jpg"
    img = Image.open(img_path)

    # Preprocess and add a leading batch dimension in one step
    batch = preprocess(img).unsqueeze(0)

    # Score the image without tracking gradients; the top logit is the answer
    with torch.no_grad():
        logits = model(batch)
    predicted_class = logits.argmax(1).item()

    # Show the original image in a small 2x2 inch figure
    plt.figure(figsize=(2, 2))
    plt.imshow(img, cmap="gray")
    plt.title("Input Image")
    plt.axis("off")
    plt.show()

    # Report what the model thinks the digit is
    print(f"Predicted class: {predicted_class}")
# YOUR CODE HERE
If you get this far and we're still working, try changing things about your model to make the accuracy higher.
Ideas:
Test your own handwriting!
Navigate to http://drawmnist.com/
Draw a number and download it locally.
Open the 📁 icon on the right side of Colab
Drag-and-drop your image into the folder
Test it against your trained model with the code below
# Path of the drawing uploaded into the Colab file browser.
# (A plain string: there is nothing to interpolate, so no f-prefix.)
img_path = "/content/digit_28x28.jpg"
img = Image.open(img_path)

# Preprocess the image and add the batch dimension the model expects
img_tensor = preprocess(img).unsqueeze(0)

# Make prediction (no gradients are needed for inference)
with torch.no_grad():
    output = model(img_tensor)
# The index of the largest logit is the predicted digit.
predicted_class = output.argmax(1).item()

# Display the image
plt.figure(figsize=(2, 2))  # Set figure size to 2x2 inches
plt.imshow(img, cmap="gray")
plt.title("Input Image")
plt.axis("off")
plt.show()

# Print the prediction
print(f"Predicted class: {predicted_class}")
# YOUR CODE HERE