Transfer learning is the re-use of a pretrained model on a new task. Most often, the two tasks are different but related to each other. For example, a model trained on image classification may have learnt image features that can also be harnessed for other image-related tasks. The technique has become increasingly popular in deep learning because it allows a model to be trained on comparatively little data.
The dataset can be downloaded from Kaggle.
A common strategy: freeze the intermediate layers of the pretrained model and only train a few layers close to the output (see the sketch below).
Figures taken from https://www.kaggle.com/pmigdal/alien-vs-predator-images
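As a minimal sketch of this strategy (assuming a torchvision ResNet-18 as the pretrained backbone, the same model we will use below):

import torch.nn as nn
from torchvision import models

backbone = models.resnet18(pretrained=True)  # weights pretrained on ImageNet

# freeze every parameter of the pretrained backbone
for param in backbone.parameters():
    param.requires_grad = False

# replace the final fully-connected layer with a new, trainable head (2 classes here)
backbone.fc = nn.Linear(backbone.fc.in_features, 2)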
# execute only if you're using Google Colab
!wget -q https://raw.githubusercontent.com/ahug/amld-pytorch-workshop/master/binder/requirements.txt -O requirements.txt
!pip install -qr requirements.txt
!mkdir -p data
!curl -L -o alien-vs-predator.zip "https://drive.google.com/uc?id=1IGiEW3Vtf-ZiLINHCGVDM0NRSkyiYT98&export=download"
!unzip -oq alien-vs-predator.zip -d data/
!rm alien-vs-predator.zip
!ls -l data/alien-vs-predator/
# for PIL.Image
!pip install --no-cache-dir -I pillow
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import colorama
matplotlib.rc('font', size=16)
data/alien-vs-predator
│
├───train
│   ├───alien
│   │       20.jpg
│   │       104.jpg
│   │       ...
│   │
│   └───predator
│           1.jpg
│           78.jpg
│           ...
│
└───validation
    ├───alien
    │       233.jpg
    │       12.jpg
    │       ...
    │
    └───predator
            22.jpg
            77.jpg
            ...
In the training dataloader, we can easily add preprocessing steps that augment the data (random cropping and scaling, flipping, etc.).
data_dir = os.path.join(os.getcwd(), "data", "alien-vs-predator")
train_data = datasets.ImageFolder(os.path.join(data_dir, "train"),
                                  transform=transforms.Compose([
                                      transforms.RandomResizedCrop(224),  # randomly crops and rescales to 224x224
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor()
                                  ]))

test_data = datasets.ImageFolder(os.path.join(data_dir, "validation"),
                                 transform=transforms.Compose([
                                     transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor()
                                 ]))
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)
class_names = train_data.classes
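As a quick sanity check (a small sketch on top of the loaders defined above), we can pull one batch from the training loader and inspect its shape and the class names:

images, labels = next(iter(train_loader))
print(images.shape)   # expected: torch.Size([32, 3, 224, 224])
print(labels.shape)   # expected: torch.Size([32])
print(class_names)    # expected: ['alien', 'predator']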
Transforms can be composed and stacked on top of each other in a similar fashion to modules. For training, they provide easy-to-use functionality for data augmentation.
resize_transformer = transforms.Resize(400)
horizontal_flip_transformer = transforms.RandomHorizontalFlip()
random_resize_crop_transformer = transforms.RandomResizedCrop(250, scale=(0.5, 1))
tensor_transformer = transforms.ToTensor()
preview_data = datasets.ImageFolder(os.path.join(data_dir, "train"))
img, label = next(iter(preview_data))
fig = plt.figure(figsize=(16,9))
plt.subplot(1, 4, 1)
plt.xlabel('Original')
plt.imshow(tensor_transformer(img).permute(1, 2, 0))
plt.subplot(1, 4, 2)
plt.xlabel('Resized (400x400)')
plt.imshow(tensor_transformer(resize_transformer(img)).permute(1, 2, 0))
plt.subplot(1, 4, 3)
plt.xlabel('Random Horizontal Flip')
plt.imshow(tensor_transformer(horizontal_flip_transformer(img)).permute(1, 2, 0))
plt.subplot(1, 4, 4)
plt.xlabel('Random resizing + cropping')
plt.imshow(tensor_transformer(random_resize_crop_transformer(img)).permute(1, 2, 0))
data, labels = next(iter(test_loader))
data, labels = data[:5], labels[:5]
fig = plt.figure(figsize=(16,9))
for i in range(0, 5):
    fig.add_subplot(1, 5, i+1)
    plt.imshow(data[i].permute(1, 2, 0))
    plt.xlabel(class_names[labels[i]])
from torchvision import models
print(dir(models))
model_ft = models.resnet18(pretrained=True)
model_ft
The last fully connected layer has 1000 output neurons (the model has been trained on ImageNet, which consists of 1000 categories).
model_ft.fc
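To double-check this, here is a small sketch (using a random dummy input rather than an actual image from the dataset) that runs one forward pass and confirms the 1000 ImageNet logits:

model_ft.eval()                      # inference mode, so BatchNorm statistics stay untouched
dummy = torch.randn(1, 3, 224, 224)  # one fake RGB image of the expected input size
with torch.no_grad():
    print(model_ft(dummy).shape)     # expected: torch.Size([1, 1000])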
We would like to perform binary classification (alien/predator). Therefore, we have to replace the last fully-connected layer to suit our needs (two output units).
model_ft.fc = nn.Linear(in_features=512, out_features=2)
model_ft.fc
The architecture now contains two output units, so we can use it to perform binary classification.
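As before, a small sketch with a dummy input illustrates the new head: it produces two logits, and a softmax turns them into probabilities for 'alien' and 'predator':

model_ft.eval()
dummy = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    logits = model_ft(dummy)              # expected shape: torch.Size([1, 2])
    probs = torch.softmax(logits, dim=1)  # the two probabilities sum to 1
print(logits.shape, probs)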
The train and accuracy functions below are almost identical to the ones we used when training the CNN. This again nicely demonstrates the modularity of PyTorch and its simple interface.
def train(model, train_loader, test_loader, device, num_epochs=3, lr=0.1, use_scheduler=False):
    # define an optimizer and a loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    if use_scheduler:
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.85)

    for epoch in range(num_epochs):
        print("="*40, "Starting epoch %d" % (epoch + 1), "="*40)

        model.train()  # switch back to training mode each epoch, since accuracy() puts the model
                       # into eval mode; required for modules like nn.Dropout and nn.BatchNorm
        cum_loss = 0

        # the dataloader returns batches of images in 'data' and a tensor with their respective labels in 'labels'
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            cum_loss += loss.item()
            if batch_idx % 5 == 0:
                print("Batch %d/%d" % (batch_idx, len(train_loader)))

        if use_scheduler:
            scheduler.step()  # decay the learning rate once per epoch, after the optimizer steps

        train_acc = accuracy(model, train_loader, device)
        test_acc = accuracy(model, test_loader, device)

        print(colorama.Fore.GREEN + "\nEpoch %d/%d, Loss=%.4f, Train-Acc=%d%%, Valid-Acc=%d%%"
              % (epoch+1, num_epochs, cum_loss/len(train_loader), 100*train_acc, 100*test_acc), colorama.Fore.RESET)
def accuracy(model, dataloader, device):
    """ Computes the model's accuracy on the data provided by 'dataloader'. """
    model.eval()
    num_correct = 0
    num_samples = 0
    with torch.no_grad():  # deactivates autograd, reduces memory usage and speeds up computations
        for data, labels in dataloader:
            data, labels = data.to(device), labels.to(device)
            predictions = model(data).max(1)[1]  # indices of the maxima along the second dimension
            num_correct += (predictions == labels).sum().item()
            num_samples += predictions.shape[0]
    return num_correct / num_samples
# freeze all layers except the newly added fully-connected layer
for name, param in model_ft.named_parameters():
    if name not in ["fc.weight", "fc.bias"]:
        param.requires_grad = False
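To verify that the freezing worked, here is a small check (not part of the original pipeline) that counts the parameters which remain trainable; only the weights and bias of the new fc layer should be left:

num_trainable = sum(p.numel() for p in model_ft.parameters() if p.requires_grad)
num_total = sum(p.numel() for p in model_ft.parameters())
print("trainable parameters: %d / %d" % (num_trainable, num_total))  # expected: 1026 trainable (512*2 weights + 2 biases)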
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_ft = model_ft.to(device)
train(model_ft, train_loader, test_loader, device, num_epochs=2)
def visualize_predictions(model, dataloader, device):
    data, labels = next(iter(dataloader))
    data, labels = data[:10].to(device), labels[:10]

    predictions = model(data).max(1)[1]
    predictions, data = predictions.cpu(), data.cpu()  # put them back on the CPU for visualization

    plt.figure(figsize=(16,9))
    for i in range(5):
        plt.subplot(1, 5, i+1)
        plt.imshow(data[i].permute(1, 2, 0))
        plt.xlabel("%s\n (%s)" % (test_data.classes[predictions[i].item()], test_data.classes[labels[i]]), fontsize=18)
        plt.xticks([])
        plt.yticks([])
visualize_predictions(model_ft, test_loader, device)