Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka
CPython 3.6.8
IPython 7.2.0
torch 1.0.0
The network in this notebook is an implementation of the ResNet-18 [1] architecture, trained as a handwritten digit classifier on the MNIST dataset (http://yann.lecun.com/exdb/mnist/).
References
[1] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (pp. 770-778).
The following figure illustrates residual blocks with skip connections such that the input passed via the shortcut matches the dimensions of the main path's output, which allows the network to learn identity functions.
The ResNet-18 architecture actually uses residual blocks with skip connections in which the input passed via the shortcut is resized (via a 1x1 convolution) to match the dimensions of the main path's output. Such a residual block is illustrated below:
For a more detailed explanation see the other notebook, resnet-ex-1.ipynb.
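As a quick illustration of this idea, the following sketch (not part of the original notebook; the shapes are chosen arbitrarily) shows how a strided 1x1 convolution on the shortcut makes the two paths addable:

import torch
import torch.nn as nn
import torch.nn.functional as F

# Main path: two 3x3 convolutions; the first one halves the
# spatial resolution and doubles the channel dimension
main_path = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(128),
    nn.ReLU(inplace=True),
    nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(128))

# Shortcut: a strided 1x1 convolution resizes the input so it
# matches the main path's output dimensions
shortcut = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128))

x = torch.randn(1, 64, 28, 28)
out = F.relu(main_path(x) + shortcut(x))  # the skip connection
print(out.shape)  # torch.Size([1, 128, 14, 14])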
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10
# Architecture
NUM_FEATURES = 28*28
NUM_CLASSES = 10
# Other
DEVICE = "cuda:1"
GRAYSCALE = True
##########################
### MNIST DATASET
##########################
# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)
# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])
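As an optional sanity check (not in the original notebook), we can confirm the [0, 1] scaling mentioned in the comment above:

# confirm the [0, 1] scaling performed by transforms.ToTensor()
print('Pixel value range: %.1f to %.1f'
      % (images.min().item(), images.max().item()))  # expected: 0.0 to 1.0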
device = torch.device(DEVICE)

# Check that shuffling works properly and, thanks to the manual seed,
# is reproducible across runs
torch.manual_seed(0)

for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        x = x.to(device)
        y = y.to(device)
        break
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128
The following code cell implements the ResNet-18 architecture; it is a derivative of the code provided at https://pytorch.org/docs/0.4.0/_modules/torchvision/models/resnet.html.
##########################
### MODEL
##########################
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # resize the shortcut if the main path changed
        # the spatial resolution or channel dimension
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual  # skip connection
        out = self.relu(out)

        return out
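A quick shape check of the block (illustrative only; the input size is chosen arbitrarily): a stride-2 BasicBlock paired with a 1x1-convolution downsample halves the spatial resolution while doubling the channel dimension.

example_downsample = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128))
example_block = BasicBlock(inplanes=64, planes=128, stride=2,
                           downsample=example_downsample)
print(example_block(torch.randn(1, 64, 14, 14)).shape)
# torch.Size([1, 128, 7, 7])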
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes, grayscale):
        self.inplanes = 64
        if grayscale:
            in_dim = 1
        else:
            in_dim = 3
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He initialization for the convolutional layers
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, (2. / n)**.5)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # because MNIST is already 1x1 here:
        # disable avg pooling
        #x = self.avgpool(x)

        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
def resnet18(num_classes):
    """Constructs a ResNet-18 model."""
    model = ResNet(block=BasicBlock,
                   layers=[2, 2, 2, 2],
                   num_classes=num_classes,
                   grayscale=GRAYSCALE)  # GRAYSCALE comes from the settings above
    return model
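A side note on the initialization loop in ResNet.__init__ above: drawing each convolution weight from a zero-mean normal with standard deviation sqrt(2/n), where n = kernel_height * kernel_width * out_channels, is He (Kaiming) initialization in the fan-out convention. In current PyTorch versions, the same effect can be achieved via nn.init; the following is a sketch, not part of the original code:

def init_weights_kaiming(model):
    # equivalent to the manual initialization loop above (fan-out convention)
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='relu')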
torch.manual_seed(RANDOM_SEED)
model = resnet18(NUM_CLASSES)
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
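Optionally (not part of the original notebook), we can check the model size before training:

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# roughly 11.2 million for this grayscale, 10-class ResNet-18 variant
print('Trainable parameters: %d' % num_params)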
def compute_accuracy(model, data_loader, device):
    # assumes model.eval() and a no-grad context are set by the caller
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        # F.cross_entropy expects raw logits (it applies log-softmax internally)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0469 | Cost: 2.5552
Epoch: 001/010 | Batch 0050/0469 | Cost: 0.1593
Epoch: 001/010 | Batch 0100/0469 | Cost: 0.1098
Epoch: 001/010 | Batch 0150/0469 | Cost: 0.0836
Epoch: 001/010 | Batch 0200/0469 | Cost: 0.0846
Epoch: 001/010 | Batch 0250/0469 | Cost: 0.1061
Epoch: 001/010 | Batch 0300/0469 | Cost: 0.1361
Epoch: 001/010 | Batch 0350/0469 | Cost: 0.0166
Epoch: 001/010 | Batch 0400/0469 | Cost: 0.1310
Epoch: 001/010 | Batch 0450/0469 | Cost: 0.1225
Epoch: 001/010 | Train: 97.822%
Time elapsed: 0.36 min
Epoch: 002/010 | Batch 0000/0469 | Cost: 0.1019
Epoch: 002/010 | Batch 0050/0469 | Cost: 0.0406
Epoch: 002/010 | Batch 0100/0469 | Cost: 0.0383
Epoch: 002/010 | Batch 0150/0469 | Cost: 0.0501
Epoch: 002/010 | Batch 0200/0469 | Cost: 0.0945
Epoch: 002/010 | Batch 0250/0469 | Cost: 0.0163
Epoch: 002/010 | Batch 0300/0469 | Cost: 0.0213
Epoch: 002/010 | Batch 0350/0469 | Cost: 0.0348
Epoch: 002/010 | Batch 0400/0469 | Cost: 0.0236
Epoch: 002/010 | Batch 0450/0469 | Cost: 0.0249
Epoch: 002/010 | Train: 97.227%
Time elapsed: 0.74 min
Epoch: 003/010 | Batch 0000/0469 | Cost: 0.0535
Epoch: 003/010 | Batch 0050/0469 | Cost: 0.0187
Epoch: 003/010 | Batch 0100/0469 | Cost: 0.0272
Epoch: 003/010 | Batch 0150/0469 | Cost: 0.0949
Epoch: 003/010 | Batch 0200/0469 | Cost: 0.0341
Epoch: 003/010 | Batch 0250/0469 | Cost: 0.0314
Epoch: 003/010 | Batch 0300/0469 | Cost: 0.0180
Epoch: 003/010 | Batch 0350/0469 | Cost: 0.0214
Epoch: 003/010 | Batch 0400/0469 | Cost: 0.0969
Epoch: 003/010 | Batch 0450/0469 | Cost: 0.0419
Epoch: 003/010 | Train: 98.687%
Time elapsed: 1.07 min
Epoch: 004/010 | Batch 0000/0469 | Cost: 0.0341
Epoch: 004/010 | Batch 0050/0469 | Cost: 0.0065
Epoch: 004/010 | Batch 0100/0469 | Cost: 0.0252
Epoch: 004/010 | Batch 0150/0469 | Cost: 0.0136
Epoch: 004/010 | Batch 0200/0469 | Cost: 0.0950
Epoch: 004/010 | Batch 0250/0469 | Cost: 0.0405
Epoch: 004/010 | Batch 0300/0469 | Cost: 0.0049
Epoch: 004/010 | Batch 0350/0469 | Cost: 0.0050
Epoch: 004/010 | Batch 0400/0469 | Cost: 0.0074
Epoch: 004/010 | Batch 0450/0469 | Cost: 0.0442
Epoch: 004/010 | Train: 99.352%
Time elapsed: 1.41 min
Epoch: 005/010 | Batch 0000/0469 | Cost: 0.0231
Epoch: 005/010 | Batch 0050/0469 | Cost: 0.0157
Epoch: 005/010 | Batch 0100/0469 | Cost: 0.0369
Epoch: 005/010 | Batch 0150/0469 | Cost: 0.0227
Epoch: 005/010 | Batch 0200/0469 | Cost: 0.0801
Epoch: 005/010 | Batch 0250/0469 | Cost: 0.0293
Epoch: 005/010 | Batch 0300/0469 | Cost: 0.0252
Epoch: 005/010 | Batch 0350/0469 | Cost: 0.0079
Epoch: 005/010 | Batch 0400/0469 | Cost: 0.0813
Epoch: 005/010 | Batch 0450/0469 | Cost: 0.0304
Epoch: 005/010 | Train: 99.385%
Time elapsed: 1.76 min
Epoch: 006/010 | Batch 0000/0469 | Cost: 0.0110
Epoch: 006/010 | Batch 0050/0469 | Cost: 0.0402
Epoch: 006/010 | Batch 0100/0469 | Cost: 0.0013
Epoch: 006/010 | Batch 0150/0469 | Cost: 0.0032
Epoch: 006/010 | Batch 0200/0469 | Cost: 0.0288
Epoch: 006/010 | Batch 0250/0469 | Cost: 0.0733
Epoch: 006/010 | Batch 0300/0469 | Cost: 0.1322
Epoch: 006/010 | Batch 0350/0469 | Cost: 0.0026
Epoch: 006/010 | Batch 0400/0469 | Cost: 0.0504
Epoch: 006/010 | Batch 0450/0469 | Cost: 0.0512
Epoch: 006/010 | Train: 99.250%
Time elapsed: 2.13 min
Epoch: 007/010 | Batch 0000/0469 | Cost: 0.0273
Epoch: 007/010 | Batch 0050/0469 | Cost: 0.0214
Epoch: 007/010 | Batch 0100/0469 | Cost: 0.0034
Epoch: 007/010 | Batch 0150/0469 | Cost: 0.0036
Epoch: 007/010 | Batch 0200/0469 | Cost: 0.0019
Epoch: 007/010 | Batch 0250/0469 | Cost: 0.0437
Epoch: 007/010 | Batch 0300/0469 | Cost: 0.0076
Epoch: 007/010 | Batch 0350/0469 | Cost: 0.0311
Epoch: 007/010 | Batch 0400/0469 | Cost: 0.0146
Epoch: 007/010 | Batch 0450/0469 | Cost: 0.0008
Epoch: 007/010 | Train: 99.147%
Time elapsed: 2.49 min
Epoch: 008/010 | Batch 0000/0469 | Cost: 0.0550
Epoch: 008/010 | Batch 0050/0469 | Cost: 0.0357
Epoch: 008/010 | Batch 0100/0469 | Cost: 0.0021
Epoch: 008/010 | Batch 0150/0469 | Cost: 0.0570
Epoch: 008/010 | Batch 0200/0469 | Cost: 0.0040
Epoch: 008/010 | Batch 0250/0469 | Cost: 0.0118
Epoch: 008/010 | Batch 0300/0469 | Cost: 0.0097
Epoch: 008/010 | Batch 0350/0469 | Cost: 0.0011
Epoch: 008/010 | Batch 0400/0469 | Cost: 0.0399
Epoch: 008/010 | Batch 0450/0469 | Cost: 0.0395
Epoch: 008/010 | Train: 99.360%
Time elapsed: 2.86 min
Epoch: 009/010 | Batch 0000/0469 | Cost: 0.0060
Epoch: 009/010 | Batch 0050/0469 | Cost: 0.0824
Epoch: 009/010 | Batch 0100/0469 | Cost: 0.0235
Epoch: 009/010 | Batch 0150/0469 | Cost: 0.0135
Epoch: 009/010 | Batch 0200/0469 | Cost: 0.0273
Epoch: 009/010 | Batch 0250/0469 | Cost: 0.0391
Epoch: 009/010 | Batch 0300/0469 | Cost: 0.0624
Epoch: 009/010 | Batch 0350/0469 | Cost: 0.0203
Epoch: 009/010 | Batch 0400/0469 | Cost: 0.0012
Epoch: 009/010 | Batch 0450/0469 | Cost: 0.0095
Epoch: 009/010 | Train: 99.480%
Time elapsed: 3.22 min
Epoch: 010/010 | Batch 0000/0469 | Cost: 0.0043
Epoch: 010/010 | Batch 0050/0469 | Cost: 0.0057
Epoch: 010/010 | Batch 0100/0469 | Cost: 0.0165
Epoch: 010/010 | Batch 0150/0469 | Cost: 0.0011
Epoch: 010/010 | Batch 0200/0469 | Cost: 0.0006
Epoch: 010/010 | Batch 0250/0469 | Cost: 0.0017
Epoch: 010/010 | Batch 0300/0469 | Cost: 0.0226
Epoch: 010/010 | Batch 0350/0469 | Cost: 0.0282
Epoch: 010/010 | Batch 0400/0469 | Cost: 0.0430
Epoch: 010/010 | Batch 0450/0469 | Cost: 0.0077
Epoch: 010/010 | Train: 99.522%
Time elapsed: 3.59 min
Total Training Time: 3.59 min
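At this point one might want to persist the trained weights. A minimal sketch (not part of the original notebook; the filename is arbitrary):

# save the trained weights (hypothetical filename)
torch.save(model.state_dict(), 'resnet18_mnist.pt')
# restore them later via:
# model = resnet18(NUM_CLASSES)
# model.load_state_dict(torch.load('resnet18_mnist.pt'))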
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
Test accuracy: 99.06%
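To see where the remaining errors occur, one could additionally tally a confusion matrix (a hypothetical follow-up, not part of the original notebook):

# 10x10 confusion matrix over the test set (rows: true label, cols: predicted)
confmat = torch.zeros(NUM_CLASSES, NUM_CLASSES, dtype=torch.long)
model.eval()
with torch.set_grad_enabled(False):
    for features, targets in test_loader:
        logits, probas = model(features.to(DEVICE))
        preds = torch.argmax(probas, dim=1).cpu()
        for t, p in zip(targets, preds):
            confmat[t, p] += 1
print(confmat)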
# fetch the first batch from the test set for visualization
for features, targets in test_loader:
    break
# reshape the first test image from CHW to HWC, then drop the
# channel axis for plotting
nhwc_img = np.transpose(features[0], axes=(1, 2, 0))
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys');
model.eval()
logits, probas = model(features.to(device)[0, None])
print('Probability 7 %.2f%%' % (probas[0][7]*100))
Probability 7 99.97%
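Equivalently (a hypothetical follow-up, not in the original notebook), the hard class prediction can be read off with argmax:

print('Predicted label:', torch.argmax(probas, dim=1).item())
# -> 7, matching the 99.97% probability above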
%watermark -iv
numpy     1.15.4
pandas    0.23.4
torch     1.0.0
PIL.Image 5.3.0