#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
#
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# In[1]:


get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch,pytorch_lightning")


# ## Higher-level PyTorch APIs: a short introduction to PyTorch Lightning (checkpointing the best model)

# ### Showing how to checkpoint the best model

# ### Setting up the PyTorch Lightning model

# In[2]:


import pytorch_lightning as pl
import torch
import torch.nn as nn
from torchmetrics import Accuracy


# In[3]:


class MultiLayerPerceptron(pl.LightningModule):
    def __init__(self, image_shape=(1, 28, 28), hidden_units=(32, 16)):
        super().__init__()

        # New PL attributes; note that torchmetrics >= 0.11 requires
        # Accuracy(task="multiclass", num_classes=10) instead:
        self.train_acc = Accuracy()
        self.valid_acc = Accuracy()
        self.test_acc = Accuracy()

        # Model similar to previous section:
        input_size = image_shape[0] * image_shape[1] * image_shape[2]
        all_layers = [nn.Flatten()]
        for hidden_unit in hidden_units:
            layer = nn.Linear(input_size, hidden_unit)
            all_layers.append(layer)
            all_layers.append(nn.ReLU())
            input_size = hidden_unit

        # No softmax layer here: nn.functional.cross_entropy expects raw
        # logits and applies log-softmax internally.
        all_layers.append(nn.Linear(hidden_units[-1], 10))
        self.model = nn.Sequential(*all_layers)

    def forward(self, x):
        x = self.model(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        # Reuse the logits for the loss instead of a second forward pass:
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def training_epoch_end(self, outs):
        self.log("train_acc", self.train_acc.compute())
        self.train_acc.reset()

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.valid_acc.update(preds, y)
        self.log("valid_loss", loss, prog_bar=True)
        return loss

    def validation_epoch_end(self, outs):
        self.log("valid_acc", self.valid_acc.compute(), prog_bar=True)
        self.valid_acc.reset()

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.test_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        # Logging the metric object lets Lightning compute the accuracy over
        # the whole test set rather than averaging per-batch values:
        self.log("test_acc", self.test_acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer


# ### Setting up the data loaders

# In[4]:


from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision import transforms


# In[5]:


class MnistDataModule(pl.LightningDataModule):
    def __init__(self, data_path='./'):
        super().__init__()
        self.data_path = data_path
        self.transform = transforms.Compose([transforms.ToTensor()])

    def prepare_data(self):
        MNIST(root=self.data_path, download=True)

    def setup(self, stage=None):
        # stage is either 'fit', 'validate', 'test', or 'predict';
        # it is not needed here
        mnist_all = MNIST(
            root=self.data_path,
            train=True,
            transform=self.transform,
            download=False
        )

        self.train, self.val = random_split(
            mnist_all, [55000, 5000], generator=torch.Generator().manual_seed(1)
        )

        self.test = MNIST(
            root=self.data_path,
            train=False,
            transform=self.transform,
            download=False
        )

    def train_dataloader(self):
        # shuffle the training set each epoch (not needed for val/test):
        return DataLoader(self.train, batch_size=64, shuffle=True, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val, batch_size=64, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=64, num_workers=4)


torch.manual_seed(1)
mnist_dm = MnistDataModule()
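# As a quick sanity check (an illustration added here, not part of the original notebook), we can invoke the `LightningDataModule` hooks manually and inspect one training batch outside the `Trainer`:

# In[ ]:


# Sketch under the setup above: prepare_data() downloads MNIST, and
# setup('fit') creates the 55000/5000 train/validation split.
dm_check = MnistDataModule()
dm_check.prepare_data()
dm_check.setup(stage='fit')

x_batch, y_batch = next(iter(dm_check.train_dataloader()))
print(x_batch.shape)  # expected: torch.Size([64, 1, 28, 28])
print(y_batch.shape)  # expected: torch.Size([64])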
# ### Training the model using the PyTorch Lightning Trainer class

# In[6]:


from pytorch_lightning.callbacks import ModelCheckpoint


callbacks = [ModelCheckpoint(save_top_k=1, mode='max', monitor="valid_acc")]  # save top 1 model


# In[7]:


mnistclassifier = MultiLayerPerceptron()

if torch.cuda.is_available():  # if you have GPUs
    # note: newer PyTorch Lightning versions use
    # pl.Trainer(..., accelerator="gpu", devices=1) instead of gpus=1
    trainer = pl.Trainer(max_epochs=300, callbacks=callbacks, gpus=1)
else:
    trainer = pl.Trainer(max_epochs=300, callbacks=callbacks)

trainer.fit(model=mnistclassifier, datamodule=mnist_dm)


# ### Evaluating the model using TensorBoard

# - By default, `trainer.test` evaluates the model in its current state, i.e., with the weights from the last training epoch:

# In[8]:


trainer.test(model=mnistclassifier, datamodule=mnist_dm)


# In[12]:


from IPython.display import Image
Image(filename='lightning-mlp-best-model_images/1.png')


# In[10]:


# Start tensorboard
get_ipython().run_line_magic('load_ext', 'tensorboard')
get_ipython().run_line_magic('tensorboard', '--logdir lightning_logs/')


# Based on the TensorBoard output above, we can see that the best model is not the model from the final training epoch. We can load the best checkpoint for evaluation as follows:

# In[11]:


trainer.test(model=mnistclassifier, datamodule=mnist_dm, ckpt_path='best')
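# The location of the best checkpoint is also exposed programmatically. As a short sketch (the variable names below are ours, not from the original notebook), we can reload those weights with `load_from_checkpoint` for use outside the `Trainer`; this relies on the `MultiLayerPerceptron` constructor defaults, since the model does not call `save_hyperparameters()`:

# In[ ]:


# The ModelCheckpoint callback records where the best-scoring model was saved.
best_path = trainer.checkpoint_callback.best_model_path
print(best_path)

# Restore the model from that file and switch it to inference mode.
best_model = MultiLayerPerceptron.load_from_checkpoint(best_path)
best_model.eval()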