#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# In[1]:


get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch")


# - Runs on CPU or GPU (if available)

# # Deep Convolutional GAN (for Cats and Dogs)

# Implementation of a deep convolutional GAN (DCGAN) that generates cat and dog images after training on Kaggle's Dogs vs. Cats dataset (https://www.kaggle.com/c/dogs-vs-cats/data).
# 
# This DCGAN architecture follows Radford et al.'s *Unsupervised representation learning with deep convolutional generative adversarial networks* [1], where the generator consists of
# 
# - transposed convolutional layers
# - BatchNorm
# - ReLU
# 
# and the discriminator consists of
# 
# - strided convolutional layers (no max-pooling)
# - BatchNorm
# - Leaky ReLU

# ### References
# 
# - [1] Radford, A., Metz, L., & Chintala, S. (2015). [Unsupervised representation learning with deep convolutional generative adversarial networks](https://arxiv.org/abs/1511.06434). arXiv preprint arXiv:1511.06434.

# ## Imports

# In[2]:


import time
import os
import numpy as np
import torch
import random
import torch.nn.functional as F
import torch.nn as nn
import torchvision.utils as vutils
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True


# In[3]:


import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# ## Settings

# In[4]:


##########################
### SETTINGS
##########################

# Device
CUDA = 'cuda:2'
DEVICE = torch.device(CUDA if torch.cuda.is_available() else "cpu")

# Hyperparameters
RANDOM_SEED = 42
GENERATOR_LEARNING_RATE = 0.0002
DISCRIMINATOR_LEARNING_RATE = 0.0002
NUM_EPOCHS = 100
BATCH_SIZE = 128
NUM_WORKERS = 4  # workers for the data loader

IMAGE_SIZE = (64, 64, 3)

# Size of the latent vector
LATENT_DIM = 100

# Number of feature maps in generator and discriminator
NUM_MAPS_GEN = 64
NUM_MAPS_DIS = 64

# Set random seeds for reproducibility
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED);


# ## Cats vs Dogs Dataset

# Download the Kaggle Dogs vs. Cats dataset from https://www.kaggle.com/c/dogs-vs-cats/data by clicking on the "Download All" link:
# 
# ![](../cnn/images/cats-and-dogs-download-all.png)

# Then, unzip the dataset.

# The dataset folder consists of two subfolders, `train` and `test1`, which contain the training and test images in JPEG format, respectively. Note that the test set examples are unlabeled.

# ```python
# import os
# 
# num_train_cats = len([i for i in os.listdir(os.path.join('dogs-vs-cats', 'train'))
#                       if i.endswith('.jpg') and i.startswith('cat')])
# 
# num_train_dogs = len([i for i in os.listdir(os.path.join('dogs-vs-cats', 'train'))
#                       if i.endswith('.jpg') and i.startswith('dog')])
# 
# print(f'Training set cats: {num_train_cats}')
# print(f'Training set dogs: {num_train_dogs}')
# ```

# The naming scheme within each of these subfolders is `<class>.<image number>.jpg`, for example, `cat.59.jpg`.
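# The images in the training folder come in varying sizes, which is why the data loading
# pipeline below resizes (and crops) everything to 64x64 pixels. A minimal sketch for
# checking this, assuming the `dogs-vs-cats/train` folder from above:
# 
# ```python
# import os
# from PIL import Image
# 
# train_dir = os.path.join('dogs-vs-cats', 'train')
# for fname in sorted(os.listdir(train_dir))[:5]:
#     if fname.endswith('.jpg'):
#         with Image.open(os.path.join(train_dir, fname)) as img:
#             # PIL reports (width, height); the sizes differ from file to file
#             print(f'{fname}: {img.size}')
# ```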
# In[5]:


img = Image.open(os.path.join('dogs-vs-cats', 'train', 'cat.59.jpg'))
print(np.asarray(img, dtype=np.uint8).shape)
plt.imshow(img);


# ### Creating Validation and Test Subsets

# - Move 2500 images from the training folder into a test set folder
# - Move 2500 images from the training folder into a validation set folder

# In[6]:


if not os.path.exists(os.path.join('dogs-vs-cats', 'test')):
    os.mkdir(os.path.join('dogs-vs-cats', 'test'))

if not os.path.exists(os.path.join('dogs-vs-cats', 'valid')):
    os.mkdir(os.path.join('dogs-vs-cats', 'valid'))


# In[7]:


for fname in os.listdir(os.path.join('dogs-vs-cats', 'train')):
    if not fname.endswith('.jpg'):
        continue
    _, img_num, _ = fname.split('.')
    filepath = os.path.join('dogs-vs-cats', 'train', fname)
    img_num = int(img_num)
    if img_num > 11249:
        os.rename(filepath, filepath.replace('train', 'test'))
    elif img_num > 9999:
        os.rename(filepath, filepath.replace('train', 'valid'))


# ### Dataloaders

# In[8]:


class CatsDogsDataset(Dataset):
    """Custom Dataset for loading Cats vs Dogs images"""

    def __init__(self, img_dir, transform=None):

        self.img_dir = img_dir

        self.img_names = [i for i in
                          os.listdir(img_dir)
                          if i.endswith('.jpg')]

        self.y = []
        for i in self.img_names:
            if i.split('.')[0] == 'cat':
                self.y.append(0)
            else:
                self.y.append(1)

        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.img_dir,
                                      self.img_names[index]))

        if self.transform is not None:
            img = self.transform(img)

        label = self.y[index]
        return img, label

    def __len__(self):
        return len(self.y)


# In[9]:


data_transforms = {
    'train': transforms.Compose([
        #transforms.RandomRotation(5),
        #transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(IMAGE_SIZE[0],
                                     scale=(0.96, 1.0),
                                     ratio=(0.95, 1.05)),
        transforms.ToTensor(),
        # normalize images to [-1, 1] range
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),
    'valid': transforms.Compose([
        transforms.Resize([IMAGE_SIZE[0], IMAGE_SIZE[1]]),
        transforms.ToTensor(),
        # normalize images to [-1, 1] range
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),
}


train_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'train'),
                                transform=data_transforms['train'])

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          drop_last=True,
                          num_workers=NUM_WORKERS,
                          shuffle=True)

# We don't need validation and test sets for GANs, which are unsupervised models
"""
valid_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'valid'),
                                transform=data_transforms['valid'])

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=NUM_WORKERS,
                          shuffle=False)

test_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'test'),
                               transform=data_transforms['valid'])

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         num_workers=NUM_WORKERS,
                         shuffle=False)
"""


# In[10]:


real_batch = next(iter(train_loader))
plt.figure(figsize=(8, 8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0][:64],
                                         padding=2,
                                         normalize=True),
                        (1, 2, 0)))


# ## Model

# In[11]:


# Some model code is loosely inspired by
# https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html


def weights_init(module):
    """
    Initializes weights according to Radford et al.'s DCGAN paper:
    convolutional weights ~ N(0, 0.02); BatchNorm weights ~ N(1, 0.02),
    BatchNorm biases = 0.
    """
    classname = module.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(module.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(module.weight.data, 1.0, 0.02)
        nn.init.constant_(module.bias.data, 0)
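# The feature map sizes annotated in the generator below (1x1 -> 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64)
# follow from the transposed-convolution output-size formula (with dilation=1 and no output padding):
# out = (in - 1)*stride - 2*padding + kernel_size. A minimal sketch that verifies these
# numbers for the layer settings used in the generator:
# 
# ```python
# def conv_transpose_out_size(in_size, kernel_size, stride, padding):
#     return (in_size - 1)*stride - 2*padding + kernel_size
# 
# size = 1  # the latent vector z is treated as a 1x1 "image" with LATENT_DIM channels
# for kernel_size, stride, padding in [(4, 1, 0), (4, 2, 1), (4, 2, 1), (4, 2, 1), (4, 2, 1)]:
#     size = conv_transpose_out_size(size, kernel_size, stride, padding)
#     print(size)  # 4, 8, 16, 32, 64
# ```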
# In[12]:


##########################
### MODEL
##########################


class DCGAN(torch.nn.Module):

    def __init__(self):
        super(DCGAN, self).__init__()

        self.generator = nn.Sequential(
            #
            # input size: vector z of size LATENT_DIM
            #
            nn.ConvTranspose2d(LATENT_DIM, NUM_MAPS_GEN*8,
                               kernel_size=4, stride=1, padding=0,
                               bias=False),  # bias is redundant when using BatchNorm
            nn.BatchNorm2d(NUM_MAPS_GEN*8),
            nn.ReLU(True),
            #
            # size: NUM_MAPS_GEN*8 x 4 x 4
            #
            nn.ConvTranspose2d(NUM_MAPS_GEN*8, NUM_MAPS_GEN*4,
                               kernel_size=4, stride=2, padding=1,
                               bias=False),
            nn.BatchNorm2d(NUM_MAPS_GEN*4),
            nn.ReLU(True),
            #
            # size: NUM_MAPS_GEN*4 x 8 x 8
            #
            nn.ConvTranspose2d(NUM_MAPS_GEN*4, NUM_MAPS_GEN*2,
                               kernel_size=4, stride=2, padding=1,
                               bias=False),
            nn.BatchNorm2d(NUM_MAPS_GEN*2),
            nn.ReLU(True),
            #
            # size: NUM_MAPS_GEN*2 x 16 x 16
            #
            nn.ConvTranspose2d(NUM_MAPS_GEN*2, NUM_MAPS_GEN,
                               kernel_size=4, stride=2, padding=1,
                               bias=False),
            nn.BatchNorm2d(NUM_MAPS_GEN),
            nn.ReLU(True),
            #
            # size: NUM_MAPS_GEN x 32 x 32
            #
            nn.ConvTranspose2d(NUM_MAPS_GEN, IMAGE_SIZE[2],
                               kernel_size=4, stride=2, padding=1,
                               bias=False),
            #
            # size: IMAGE_SIZE[2] x 64 x 64
            #
            nn.Tanh()
        )

        self.discriminator = nn.Sequential(
            #
            # input size: IMAGE_SIZE[2] x IMAGE_SIZE[0] x IMAGE_SIZE[1]
            #
            nn.Conv2d(IMAGE_SIZE[2], NUM_MAPS_DIS,
                      kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            #
            # size: NUM_MAPS_DIS x 32 x 32
            #
            nn.Conv2d(NUM_MAPS_DIS, NUM_MAPS_DIS*2,
                      kernel_size=4, stride=2, padding=1,
                      bias=False),
            nn.BatchNorm2d(NUM_MAPS_DIS*2),
            nn.LeakyReLU(0.2, inplace=True),
            #
            # size: NUM_MAPS_DIS*2 x 16 x 16
            #
            nn.Conv2d(NUM_MAPS_DIS*2, NUM_MAPS_DIS*4,
                      kernel_size=4, stride=2, padding=1,
                      bias=False),
            nn.BatchNorm2d(NUM_MAPS_DIS*4),
            nn.LeakyReLU(0.2, inplace=True),
            #
            # size: NUM_MAPS_DIS*4 x 8 x 8
            #
            nn.Conv2d(NUM_MAPS_DIS*4, NUM_MAPS_DIS*8,
                      kernel_size=4, stride=2, padding=1,
                      bias=False),
            nn.BatchNorm2d(NUM_MAPS_DIS*8),
            nn.LeakyReLU(0.2, inplace=True),
            #
            # size: NUM_MAPS_DIS*8 x 4 x 4
            #
            nn.Conv2d(NUM_MAPS_DIS*8, 1,
                      kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def generator_forward(self, z):
        img = self.generator(z)
        return img

    def discriminator_forward(self, img):
        pred = self.discriminator(img)  # use self, not the global `model`
        return pred


# In[13]:


torch.manual_seed(RANDOM_SEED)

loss_function = nn.BCELoss()

# float labels, since BCELoss expects float targets
real_label = 1.
fake_label = 0.

# Batch of latent (noise) vectors for
# evaluating / visualizing the training progress
# of the generator
fixed_noise = torch.randn(64, LATENT_DIM, 1, 1, device=DEVICE)

model = DCGAN()
model = model.to(DEVICE)
model.apply(weights_init)

print(model)


# In[14]:


from torchsummary import summary

# torchsummary can only use the default CUDA device, which
# causes issues if, e.g., cuda:1 is used
with torch.cuda.device(int(CUDA.split(':')[-1])):
    summary(model.generator, input_size=(LATENT_DIM, 1, 1), device='cuda')
    summary(model.discriminator, input_size=(IMAGE_SIZE[2], IMAGE_SIZE[0], IMAGE_SIZE[1]), device='cuda')


# In[15]:


optim_gener = torch.optim.Adam(model.generator.parameters(),
                               betas=(0.5, 0.999),
                               lr=GENERATOR_LEARNING_RATE)
optim_discr = torch.optim.Adam(model.discriminator.parameters(),
                               betas=(0.5, 0.999),
                               lr=DISCRIMINATOR_LEARNING_RATE)
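# A note on the loss computation in the training loop below: both networks are trained with
# `nn.BCELoss`. The discriminator pairs real images with the real label and (detached) fake
# images with the fake label. The generator uses the common non-saturating formulation: fake
# images are paired with the *real* label, so the generator maximizes log D(G(z)) instead of
# minimizing log(1 - D(G(z))), which yields stronger gradients while the discriminator still
# rejects the fakes easily. A minimal sketch with made-up discriminator outputs, just to
# illustrate the two formulations:
# 
# ```python
# bce = nn.BCELoss()
# d_fake = torch.tensor([0.10, 0.20, 0.05])  # hypothetical D scores for fake images
# ones = torch.ones_like(d_fake)
# 
# saturating = bce(1. - d_fake, ones)        # -mean(log(1 - D(G(z))))
# non_saturating = bce(d_fake, ones)         # -mean(log(D(G(z)))), used for the generator below
# ```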
# ## Training

# In[16]:


start_time = time.time()

discr_costs = []
gener_costs = []
images_from_noise = []

for epoch in range(NUM_EPOCHS):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        # --------------------------
        # Train Discriminator
        # --------------------------

        optim_discr.zero_grad()

        real_images = features.to(DEVICE)
        num_real = real_images.size(0)
        real_label_vec = torch.full((num_real,), real_label, device=DEVICE)

        # get discriminator loss on real images
        discr_pred_real = model.discriminator_forward(real_images).view(-1)
        real_loss = loss_function(discr_pred_real, real_label_vec)
        #real_loss.backward()

        # get discriminator loss on fake images
        random_vec = torch.randn(BATCH_SIZE, LATENT_DIM, 1, 1, device=DEVICE)
        fake_images = model.generator_forward(random_vec)
        fake_label_vec = torch.full((num_real,), fake_label, device=DEVICE)
        discr_pred_fake = model.discriminator_forward(fake_images.detach()).view(-1)
        fake_loss = loss_function(discr_pred_fake, fake_label_vec)
        #fake_loss.backward()

        # combined loss
        discr_loss = 0.5*(real_loss + fake_loss)
        discr_loss.backward()

        optim_discr.step()

        # --------------------------
        # Train Generator
        # --------------------------

        optim_gener.zero_grad()

        discr_pred_fake = model.discriminator_forward(fake_images).view(-1)
        gener_loss = loss_function(discr_pred_fake, real_label_vec)
        gener_loss.backward()

        optim_gener.step()

        # --------------------------
        # Logging
        # --------------------------

        discr_costs.append(discr_loss.item())
        gener_costs.append(gener_loss.item())

        ### LOGGING
        if not batch_idx % 100:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Gen/Dis Loss: %.4f/%.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx, len(train_loader), gener_loss, discr_loss))

    ### Save images generated from the fixed noise vectors for evaluation
    with torch.no_grad():
        fake_images = model.generator_forward(fixed_noise).detach().cpu()
        images_from_noise.append(
            vutils.make_grid(fake_images, padding=2, normalize=True))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))


# ## Evaluation

# In[17]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt


# In[18]:


ax1 = plt.subplot(1, 1, 1)
ax1.plot(range(len(gener_costs)), gener_costs, label='Generator loss')
ax1.plot(range(len(discr_costs)), discr_costs, label='Discriminator loss')
ax1.set_xlabel('Iterations')
ax1.set_ylabel('Loss')
ax1.legend()

###################
# Set second x-axis (epochs)
ax2 = ax1.twiny()
newlabel = list(range(NUM_EPOCHS+1))
iter_per_epoch = len(train_loader)
newpos = [e*iter_per_epoch for e in newlabel]

ax2.set_xticks(newpos[::10])
ax2.set_xticklabels(newlabel[::10])

ax2.xaxis.set_ticks_position('bottom')
ax2.xaxis.set_label_position('bottom')
ax2.spines['bottom'].set_position(('outward', 45))
ax2.set_xlabel('Epochs')
ax2.set_xlim(ax1.get_xlim())
###################

plt.show()


# In[19]:


##########################
### VISUALIZATION
##########################

for i in range(0, NUM_EPOCHS, 5):
    plt.imshow(np.transpose(images_from_noise[i], (1, 2, 0)))
    plt.show()


# In[20]:


plt.figure(figsize=(10, 10))
plt.imshow(np.transpose(images_from_noise[-1], (1, 2, 0)))
plt.show()


# In[ ]:
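# After training, new images can be generated by drawing fresh latent vectors and passing them
# through the generator. A minimal sketch, reusing the trained `model`, `LATENT_DIM`, and
# `DEVICE` from above:
# 
# ```python
# model.eval()  # switch BatchNorm layers to evaluation mode
# with torch.no_grad():
#     new_noise = torch.randn(64, LATENT_DIM, 1, 1, device=DEVICE)
#     new_images = model.generator_forward(new_noise).detach().cpu()
# 
# plt.figure(figsize=(10, 10))
# plt.axis('off')
# plt.imshow(np.transpose(vutils.make_grid(new_images, padding=2, normalize=True),
#                         (1, 2, 0)))
# plt.show()
# ```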