#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# In[1]:


# Load the `watermark` IPython extension and run it for this notebook.
# NOTE(review): the flags presumably record the author (-a), the Python
# version (-v) and the installed torch version (-p torch) -- confirm against
# the watermark documentation. `get_ipython` only exists inside IPython/Jupyter.
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch")


# - Runs on CPU or GPU (if available)

# # Model Zoo -- Reproducible Results with Deterministic Behavior and Runtime Benchmark

# In this notebook, we are benchmarking the performance impact of setting PyTorch to deterministic behavior. In general, there are two aspects to reproducible results in PyTorch:
# 1. Setting a random seed
# 2. Setting cuDNN and PyTorch algorithmic behavior to deterministic
# 
# For more details, please see https://pytorch.org/docs/stable/notes/randomness.html

# ### 1. Setting a random seed

# I recommend using a function like the following one prior to using dataset loaders and initializing a model if you want to ensure the data is shuffled in the same manner if you rerun this notebook and the model gets the same initial random weights:

# In[2]:


def set_all_seeds(seed):
    """Seed every random number generator used in this notebook.

    The seed is exported as the ``PL_GLOBAL_SEED`` environment variable
    (presumably for PyTorch Lightning -- verify if that matters to you) and
    then applied to Python's ``random`` module, NumPy, and PyTorch (the
    default generator and all CUDA devices).

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ["PL_GLOBAL_SEED"] = str(seed)

    # Same seeding order as before: Python, NumPy, torch, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# ### 2. Setting cuDNN and PyTorch algorithmic behavior to deterministic

# Similar to the `set_all_seeds` function above, I recommend setting the behavior of PyTorch and cuDNN to deterministic (this is particularly relevant when using GPUs). We can also define a function for that:

# In[3]:


def set_deterministic():
    """Switch cuDNN and PyTorch to deterministic algorithms.

    When CUDA is available, cuDNN auto-tuning (``benchmark``) is turned off
    and cuDNN is restricted to deterministic kernels. PyTorch itself is then
    told to use deterministic algorithms globally.

    Note:
        Per the PyTorch reproducibility notes, some CUDA operations also
        require the ``CUBLAS_WORKSPACE_CONFIG`` environment variable
        (e.g. ``:4096:8``) to be set before they run on CUDA >= 10.2;
        otherwise PyTorch raises a ``RuntimeError`` in deterministic mode.
    """
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # `torch.set_deterministic` was deprecated in favor of
    # `torch.use_deterministic_algorithms` and is missing from current
    # PyTorch releases; prefer the new API and fall back for old versions.
    if hasattr(torch, "use_deterministic_algorithms"):
        torch.use_deterministic_algorithms(True)
    else:
        torch.set_deterministic(True)


# # 1) Setup

# After setting up the general configuration in this section, the following two sections will train a ResNet-101 model without and with deterministic behavior to get a sense of how using deterministic options affects the runtime speed.

# In[4]:


import os
import numpy as np
import torch
import random


# In[5]:


##########################
### SETTINGS
##########################

# Device: the selected GPU when CUDA is available, otherwise the CPU
CUDA_DEVICE_NUM = 1  # change as appropriate
if torch.cuda.is_available():
    DEVICE = torch.device(f'cuda:{CUDA_DEVICE_NUM}')
else:
    DEVICE = torch.device('cpu')
print('Device:', DEVICE)

# Data settings
num_classes = 10

# Hyperparameters (shared by both benchmark runs below)
random_seed = 1
learning_rate = 0.01
batch_size = 128
num_epochs = 50


# In[6]:


import sys

# Put the parent directory first on the module search path so that the
# helper modules imported below resolve to the files one level up.
sys.path.insert(0, "..") # to include ../helper_evaluate.py etc.

# Project-local helpers; these imports only work after the path tweak above.
from helper_evaluate import compute_accuracy
from helper_data import get_dataloaders_cifar10
from helper_train import train_classifier_simple_v1


# # 2) Run without Deterministic Behavior

# Before we enable deterministic behavior, we will run a ResNet-101 with otherwise the exact same settings for comparison. Note that setting random seeds doesn't affect the timing results.

# In[7]:


### Set random seed ###
# Seed before creating the data loaders and the model so that shuffling and
# the initial weights are the same each time this notebook is rerun.
set_all_seeds(random_seed)


# In[8]:


##########################
### Dataset
##########################

# CIFAR-10 loaders for the baseline (non-deterministic) run.
# NOTE(review): validation_fraction=0.1 presumably holds out 10% of the
# training data for validation -- confirm in helper_data.
train_loader, valid_loader, test_loader = get_dataloaders_cifar10(
    batch_size, validation_fraction=0.1, num_workers=0)


# In[9]:


##########################
### Model
##########################


from deterministic_benchmark_utils import resnet101


# Build the ResNet-101 and place it on the target device before creating
# the Adam optimizer over its parameters.
model = resnet101(num_classes, grayscale=False).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# In[10]:


# Baseline training run; the helper's return value is not used here.
_ = train_classifier_simple_v1(
    model=model,
    optimizer=optimizer,
    device=DEVICE,
    train_loader=train_loader,
    valid_loader=valid_loader,
    num_epochs=num_epochs,
    logging_interval=200,
)


# # 3) Run with Deterministic Behavior

# In this section, we set the deterministic behavior via the `set_deterministic()` function defined at the top of this notebook and compare how it affects the runtime speed of the ResNet-101 model. (Note that setting random seeds doesn't affect the timing results.)

# In[11]:


# From here on, cuDNN/PyTorch are switched to deterministic behavior so the
# second training run can be compared against the baseline above.
set_deterministic()


# In[12]:


### Set random seed ###
# Re-seed with the same value as the baseline run so that data shuffling and
# weight initialization start from the same random state.
set_all_seeds(random_seed)


# In[13]:


##########################
### Dataset
##########################

# Recreate the CIFAR-10 loaders with the same arguments as the baseline run.
train_loader, valid_loader, test_loader = get_dataloaders_cifar10(
    batch_size, validation_fraction=0.1, num_workers=0)


# In[14]:


##########################
### Model
##########################


from deterministic_benchmark_utils import resnet101


# Fresh ResNet-101 and Adam optimizer for the deterministic run, configured
# exactly like the baseline.
model = resnet101(num_classes, grayscale=False).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# In[15]:


# Deterministic training run with the same arguments as the baseline; the
# helper's return value is not used here.
_ = train_classifier_simple_v1(
    model=model,
    optimizer=optimizer,
    device=DEVICE,
    train_loader=train_loader,
    valid_loader=valid_loader,
    num_epochs=num_epochs,
    logging_interval=200,
)


# # 4) Result

# In this particular case, the deterministic behavior does not seem to influence performance noticeably.