#!/usr/bin/env python
# coding: utf-8

# # "Deep Learning cheat sheet for Python"
# - toc:true
# - categories: ["deep-learning"]
# - image: images/copied_from_nb/images/dl-cheat-sheet.jpg
# - comments: true

# Having worked on Deep Learning for almost a year, I kept noting useful resources and snippets I used frequently. I've compiled all of them in one place below.
# **Feel free to share some of your hacks/resources/snippets below. Leave a comment below, pull requests are welcome too!**  
# 
# ![](images/dl-cheat-sheet.jpg)

# ## Jupyter clear GPU Memory
# 
# There are times I just can't afford to restart the kernel lol. The snippet below helps me in those cases.

# In[ ]:


# collapse-hide
import gc
def dump_tensors(gpu_only=True):
    """Best-effort release of tensor memory without restarting the kernel.

    Walks every object tracked by the garbage collector, drops this
    function's own reference to each tensor (or object exposing a tensor as
    ``.data``) it finds, then runs one garbage collection and finally empties
    the CUDA caching allocator.

    Tensors that are still referenced elsewhere (notebook variables, a live
    model or optimizer, ...) stay allocated; delete those names before
    calling this function.

    Args:
        gpu_only: If True (default) only objects reporting ``is_cuda`` are
            considered; if False, CPU tensors are considered as well.

    Returns:
        None.
    """
    for obj in gc.get_objects():
        try:
            holds_tensor = torch.is_tensor(obj) or (
                hasattr(obj, "data") and torch.is_tensor(obj.data)
            )
            if holds_tensor and (not gpu_only or obj.is_cuda):
                # Only removes the loop's reference; the tensor is freed
                # once nothing else points at it.
                del obj
        except Exception:
            # Probing attributes of arbitrary live objects can raise almost
            # anything (lazy modules, properties, proxies); those objects
            # are simply not tensors we can handle, so skip them.
            continue

    # Collect once, after the snapshot list from gc.get_objects() is gone,
    # so objects are no longer kept alive by the scan itself.
    gc.collect()
    # Empty the cache last: only memory already released by the collection
    # above can be handed back to the driver.
    torch.cuda.empty_cache()


dump_tensors()


# ## Free Quality Courses
# 
# - [Machine Learning](https://www.youtube.com/playlist?list=PLl8OlHZGYOQ7bkVbuRthEsaLr7bONzbXS)
#     - Taught by a professor at Cornell, love his style of teaching
# - [Deep Learning Fastai](https://course.fast.ai/)
# - [Deep Learning for Visual Recognition](https://www.youtube.com/playlist?list=PL3FW7Lu3i5JvHM8ljYj-zLfQRF3EO8sYv)
# - [Variational Autoencoder](https://www.youtube.com/playlist?list=PLdxQ7SoCLQANizknbIiHzL_hYjEaI-wUe)

# ## Free GPU resources
# 
# Resources below are mostly student focussed

# - [a good list of free resources](https://course.fast.ai/#ready-to-run-one-click-jupyter)
# - [free Azure,AWS and MongoDB creds (Student Developer Pack)](https://education.github.com/pack/offers)
# - [google cloud resources](https://google.dev/edu)

# ## Computer Vision models
# 
# - [PyTorch Image Models](https://github.com/rwightman/pytorch-image-models)
# - [fastai](https://github.com/fastai/fastai)
# - [(Generic) EfficientNets for PyTorch](https://github.com/rwightman/gen-efficientnet-pytorch)
# - [EfficientNet PyTorch](https://github.com/lukemelas/EfficientNet-PyTorch)

# ## GANs
# 
# - [Improved GAN (Semi-supervised GAN)](https://github.com/Sleepychord/ImprovedGAN-pytorch)
# - [Pytorch GAN](https://github.com/eriklindernoren/PyTorch-GAN)

# ## Training tools
# 
# - [tqdm: A progressbar](https://github.com/tqdm/tqdm)
# - [livelossplot](https://github.com/stared/livelossplot)
#     - best way to analyse plots while training
# - [The most lightweight experiment management tool that fits any workflow](https://neptune.ai/)
# - [fastpages](https://github.com/fastai/fastpages)

# ## Pre/Post processing
# 
# - [Train-val split pytorch](https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb)
# - [Augmix](https://github.com/google-research/augmix)
# - [Cutmix](https://github.com/clovaai/CutMix-PyTorch)
# - [Quick test time augmentation](https://github.com/qubvel/ttach)
# - [librosa: Audio extraction](https://github.com/librosa/librosa)

# ## K Fold Cross Validation

# In[ ]:


# collapse-hide
# Stratified K-fold training: one model is trained per fold and each fold's
# test-set probabilities are added into `all_probs`.
num_splits = 5
# Running sum of the per-fold test probabilities.
# NOTE(review): 610 x 3 is presumably (test samples, classes) -- confirm.
all_probs = torch.zeros(610, 3, dtype=torch.float32)
train_dataset, valid_dataset = get_train_valid_dataset(
    PROJECT_PATH + "data/train/", batch_size=batch_size, augment=True,
    random_seed=42, valid_size=0.2, shuffle=True, show_sample=False,
    num_workers=4, pin_memory=True, split_no=1)
# Use the constant declared above instead of repeating the literal.
skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)
# https://discuss.pytorch.org/t/how-can-i-use-sklearn-kfold-with-imagefolder/36577
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html#sklearn.model_selection.StratifiedKFold

pin_memory = True

# Hyper-parameters are identical for every fold, so define them once.
model_name = 'densenet201'
optim_name = 'AdamW'
lr = 8e-5

for fold_num, (train_index, test_index) in enumerate(tq(skf.split(train_dataset, train_dataset.targets))):
    # Both samplers draw from the same fold split: training indices from
    # train_dataset, held-out indices from valid_dataset.
    train_sampler = ImbalancedDatasetSampler(train_dataset, train_index)
    valid_sampler = ImbalancedDatasetSampler(valid_dataset, test_index)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory
    )

    PARAMS = {'learning_rate': lr,
              'n_epochs': 40,
              'optimizer': optim_name,
              'model': model_name,
              'fold': fold_num,
              'save_name': model_name+'_TTA_'+optim_name+'_fold_'+str(fold_num)+"_img_"+str(img_size)+'_lr-'+str(lr)
              }
    # NOTE(review): the experiment name says "Adam" while the optimizer
    # parameter is "AdamW"; the name is kept unchanged here.
    neptune.create_experiment(name='pytorch-'+model_name+'-Adam', params=PARAMS)
    print("Started train for fold", fold_num)
    model = trainCNN(PARAMS['n_epochs'], train_loader, valid_loader, model_name, True,
                     PARAMS['save_name'], PARAMS['learning_rate'], False)
    preds_path, preds_prob = predict(test_loader, model, True)

    make_submission(preds_prob, preds_path, PARAMS['save_name'])

    # Sum across folds; any averaging has to happen after this loop.
    all_probs += preds_prob


# ## Cyclic loaders
# 
# The standard `itertools.cycle` keeps a copy of every item it yields, so wrapping a data loader in it takes a lot of RAM. The piece of code below reduced my RAM usage by 5 times.  
# Thanks to rmrao for the [solution](https://github.com/pytorch/pytorch/issues/23900#issuecomment-518858050).

# In[ ]:


# collapse-hide
def cycle(iterable):
    """Yield the items of ``iterable`` over and over without caching them.

    A fresh iterator is requested from ``iterable`` for every pass, so no
    copy of the items is kept between passes.

    Args:
        iterable: Any re-iterable object (e.g. a list or a data loader).

    Yields:
        The items of ``iterable``, restarting from the beginning each time
        a pass finishes.

    If a pass produces no items at all (empty iterable, or a one-shot
    iterator that is already exhausted) the generator stops instead of
    spinning forever.
    """
    while True:
        produced_any = False
        for item in iterable:
            produced_any = True
            yield item
        if not produced_any:
            # Nothing to repeat; looping again would never yield.
            return


# ## Reproducibility
# 
# Quick function to set all the seeds (Python, NumPy and PyTorch)

# In[ ]:


# collapse-hide
# seeding function for reproducibility
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    random.seed(seed)
    # Hash randomisation is fixed when the interpreter starts, so this only
    # affects subprocesses launched after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Disable cuDNN auto-tuning, which may pick different algorithms
    # from one run to the next.
    torch.backends.cudnn.benchmark = False