#!/usr/bin/env python
# coding: utf-8

# # "Deep Learning cheat sheet for Python"
# - toc:true
# - categories: ["deep-learning"]
# - image: images/copied_from_nb/images/dl-cheat-sheet.jpg
# - comments: true

# Having worked on Deep Learning for almost a year, I kept noting useful resources and snippets I used frequently. I've compiled all of them in one place below.
# **Feel free to share some of your hacks/resources/snippets below. Leave a comment below, pull requests are welcome too!**
#
# ![](images/dl-cheat-sheet.jpg)

# ## Jupyter clear GPU Memory
#
# There are times I just can't afford to restart the kernel lol. The snippet below helps me in those cases

# In[ ]:


# collapse-hide
import gc


def dump_tensors(gpu_only=True):
    """Best-effort release of tensor memory without restarting the kernel.

    Runs one full garbage-collection pass and then asks PyTorch to hand its
    unused cached CUDA blocks back to the driver.

    The previous version walked ``gc.get_objects()`` and executed ``del obj``
    on every tensor it found. That statement only unbinds the loop variable:
    the list returned by ``gc.get_objects()`` (and every other owner) still
    references the tensor, so nothing was freed by the scan itself. It also
    called ``gc.collect()`` once per matching tensor and emptied the CUDA
    cache *before* collecting, so blocks released by the collector stayed in
    PyTorch's cache.

    Tensors that are still reachable (notebook variables, ``Out[]`` history,
    optimizer state, ...) are NOT freed; delete those references first.

    Args:
        gpu_only: Accepted for backward compatibility. A collection pass
            cannot be restricted to one device, so the flag has no effect.

    Returns:
        None.
    """
    # Break reference cycles first so the tensors they hold are really freed.
    gc.collect()
    # Only now can the cached blocks of those tensors be returned to the driver.
    torch.cuda.empty_cache()


dump_tensors()


# ## Free Quality Courses
#
# - [Machine Learning](https://www.youtube.com/playlist?list=PLl8OlHZGYOQ7bkVbuRthEsaLr7bONzbXS)
#     - Taught by a professor in Cornell, love his style of teaching
# - [Deep Learning Fastai](https://course.fast.ai/)
# - [Deep Learning for Visual Recognition](https://www.youtube.com/playlist?list=PL3FW7Lu3i5JvHM8ljYj-zLfQRF3EO8sYv)
# - [Variational Autoencoder](https://www.youtube.com/playlist?list=PLdxQ7SoCLQANizknbIiHzL_hYjEaI-wUe)

# ## Free GPU resources
#
# Resources below are mostly student focussed
# - [a good list of free resources](https://course.fast.ai/#ready-to-run-one-click-jupyter)
# - [free Azure,AWS and MongoDB creds (Student Developer Pack)](https://education.github.com/pack/offers)
# - [google cloud resources](https://google.dev/edu)

# ## Computer Vision models
#
# - [PyTorch Image Models](https://github.com/rwightman/pytorch-image-models)
# - [fastai](https://github.com/fastai/fastai)
# - [(Generic)
# EfficientNets for PyTorch](https://github.com/rwightman/gen-efficientnet-pytorch)
# - [EfficientNet PyTorch](https://github.com/lukemelas/EfficientNet-PyTorch)

# ## GANs
#
# - [Improved GAN (Semi-supervised GAN)](https://github.com/Sleepychord/ImprovedGAN-pytorch)
# - [Pytorch GAN](https://github.com/eriklindernoren/PyTorch-GAN)

# ## Training tools
#
# - [tqdm: A progressbar](https://github.com/tqdm/tqdm)
# - [livelossplot](https://github.com/stared/livelossplot)
#     - best way to analyse plots while training
# - [The most lightweight experiment management tool that fits any workflow](https://neptune.ai/)
# - [fastpages](https://github.com/fastai/fastpages)

# ## Pre/Post processing
#
# - [Train-val split pytorch](https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb)
# - [Augmix](https://github.com/google-research/augmix)
# - [Cutmix](https://github.com/clovaai/CutMix-PyTorch)
# - [Quick test time augmentation](https://github.com/qubvel/ttach)
# - [librosa: Audio extraction](https://github.com/librosa/librosa)

# ## K Fold Cross Validation

# In[ ]:


# collapse-hide
# NOTE: this cell relies on names that are not defined in this file and must
# come from the surrounding project: get_train_valid_dataset, PROJECT_PATH,
# batch_size, img_size, test_loader, StratifiedKFold, ImbalancedDatasetSampler,
# tq, neptune, trainCNN, predict and make_submission.

# Single source of truth for the fold count and the loader settings.
num_splits = 5
num_workers = 4
pin_memory = True

# Hyper-parameters are identical for every fold, so they are set once here.
model_name = 'densenet201'
optim_name = 'AdamW'
lr = 8e-5

# Running sum of the test-set probabilities predicted by each fold's model.
# NOTE(review): 610 x 3 is presumably (test samples, classes) - confirm.
all_probs = torch.zeros(610, 3, dtype=torch.float32)

train_dataset, valid_dataset = get_train_valid_dataset(
    PROJECT_PATH + "data/train/",
    batch_size=batch_size,
    augment=True,
    random_seed=42,
    valid_size=0.2,
    shuffle=True,
    show_sample=False,
    num_workers=num_workers,
    pin_memory=pin_memory,
    split_no=1,
)

# https://discuss.pytorch.org/t/how-can-i-use-sklearn-kfold-with-imagefolder/36577
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html#sklearn.model_selection.StratifiedKFold
skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)

for fold_num, (train_index, test_index) in enumerate(
    tq(skf.split(train_dataset, train_dataset.targets))
):
    # Each fold draws its batches through a sampler restricted to that fold's
    # indices; both loaders wrap the full datasets.
    train_sampler = ImbalancedDatasetSampler(train_dataset, train_index)
    valid_sampler = ImbalancedDatasetSampler(valid_dataset, test_index)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    PARAMS = {
        'learning_rate': lr,
        'n_epochs': 40,
        'optimizer': optim_name,
        'model': model_name,
        'fold': fold_num,
        'save_name': model_name + '_TTA_' + optim_name + '_fold_' + str(fold_num)
                     + "_img_" + str(img_size) + '_lr-' + str(lr),
    }

    # NOTE(review): the experiment name says "Adam" although optim_name is
    # 'AdamW'; left unchanged so existing experiment names keep matching.
    neptune.create_experiment(name='pytorch-' + model_name + '-Adam', params=PARAMS)
    print("Started train for fold", fold_num)

    model = trainCNN(
        PARAMS['n_epochs'], train_loader, valid_loader, model_name, True,
        PARAMS['save_name'], PARAMS['learning_rate'], False,
    )
    preds_path, preds_prob = predict(test_loader, model, True)
    make_submission(preds_prob, preds_path, PARAMS['save_name'])
    all_probs += preds_prob

# NOTE: all_probs now holds the SUM over folds; divide by num_splits if the
# averaged ensemble probabilities are needed.


# ## Cyclic loaders
#
# The standard loader from itertools takes a lot of RAM. The piece of code below reduced my RAM usage by 5 times.
# Thanks to rmrao for the [solution](https://github.com/pytorch/pytorch/issues/23900#issuecomment-518858050).

# In[ ]:


# collapse-hide
def cycle(iterable):
    """Yield the items of ``iterable`` over and over without caching them.

    Unlike ``itertools.cycle`` no copy of the items is stored; the iterable
    is simply iterated again after each pass.

    Args:
        iterable: Any re-iterable object.

    Yields:
        The items of ``iterable``, restarting from the beginning after each
        complete pass.

    The generator ends when a pass produces no items (empty iterable, or a
    one-shot iterator that is already exhausted). Previously that case spun
    forever without ever yielding.
    """
    while True:
        produced_any = False
        for item in iterable:
            produced_any = True
            yield item
        if not produced_any:
            # Nothing left to repeat; stop instead of busy-looping.
            return


# ## Reproducibility
#
# Quick function to set all the seeds of Pytorch

# In[ ]:


# collapse-hide
# seeding function for reproducibility
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every random number generator.

    Returns:
        None.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the running process fixed its hash seed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False