Having worked on Deep Learning for almost a year, I have kept a running list of the resources and snippets I use most frequently, and I've compiled all of them in one place below. Feel free to share your own hacks/resources/snippets: leave a comment below, and pull requests are welcome too!
There are times I just can't afford to restart the kernel, lol. The snippet below helps me in those cases.
# collapse-hide
import gc
def dump_tensors(gpu_only=True):
    """Best-effort release of tensor memory without restarting the kernel.

    Scans every object tracked by the garbage collector for tensors, or for
    objects whose ``data`` attribute is a tensor.  If any match, one full
    garbage collection is run after the scan has finished, and the CUDA
    caching allocator is then asked to release its unused cached memory.

    Tensors that are still reachable from live names cannot be freed from
    here; ``del`` those names before calling this function.

    Args:
        gpu_only: When true (the default) only objects reporting
            ``is_cuda`` count as matches; when false, CPU tensors count too.

    Returns:
        None.
    """
    found = 0
    for obj in gc.get_objects():
        try:
            holds_tensor = torch.is_tensor(obj) or (
                hasattr(obj, "data") and torch.is_tensor(obj.data)
            )
            if holds_tensor and (not gpu_only or obj.is_cuda):
                found += 1
        except Exception:
            # Probing attributes on arbitrary live objects can raise almost
            # anything (lazy modules, proxies, ...); this is a best-effort
            # scan, so such objects are simply skipped.
            continue
    # Drop the last reference held by the loop variable.
    obj = None

    # Collect only now: while the scan was running, the list returned by
    # gc.get_objects() kept every object alive, so collecting inside the
    # loop could not free anything.
    if found:
        gc.collect()
    # Release cached blocks last, so memory freed by the collection above is
    # returned as well.
    torch.cuda.empty_cache()
# Run the cleanup right away, using the default gpu_only=True.
dump_tensors()
The resources below are mostly student-focused.
# collapse-hide
# Stratified K-fold training: for every fold, build the loaders, train a
# model, predict on the test loader, write a submission and add the fold's
# probabilities to a running sum.
num_splits = 5
# Running sum of per-fold test predictions.
# NOTE(review): presumably 610 test samples x 3 classes - confirm.
all_probs = torch.zeros(610, 3, dtype=torch.float32)

train_dataset, valid_dataset = get_train_valid_dataset(
    PROJECT_PATH + "data/train/",
    batch_size=batch_size,
    augment=True,
    random_seed=42,
    valid_size=0.2,
    shuffle=True,
    show_sample=False,
    num_workers=4,
    pin_memory=True,
    split_no=1,
)

# The fold count comes from num_splits instead of a second hard-coded 5.
skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)
# https://discuss.pytorch.org/t/how-can-i-use-sklearn-kfold-with-imagefolder/36577
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html#sklearn.model_selection.StratifiedKFold
pin_memory = True

# Hyperparameters are identical for every fold, so they are set once here.
model_name = 'densenet201'
optim_name = 'AdamW'
lr = 8e-5

for fold_num, (train_index, test_index) in enumerate(
    tq(skf.split(train_dataset, train_dataset.targets))
):
    # Each sampler is restricted to the indices of the current fold.
    train_sampler = ImbalancedDatasetSampler(train_dataset, train_index)
    valid_sampler = ImbalancedDatasetSampler(valid_dataset, test_index)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    PARAMS = {
        'learning_rate': lr,
        'n_epochs': 40,
        'optimizer': optim_name,
        'model': model_name,
        'fold': fold_num,
        'save_name': f"{model_name}_TTA_{optim_name}_fold_{fold_num}_img_{img_size}_lr-{lr}",
    }
    neptune.create_experiment(name='pytorch-' + model_name + '-Adam', params=PARAMS)

    print("Started train for fold", fold_num)
    model = trainCNN(
        PARAMS['n_epochs'],
        train_loader,
        valid_loader,
        model_name,
        True,
        PARAMS['save_name'],
        PARAMS['learning_rate'],
        False,
    )
    preds_path, preds_prob = predict(test_loader, model, True)
    make_submission(preds_prob, preds_path, PARAMS['save_name'])
    # all_probs is a sum over folds; nothing in this block divides it by the
    # number of folds.
    all_probs += preds_prob
# collapse-hide
def cycle(iterable):
    """Yield the items of *iterable* over and over, restarting it each pass.

    Unlike ``itertools.cycle`` no items are cached: every pass calls
    ``iter(iterable)`` again, so an iterable that produces a fresh order on
    each iteration is re-evaluated on every pass.

    Args:
        iterable: Any iterable.  It should be re-iterable to get more than
            one pass out of it.

    Yields:
        The items of *iterable*, pass after pass.  The generator stops as
        soon as a pass produces no items (an empty iterable, or a one-shot
        iterator that is already exhausted) instead of spinning forever.
    """
    while True:
        produced_any = False
        for item in iterable:
            produced_any = True
            yield item
        if not produced_any:
            # Nothing came out of this pass, so nothing ever will; stop
            # rather than busy-loop without yielding.
            return
A quick function to set all the random seeds used alongside PyTorch.
# collapse-hide
# seeding function for reproducibility
def seed_everything(seed):
    """Seed every random number generator used here, for reproducible runs.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the current CUDA device), exports ``PYTHONHASHSEED`` and puts
    cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        None.
    """
    random.seed(seed)
    # Stored as a string because environment values must be strings.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different convolution algorithms from run to
    # run; switch it off so the deterministic setting above is not undermined.
    torch.backends.cudnn.benchmark = False