#!/usr/bin/env python
# coding: utf-8

# Deep Learning Models -- A collection of various deep learning architectures,
# models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models

# # Custom DataLoader Example for PNG files

# Illustration of how we can efficiently iterate through custom (image)
# datasets. For this, suppose
# - mnist_train, mnist_valid, and mnist_test are image folders you created
#   with your own custom images
# - mnist_train.csv, mnist_valid.csv, and mnist_test.csv are tables that store
#   the image names with their associated class labels

# In[1]:


# Record author, Python version and package versions via the watermark
# IPython extension.
get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch,pandas,numpy,matplotlib")


# # 1) Inspecting the Dataset

# In[2]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from PIL import Image


# In[3]:


# Open one training image and display it.
im = Image.open('mnist_train/1.png')
plt.imshow(im)


# In[4]:


import numpy as np

# Convert the PIL image to a NumPy array to inspect its dimensions and
# raw values.
im_array = np.array(im)
print('Array Dimensions', im_array.shape)
print()
print(im_array)


#
#
#
#
#
#
#
#
#
# In[5]:


import pandas as pd


# In[6]:


# Label table for the training images: print its (rows, columns) shape and
# preview the first rows.
df_train = pd.read_csv('mnist_train.csv')
print(df_train.shape)
df_train.head()


# In[7]:


# Same inspection for the validation split.
df_valid = pd.read_csv('mnist_valid.csv')
print(df_valid.shape)
df_valid.head()


# In[8]:


# Same inspection for the test split.
df_test = pd.read_csv('mnist_test.csv')
print(df_test.shape)
df_test.head()


#
#
#
#
#
#
# # 2) Custom Dataset Class

# In[9]:


import os

import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset


class MyDataset(Dataset):
    """Dataset of image files whose names and labels are listed in a CSV.

    The CSV must provide a 'File Name' column (image file names relative to
    ``img_dir``) and a 'Class Label' column (the label of each image).

    Args:
        csv_path: Path of the CSV file with the columns described above.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the opened PIL image before
            it is returned. ``None`` returns the PIL image unchanged.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.img_names = df['File Name']
        self.y = df['Class Label']
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at position ``index``.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        # Positional lookups (.iloc) instead of label lookups: an index past
        # the end raises IndexError rather than KeyError, which is what plain
        # iteration over an object that only defines __getitem__ relies on to
        # stop, and negative indices count from the end as for any sequence.
        img_name = self.img_names.iloc[index]
        label = self.y.iloc[index]

        img = Image.open(os.path.join(self.img_dir, img_name))
        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Return the number of labelled examples."""
        return self.y.shape[0]


#
#
#
#
#
#
# # 3) Custom Dataloader

# In[10]:


from torchvision import transforms
from torch.utils.data import DataLoader


# Note that transforms.ToTensor() already divides pixels by 255. internally,
# so an explicit transforms.Lambda(lambda x: x/255.) step is not necessary.
custom_transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = MyDataset(
    csv_path='mnist_train.csv',
    img_dir='mnist_train',
    transform=custom_transform,
)

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,   # want to shuffle the dataset
    num_workers=4,  # number processes/CPUs to use
)


#
#
#
# ## 4) Iterating Through the Dataset

# In[11]:


# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(0)

num_epochs = 2
for epoch in range(num_epochs):

    for batch_idx, (x, y) in enumerate(train_loader):

        # One status line per batch, assembled from three print calls.
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size(0))

        # Move the current batch to the selected device.
        x, y = x.to(device), y.to(device)


# In[12]:


# x still refers to the last batch produced by the loop above.
print(x.shape)


# In[13]:


# Flatten the batch so that every row has 28*28 entries.
x_image_as_vector = x.view(-1, 28*28)
print(x_image_as_vector.shape)


# In[14]:


x