#!/usr/bin/env python
# coding: utf-8
# Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
# - Author: Sebastian Raschka
# - GitHub Repository: https://github.com/rasbt/deeplearning-models
# # Custom DataLoader Example for PNG files
# Illustration of how we can efficiently iterate through custom (image) datasets. For this, suppose
# - mnist_train, mnist_valid, and mnist_test are image folders you created with your own custom images
# - mnist_train.csv, mnist_valid.csv, and mnist_test.csv are tables that store the image names with their associated class labels
# In[1]:
# `get_ipython` is not defined in this file.
# NOTE(review): presumably injected by IPython/Jupyter at runtime, so this
# script only runs inside an IPython session -- confirm.
# Load the `watermark` extension first; the second call depends on it.
get_ipython().run_line_magic('load_ext', 'watermark')
# Run the `watermark` magic with the author name and the package list
# torch, pandas, numpy, matplotlib.
# NOTE(review): -v / -p presumably report Python and package versions -- confirm.
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -v -p torch,pandas,numpy,matplotlib")
# # 1) Inspecting the Dataset
# In[2]:
# Run the IPython `%matplotlib inline` magic before any plotting call.
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from PIL import Image
# In[3]:
# Open one sample image from the training folder and hand it to matplotlib.
im = Image.open('mnist_train/1.png')
plt.imshow(im)
# In[4]:
import numpy as np
# Convert the PIL image into a NumPy array so that its dimensions and raw
# values can be printed.
im_array = np.array(im)
print('Array Dimensions', im_array.shape)
print()  # blank line between the shape and the array dump
print(im_array)
#
#
#
#
#
#
#
#
#
# In[5]:
import pandas as pd
# In[6]:
# Load the training label table, print its (rows, columns) shape and look
# at the first rows.
df_train = pd.read_csv('mnist_train.csv')
print(df_train.shape)
df_train.head()  # bare expression: its value is displayed only as notebook cell output
# In[7]:
# Same inspection for the validation label table.
df_valid = pd.read_csv('mnist_valid.csv')
print(df_valid.shape)
df_valid.head()  # bare expression: its value is displayed only as notebook cell output
# In[8]:
# Same inspection for the test label table.
df_test = pd.read_csv('mnist_test.csv')
print(df_test.shape)
df_test.head()  # bare expression: its value is displayed only as notebook cell output
#
#
#
#
#
#
# # 2) Custom Dataset Class
# In[9]:
import torch
from PIL import Image
from torch.utils.data import Dataset
import os
class MyDataset(Dataset):
    """Image dataset backed by a CSV label table and a folder of image files.

    The CSV file must provide a ``'File Name'`` column (image file names,
    resolved relative to ``img_dir``) and a ``'Class Label'`` column.

    Args:
        csv_path: Path of the CSV file listing file names and class labels.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to each opened PIL image
            before it is returned; ``None`` returns the image unchanged.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.img_names = df['File Name']
        self.y = df['Class Label']
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``.

        Args:
            index: Integer position of the sample; negative values count
                from the end, as for built-in sequences.

        Returns:
            A tuple ``(img, label)`` where ``img`` is the opened image
            (after ``transform``, when one was given) and ``label`` is the
            matching entry of the ``'Class Label'`` column.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        # Look both columns up by *position* (``.iloc``) and do so before the
        # file is opened.  Plain ``series[index]`` is a label lookup that
        # raises KeyError when out of range; Python's ``__getitem__``-based
        # iteration needs IndexError to know where the sequence ends.
        img_name = self.img_names.iloc[index]
        label = self.y.iloc[index]

        img = Image.open(os.path.join(self.img_dir, img_name))
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Return the number of samples, i.e. rows in the label table."""
        return len(self.y)
#
#
#
#
#
#
# # 3) Custom Dataloader
# In[10]:
from torchvision import transforms
from torch.utils.data import DataLoader
# transforms.ToTensor() already divides the pixel values by 255 internally,
# so an explicit transforms.Lambda(lambda x: x/255.) step is not necessary.
custom_transform = transforms.Compose([transforms.ToTensor()])

train_dataset = MyDataset(
    csv_path='mnist_train.csv',
    img_dir='mnist_train',
    transform=custom_transform,
)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,   # we want to shuffle the dataset
    num_workers=4,  # number of processes/CPUs to use
)
#
#
#
# ## 4) Iterating Through the Dataset
# In[11]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Seed PyTorch's global random number generator before iterating.
torch.manual_seed(0)

num_epochs = 2
for epoch in range(num_epochs):
    for batch_idx, (x, y) in enumerate(train_loader):
        # One progress line per batch: each label is followed by a space and
        # its value, and the pieces are concatenated without a separator.
        progress = (
            ('Epoch:', epoch + 1),
            (' | Batch index:', batch_idx),
            (' | Batch size:', y.size(0)),
        )
        print(''.join(f'{label} {value}' for label, value in progress))

        # Move the current batch to the selected device.
        x, y = x.to(device), y.to(device)
# In[12]:
# `x` is whatever the last iteration of the loop above left behind.
print(x.shape)
# In[13]:
# Reshape into rows of 28*28 = 784 values; -1 lets PyTorch infer the number
# of rows from the total element count.
# NOTE(review): presumably one row per 28x28 single-channel image -- confirm.
x_image_as_vector = x.view(-1, 28*28)
print(x_image_as_vector.shape)
# In[14]:
# Bare expression: its value is displayed only as notebook cell output.
x