#| default_exp datasets
#|export
from __future__ import annotations
import math,numpy as np,matplotlib.pyplot as plt
from operator import itemgetter
from itertools import zip_longest
import fastcore.all as fc
from torch.utils.data import default_collate
from miniai.training import *
import logging,pickle,gzip,os,time,shutil,torch,matplotlib as mpl
from pathlib import Path
from torch import tensor,nn,optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from datasets import load_dataset,load_dataset_builder
import torchvision.transforms.functional as TF
from fastcore.test import test_close
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray'
logging.disable(logging.WARNING)
name = "fashion_mnist"
ds_builder = load_dataset_builder(name)
print(ds_builder.info.description)
Fashion-MNIST is a dataset of Zalando's article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. We intend Fashion-MNIST to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking machine learning algorithms. It shares the same image size and structure of training and testing splits.
ds_builder.info.features
{'image': Image(decode=True, id=None), 'label': ClassLabel(num_classes=10, names=['T - shirt / top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'], id=None)}
ds_builder.info.splits
{'train': SplitInfo(name='train', num_bytes=31296607, num_examples=60000, dataset_name='fashion_mnist'), 'test': SplitInfo(name='test', num_bytes=5233810, num_examples=10000, dataset_name='fashion_mnist')}
dsd = load_dataset(name)
dsd
0%| | 0/2 [00:00<?, ?it/s]
DatasetDict({ train: Dataset({ features: ['image', 'label'], num_rows: 60000 }) test: Dataset({ features: ['image', 'label'], num_rows: 10000 }) })
train,test = dsd['train'],dsd['test']
train[0]
{'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28>, 'label': 9}
x,y = ds_builder.info.features
x,y
('image', 'label')
x,y = 'image','label'
img = train[0][x]
img
xb = train[:5][x]
yb = train[:5][y]
yb
[9, 0, 0, 3, 0]
featy = train.features[y]
featy
ClassLabel(num_classes=10, names=['T - shirt / top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'], id=None)
featy.int2str(yb)
['Ankle boot', 'T - shirt / top', 'T - shirt / top', 'Dress', 'T - shirt / top']
train['label'][:5]
[9, 0, 0, 3, 0]
def collate_fn(b):
    "Collate a list of samples `b` into a dict holding a stacked image tensor and a label tensor."
    # `x` and `y` are the module-level feature names used as keys on each sample
    imgs = torch.stack([TF.to_tensor(sample[x]) for sample in b])
    lbls = tensor([sample[y] for sample in b])
    return {x: imgs, y: lbls}
dl = DataLoader(train, collate_fn=collate_fn, batch_size=16)
b = next(iter(dl))
b[x].shape,b[y]
(torch.Size([16, 1, 28, 28]), tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9]))
def transforms(b):
    "Replace the images stored under key `x` of batch dict `b` with tensors, then return `b`."
    b[x] = list(map(TF.to_tensor, b[x]))
    return b
tds = train.with_transform(transforms)
dl = DataLoader(tds, batch_size=16)
b = next(iter(dl))
b[x].shape,b[y]
(torch.Size([16, 1, 28, 28]), tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9]))
def _transformi(b):
    "Overwrite the images under key `x` of `b` with flattened tensors; mutates `b` and returns nothing."
    b[x] = [TF.to_tensor(img).flatten() for img in b[x]]
#|export
def inplace(f):
    "Decorator: call `f` for its side effect on the argument, then return that same argument."
    # Local import so this block does not depend on the file's import header
    from functools import wraps
    @wraps(f)  # keep `f`'s name and docstring on the wrapper instead of the anonymous `_f`
    def _f(b):
        f(b)  # `f` is expected to mutate `b`; its own return value is discarded
        return b
    return _f
transformi = inplace(_transformi)
r = train.with_transform(transformi)[0]
r[x].shape,r[y]
(torch.Size([784]), 9)
@inplace
def transformi(b):
    "Flatten every image under key `x` of `b` into a 1-d tensor (the `inplace` wrapper hands `b` back)."
    as_tensors = map(TF.to_tensor, b[x])
    b[x] = [torch.flatten(t) for t in as_tensors]
tdsf = train.with_transform(transformi)
r = tdsf[0]
r[x].shape,r[y]
(torch.Size([784]), 9)
d = dict(a=1,b=2,c=3)
ig = itemgetter('a','c')
ig(d)
(1, 3)
class D:
    "Tiny non-dict object that supports `[]` indexing: 'a' -> 1, 'b' -> 2, anything else -> 3."
    def __getitem__(self, k):
        if k == 'a': return 1
        if k == 'b': return 2
        return 3
d = D()
ig(d)
(1, 3)
list(tdsf.features)
['image', 'label']
batch = dict(a=[1],b=[2]), dict(a=[3],b=[4])
default_collate(batch)
{'a': [tensor([1, 3])], 'b': [tensor([2, 4])]}
#|export
def collate_dict(ds):
    "Return a collate function that default-collates a batch, then pulls out the values for `ds.features` in order."
    # Built once up front; with several features `itemgetter` returns a tuple
    pick_features = itemgetter(*ds.features)
    def _collate(batch):
        collated = default_collate(batch)
        return pick_features(collated)
    return _collate
dlf = DataLoader(tdsf, batch_size=4, collate_fn=collate_dict(tdsf))
xb,yb = next(iter(dlf))
xb.shape,yb
(torch.Size([4, 784]), tensor([9, 0, 0, 3]))
b = next(iter(dl))
xb = b['image']
img = xb[0]
plt.imshow(img[0]);
#|export
@fc.delegates(plt.Axes.imshow)
def show_image(im, ax=None, figsize=None, title=None, noframe=True, **kwargs):
    "Show a PIL or PyTorch image on `ax`."
    tensor_like = fc.hasattrs(im, ('cpu','permute','detach'))
    if tensor_like:
        im = im.detach().cpu()
        # A 3-d tensor with a small first dim is treated as channels-first and moved to channels-last
        if len(im.shape) == 3 and im.shape[0] < 5:
            im = im.permute(1, 2, 0)
    elif not isinstance(im, np.ndarray):
        im = np.array(im)
    # Drop a trailing singleton dimension so the image is passed to `imshow` as 2-d
    if im.shape[-1] == 1:
        im = im[..., 0]
    # No axes supplied: create a fresh figure of the requested size
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im, **kwargs)
    if title is not None:
        ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])
    if noframe:
        ax.axis('off')
    return ax
help(show_image)
Help on function show_image in module __main__: show_image(im, ax=None, figsize=None, title=None, noframe=True, *, cmap=None, norm=None, aspect=None, interpolation=None, alpha=None, vmin=None, vmax=None, origin=None, extent=None, interpolation_stage=None, filternorm=True, filterrad=4.0, resample=None, url=None, data=None) Show a PIL or PyTorch image on `ax`.
show_image(img, figsize=(2,2));
fig,axs = plt.subplots(1,2)
show_image(img, axs[0])
show_image(xb[1], axs[1]);
#|export
@fc.delegates(plt.subplots, keep=True)
def subplots(
    nrows:int=1, # Number of rows in returned axes grid
    ncols:int=1, # Number of columns in returned axes grid
    figsize:tuple=None, # Width, height in inches of the returned figure
    imsize:int=3, # Size (in inches) of images that will be displayed in the returned figure
    suptitle:str=None, # Title to be set to returned figure
    **kwargs
): # fig and axs
    "Create a figure plus a grid of axes, sized so each cell is `imsize` inches unless `figsize` is given"
    fig_dims = (ncols*imsize, nrows*imsize) if figsize is None else figsize
    fig, axs = plt.subplots(nrows, ncols, figsize=fig_dims, **kwargs)
    if suptitle is not None:
        fig.suptitle(suptitle)
    # For a 1x1 grid, wrap the result in an array so callers always get an array back
    is_single = nrows*ncols == 1
    return fig, (np.array([axs]) if is_single else axs)
from nbdev.showdoc import show_doc
show_doc(subplots)
source{target="_blank" style="float:right; font-size:smaller"}
subplots (nrows:int=1, ncols:int=1, figsize:tuple=None, imsize:int=3, suptitle:str=None, sharex=False, sharey=False, squeeze=True, subplot_kw=None, gridspec_kw=None, **kwargs)
A figure and set of subplots to display images of imsize
inches
Type | Default | Details | |
---|---|---|---|
nrows | int | 1 | Number of rows in returned axes grid |
ncols | int | 1 | Number of columns in returned axes grid |
figsize | tuple | None | Width, height in inches of the returned figure |
imsize | int | 3 | Size (in inches) of images that will be displayed in the returned figure |
suptitle | str | None | Title to be set to returned figure |
sharex | bool | False | |
sharey | bool | False | |
squeeze | bool | True | |
subplot_kw | NoneType | None | |
gridspec_kw | NoneType | None | |
kwargs |
fig,axs = subplots(3,3, imsize=1)
imgs = xb[:8]
for ax,img in zip(axs.flat,imgs): show_image(img, ax)
#|export
@fc.delegates(subplots)
def get_grid(
    n:int, # Number of axes
    nrows:int=None, # Number of rows, defaulting to `int(math.sqrt(n))`
    ncols:int=None, # Number of columns, defaulting to `ceil(n/rows)`
    title:str=None, # If passed, title set to the figure
    weight:str='bold', # Title font weight
    size:int=14, # Title font size
    **kwargs,
): # fig and axs
    "Return a grid of at least `n` axes, `nrows` by `ncols`; surplus axes are switched off"
    # Round the missing dimension UP so that nrows*ncols >= n; rounding down
    # would leave too few axes (e.g. n=8, nrows=3 -> 3x2 = 6 cells).
    if nrows: ncols = ncols or math.ceil(n/nrows)
    elif ncols: nrows = math.ceil(n/ncols)
    else:
        nrows = int(math.sqrt(n))
        ncols = math.ceil(n/nrows)
    fig,axs = subplots(nrows, ncols, **kwargs)
    # Hide the cells beyond the first `n` so unused slots render blank
    for i in range(n, nrows*ncols): axs.flat[i].set_axis_off()
    if title is not None: fig.suptitle(title, weight=weight, size=size)
    return fig,axs
fig,axs = get_grid(8, nrows=3, imsize=1)
for ax,img in zip(axs.flat,imgs): show_image(img, ax)
#|export
@fc.delegates(subplots)
def show_images(ims:list, # Images to show
                nrows:int|None=None, # Number of rows in grid
                ncols:int|None=None, # Number of columns in grid (auto-calculated if None)
                titles:list|None=None, # Optional list of titles for each image
                **kwargs):
    "Show all images `ims` as subplots in a grid with `nrows` rows, using `titles`"
    axs = get_grid(len(ims), nrows, ncols, **kwargs)[1].flat
    # Private sentinel so padding from `zip_longest` can be told apart from a real `None`
    missing = object()
    for im,t,ax in zip_longest(ims, titles or [], axs, fillvalue=missing):
        # The grid can have more cells than images (e.g. 7 images in a 2x4 grid) and
        # `titles` can be longer than `ims`; padding is not an image, so stop here
        # rather than passing it to `show_image`.
        if im is missing: break
        # Fewer titles than images -> untitled; fewer axes than images -> `ax=None`
        show_image(im, ax=None if ax is missing else ax, title=None if t is missing else t)
yb = b['label']
lbls = yb[:8]
names = "Top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Boot".split()
titles = itemgetter(*lbls)(names)
' '.join(titles)
'Boot Top Top Dress Top Pullover Sneaker Pullover'
show_images(imgs, imsize=1.7, titles=titles)
#|export
class DataLoaders:
    "Container exposing the first two dataloaders it is given as `train` and `valid`."
    def __init__(self, *dls):
        # Only the first two loaders are used; any further ones are ignored
        train_dl, valid_dl = dls[:2]
        self.train = train_dl
        self.valid = valid_dl

    @classmethod
    def from_dd(cls, dd, batch_size, as_tuple=True, **kwargs):
        "Build `DataLoaders` from dataset dict `dd`, collating batches with `collate_dict(dd['train'])`."
        # NOTE(review): `as_tuple` is accepted but never read in this method
        # `get_dls` is not defined in this file; presumably it comes from `miniai.training` - confirm
        collate = collate_dict(dd['train'])
        loaders = get_dls(*dd.values(), bs=batch_size, collate_fn=collate, **kwargs)
        return cls(*loaders)
import nbdev; nbdev.nbdev_export()