import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

import shutil,timm,os,torch,random,datasets,math,warnings
import fastcore.all as fc, numpy as np, matplotlib as mpl, matplotlib.pyplot as plt
import k_diffusion as K, torchvision.transforms as T
import torchvision.transforms.functional as TF, torch.nn.functional as F

from torch.utils.data import DataLoader, default_collate
from pathlib import Path
from torch.nn import init
from fastcore.foundation import L
from torch import nn, tensor
from operator import itemgetter
from torcheval.metrics import MulticlassAccuracy
from functools import partial
from torch.optim import lr_scheduler
from torch import optim
from torchvision.io import read_image, ImageReadMode
from glob import glob

from miniai.datasets import *
from miniai.conv import *
from miniai.learner import *
from miniai.activations import *
from miniai.init import *
from miniai.sgd import *
from miniai.resnet import *
from miniai.augment import *
from miniai.accel import *
from miniai.training import *

from fastprogress import progress_bar

torch.set_printoptions(precision=5, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['figure.dpi'] = 70

set_seed(42)
if fc.defaults.cpus > 8: fc.defaults.cpus = 8

# Download and unpack Tiny ImageNet (200 classes of 64x64 images).
path_data = Path('data')
path_data.mkdir(exist_ok=True)
path = path_data/'tiny-imagenet-200'

url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
if not path.exists():
    path_zip = fc.urlsave(url, path_data)
    shutil.unpack_archive('data/tiny-imagenet-200.zip', 'data')

bs = 512

# Training set: the class id is the grandparent directory name of each JPEG.
class TinyDS:
    def __init__(self, path):
        self.path = Path(path)
        self.files = glob(str(path/'**/*.JPEG'), recursive=True)
    def __len__(self): return len(self.files)
    def __getitem__(self, i): return self.files[i], Path(self.files[i]).parent.parent.name

tds = TinyDS(path/'train')

# Validation set: labels come from val_annotations.txt (filename -> class id).
path_anno = path/'val'/'val_annotations.txt'
anno = dict(o.split('\t')[:2] for o in path_anno.read_text().splitlines())

class TinyValDS(TinyDS):
    def __getitem__(self, i): return self.files[i], anno[os.path.basename(self.files[i])]

vds = TinyValDS(path/'val')

# Dataset wrapper that applies independent transforms to x and y.
class TfmDS:
    def __init__(self, ds, tfmx=fc.noop, tfmy=fc.noop): self.ds,self.tfmx,self.tfmy = ds,tfmx,tfmy
    def __len__(self): return len(self.ds)
    def __getitem__(self, i):
        x,y = self.ds[i]
        return self.tfmx(x), self.tfmy(y)

id2str = (path/'wnids.txt').read_text().splitlines()
str2id = {v:k for k,v in enumerate(id2str)}

# Per-channel statistics used to normalize the images.
xmean,xstd = (tensor([0.47565, 0.40303, 0.31555]), tensor([0.28858, 0.24402, 0.26615]))

def tfmx(x):
    img = read_image(x, mode=ImageReadMode.RGB)/255
    return (img-xmean[:,None,None])/xstd[:,None,None]

def tfmy(y): return tensor(str2id[y])

tfm_tds = TfmDS(tds, tfmx, tfmy)
tfm_vds = TfmDS(vds, tfmx, tfmy)

def denorm(x): return (x*xstd[:,None,None]+xmean[:,None,None]).clip(0,1)

# Human-readable names for the WordNet synsets used by Tiny ImageNet.
all_synsets = [o.split('\t') for o in (path/'words.txt').read_text().splitlines()]
synsets = {k: v.split(',', maxsplit=1)[0] for k,v in all_synsets if k in id2str}

dls = DataLoaders(*get_dls(tfm_tds, tfm_vds, bs=bs, num_workers=8))

# Batch-level augmentation, applied to training batches only via a callback.
def tfm_batch(b, tfm_x=fc.noop, tfm_y=fc.noop): return tfm_x(b[0]), tfm_y(b[1])

tfms = nn.Sequential(T.Pad(4), T.RandomCrop(64), T.RandomHorizontalFlip(), RandErase())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)

act_gr = partial(GeneralRelu, leak=0.1, sub=0.4)
iw = partial(init_weights, leaky=0.1)

nfs = (32,64,128,256,512,1024)

# Baseline model: a 5x5 conv stem followed by one stride-2 ResBlock per stage.
def get_dropmodel(act=act_gr, nfs=nfs, norm=nn.BatchNorm2d, drop=0.1):
    layers = [nn.Conv2d(3, nfs[0], 5, padding=2)]
#     layers += [ResBlock(nfs[0], nfs[0], ks=3, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Dropout(drop)]
    layers += [nn.Linear(nfs[-1], 200, bias=False), nn.BatchNorm1d(200)]
    return nn.Sequential(*layers).apply(iw)
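# Illustrative sanity check (not in the original notebook): the baseline model
# should map 64x64 RGB inputs to 200 class logits. A small random batch is used
# instead of real data so this runs instantly on CPU.
xb = torch.randn(8, 3, 64, 64)
assert get_dropmodel()(xb).shape == (8, 200)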
# Stack n_bk ResBlocks per stage; only the last block in a stage downsamples.
def res_blocks(n_bk, ni, nf, stride=1, ks=3, act=act_gr, norm=None):
    return nn.Sequential(*[
        ResBlock(ni if i==0 else nf, nf, stride=stride if i==n_bk-1 else 1, ks=ks, act=act, norm=norm)
        for i in range(n_bk)])

nbks = (3,2,2,1,1)

# Deeper variant: several ResBlocks per stage and a ResBlock stem.
def get_dropmodel(act=act_gr, nfs=nfs, nbks=nbks, norm=nn.BatchNorm2d, drop=0.2):
    layers = [ResBlock(3, nfs[0], ks=5, stride=1, act=act, norm=norm)]
    layers += [res_blocks(nbks[i], nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Dropout(drop)]
    layers += [nn.Linear(nfs[-1], 200, bias=False), nn.BatchNorm1d(200)]
    return nn.Sequential(*layers).apply(iw)

opt_func = partial(optim.AdamW, eps=1e-5)

metrics = MetricsCB(accuracy=MulticlassAccuracy())
cbs = [DeviceCB(), metrics, ProgressCB(plot=True), MixedPrecision()]

# First training setup: 25 epochs, one-cycle schedule, batch-level augmentation.
epochs = 25
lr = 3e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
learn = Learner(get_dropmodel(), dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=opt_func)

# Switch to per-item augmentation on PIL images, adding TrivialAugmentWide.
aug_tfms = nn.Sequential(T.Pad(4), T.RandomCrop(64), T.RandomHorizontalFlip(), T.TrivialAugmentWide())
norm_tfm = T.Normalize(xmean, xstd)
erase_tfm = RandErase()

from PIL import Image

def tfmx(x, aug=False):
    x = Image.open(x).convert('RGB')
    if aug: x = aug_tfms(x)
    x = TF.to_tensor(x)
    x = norm_tfm(x)
    if aug: x = erase_tfm(x[None])[0]
    return x

tfm_tds = TfmDS(tds, partial(tfmx, aug=True), tfmy)
tfm_vds = TfmDS(vds, tfmx, tfmy)

dls = DataLoaders(*get_dls(tfm_tds, tfm_vds, bs=bs, num_workers=8))

# Pre-activation conv: norm and activation come before the convolution.
def conv(ni, nf, ks=3, stride=1, act=nn.ReLU, norm=None, bias=True):
    layers = []
    if norm: layers.append(norm(ni))
    if act : layers.append(act())
    layers.append(nn.Conv2d(ni, nf, stride=stride, kernel_size=ks, padding=ks//2, bias=bias))
    return nn.Sequential(*layers)

def _conv_block(ni, nf, stride, act=act_gr, norm=None, ks=3):
    return nn.Sequential(conv(ni, nf, stride=1     , act=act, norm=norm, ks=ks),
                         conv(nf, nf, stride=stride, act=act, norm=norm, ks=ks))

# Pre-activation ResBlock; this definition shadows the one imported from miniai.resnet.
class ResBlock(nn.Module):
    def __init__(self, ni, nf, stride=1, ks=3, act=act_gr, norm=None):
        super().__init__()
        self.convs = _conv_block(ni, nf, stride, act=act, ks=ks, norm=norm)
        self.idconv = fc.noop if ni==nf else conv(ni, nf, ks=1, stride=1, act=None, norm=norm)
        self.pool = fc.noop if stride==1 else nn.AvgPool2d(2, ceil_mode=True)
    def forward(self, x): return self.convs(x) + self.idconv(self.pool(x))

# Final model: conv stem, pre-activation ResBlock stages, then a trailing
# activation+norm before pooling (needed because the blocks are pre-activation).
def get_dropmodel(act=act_gr, nfs=nfs, nbks=nbks, norm=nn.BatchNorm2d, drop=0.2):
    layers = [nn.Conv2d(3, nfs[0], 5, padding=2)]
    layers += [res_blocks(nbks[i], nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [act_gr(), norm(nfs[-1]), nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Dropout(drop)]
    layers += [nn.Linear(nfs[-1], 200, bias=False), nn.BatchNorm1d(200)]
    return nn.Sequential(*layers).apply(iw)

# Final training run: a wider and deeper configuration for 50 epochs.
epochs = 50
lr = 0.1
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_dropmodel(nbks=(1,2,4,2,2), nfs=(32, 64, 128, 512, 768, 1024), drop=0.1)
learn = Learner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=opt_func)
learn.fit(epochs)

os.makedirs('models', exist_ok=True)  # ensure the output directory exists before saving
torch.save(learn.model, 'models/inettiny-widish-50')
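# Illustrative follow-up (not in the original notebook): reload the pickled model
# and run a validation-only pass to double-check the saved weights. This assumes
# miniai's Learner.fit accepts train/valid flags as in the course library; newer
# PyTorch versions may also need weights_only=False when loading a full module.
model = torch.load('models/inettiny-widish-50')
learn = Learner(model, dls, F.cross_entropy, lr=lr, cbs=cbs, opt_func=opt_func)
learn.fit(1, train=False)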