%load_ext autoreload
%autoreload 2
%matplotlib inline

#export
from exp.nb_05b import *
torch.set_num_threads(2)

x_train,y_train,x_valid,y_valid = get_data()

#export
def normalize_to(train, valid):
    m,s = train.mean(),train.std()
    return normalize(train, m, s), normalize(valid, m, s)

x_train,x_valid = normalize_to(x_train,x_valid)
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)

x_train.mean(),x_train.std()

nh,bs = 50,512
c = y_train.max().item()+1
loss_func = F.cross_entropy

data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

#export
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x): return self.func(x)

def flatten(x):      return x.view(x.shape[0], -1)

def mnist_resize(x): return x.view(-1, 1, 28, 28)

def get_cnn_model(data):
    return nn.Sequential(
        Lambda(mnist_resize),
        nn.Conv2d( 1, 8, 5, padding=2,stride=2), nn.ReLU(), #14
        nn.Conv2d( 8,16, 3, padding=1,stride=2), nn.ReLU(), # 7
        nn.Conv2d(16,32, 3, padding=1,stride=2), nn.ReLU(), # 4
        nn.Conv2d(32,32, 3, padding=1,stride=2), nn.ReLU(), # 2
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32,data.c)
    )

model = get_cnn_model(data)

cbfs = [Recorder, partial(AvgStatsCallback,accuracy)]

opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
run = Runner(cb_funcs=cbfs)

%time run.fit(1, learn)

# Somewhat more flexible way
device = torch.device('cuda',0)

class CudaCallback(Callback):
    def __init__(self,device): self.device=device
    def begin_fit(self): self.model.to(self.device)
    def begin_batch(self): self.run.xb,self.run.yb = self.xb.to(self.device),self.yb.to(self.device)

# Somewhat less flexible, but quite convenient
torch.cuda.set_device(device)

#export
class CudaCallback(Callback):
    def begin_fit(self): self.model.cuda()
    def begin_batch(self): self.run.xb,self.run.yb = self.xb.cuda(),self.yb.cuda()

cbfs.append(CudaCallback)

model = get_cnn_model(data)
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
run = Runner(cb_funcs=cbfs)

%time run.fit(3, learn)

def conv2d(ni, nf, ks=3, stride=2):
    return nn.Sequential(
        nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride), nn.ReLU())

#export
class BatchTransformXCallback(Callback):
    _order=2
    def __init__(self, tfm): self.tfm = tfm
    def begin_batch(self): self.run.xb = self.tfm(self.xb)

def view_tfm(*size):
    def _inner(x): return x.view(*((-1,)+size))
    return _inner

mnist_view = view_tfm(1,28,28)
cbfs.append(partial(BatchTransformXCallback, mnist_view))

nfs = [8,16,32,32]

def get_cnn_layers(data, nfs):
    nfs = [1] + nfs
    return [
        conv2d(nfs[i], nfs[i+1], 5 if i==0 else 3)
        for i in range(len(nfs)-1)
    ] + [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(nfs[-1], data.c)]

def get_cnn_model(data, nfs): return nn.Sequential(*get_cnn_layers(data, nfs))

#export
def get_runner(model, data, lr=0.6, cbs=None, opt_func=None, loss_func=F.cross_entropy):
    if opt_func is None: opt_func = optim.SGD
    opt = opt_func(model.parameters(), lr=lr)
    learn = Learner(model, opt, loss_func, data)
    return learn, Runner(cb_funcs=listify(cbs))

model = get_cnn_model(data, nfs)
learn,run = get_runner(model, data, lr=0.4, cbs=cbfs)
model

run.fit(3, learn)

class SequentialModel(nn.Module):
    def __init__(self, *layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.act_means = [[] for _ in layers]
        self.act_stds  = [[] for _ in layers]

    def __call__(self, x):
        for i,l in enumerate(self.layers):
            x = l(x)
            self.act_means[i].append(x.data.mean())
            self.act_stds [i].append(x.data.std ())
        return x

    def __iter__(self): return iter(self.layers)
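# Quick illustrative sketch of what SequentialModel records: each forward pass appends one
# mean and one std per layer, so every sublist grows by one entry per batch. The tiny
# two-layer stack and the random input below are made up purely for this sketch.
toy = SequentialModel(nn.Linear(4, 3), nn.ReLU())
_ = toy(torch.randn(8, 4))
_ = toy(torch.randn(8, 4))
toy.act_means   # two sublists (one per layer), each holding two scalar tensors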
model = SequentialModel(*get_cnn_layers(data, nfs))
learn,run = get_runner(model, data, lr=0.9, cbs=cbfs)

run.fit(2, learn)

for l in model.act_means: plt.plot(l)
plt.legend(range(6));

for l in model.act_stds: plt.plot(l)
plt.legend(range(6));

for l in model.act_means: plt.plot(l[:10])
plt.legend(range(6));

for l in model.act_stds: plt.plot(l[:10])
plt.legend(range(6));

model = get_cnn_model(data, nfs)
learn,run = get_runner(model, data, lr=0.5, cbs=cbfs)

act_means = [[] for _ in model]
act_stds  = [[] for _ in model]

def append_stats(i, mod, inp, outp):
    act_means[i].append(outp.data.mean())
    act_stds [i].append(outp.data.std())

for i,m in enumerate(model): m.register_forward_hook(partial(append_stats, i))

run.fit(1, learn)

for o in act_means: plt.plot(o)
plt.legend(range(5));

#export
def children(m): return list(m.children())

class Hook():
    def __init__(self, m, f): self.hook = m.register_forward_hook(partial(f, self))
    def remove(self): self.hook.remove()
    def __del__(self): self.remove()

def append_stats(hook, mod, inp, outp):
    if not hasattr(hook,'stats'): hook.stats = ([],[])
    means,stds = hook.stats
    means.append(outp.data.mean())
    stds .append(outp.data.std())

model = get_cnn_model(data, nfs)
learn,run = get_runner(model, data, lr=0.5, cbs=cbfs)

hooks = [Hook(l, append_stats) for l in children(model[:4])]

run.fit(1, learn)

for h in hooks:
    plt.plot(h.stats[0])
    h.remove()
plt.legend(range(4));

#export
class ListContainer():
    def __init__(self, items): self.items = listify(items)
    def __getitem__(self, idx):
        if isinstance(idx, (int,slice)): return self.items[idx]
        if isinstance(idx[0],bool):
            assert len(idx)==len(self) # bool mask
            return [o for m,o in zip(idx,self.items) if m]
        return [self.items[i] for i in idx]
    def __len__(self): return len(self.items)
    def __iter__(self): return iter(self.items)
    def __setitem__(self, i, o): self.items[i] = o
    def __delitem__(self, i): del(self.items[i])
    def __repr__(self):
        res = f'{self.__class__.__name__} ({len(self)} items)\n{self.items[:10]}'
        if len(self)>10: res = res[:-1]+ '...]'
        return res

ListContainer(range(10))

ListContainer(range(100))

t = ListContainer(range(10))
t[[1,2]], t[[False]*8 + [True,False]]

#export
from torch.nn import init

class Hooks(ListContainer):
    def __init__(self, ms, f): super().__init__([Hook(m, f) for m in ms])
    def __enter__(self, *args): return self
    def __exit__ (self, *args): self.remove()
    def __del__(self): self.remove()

    def __delitem__(self, i):
        self[i].remove()
        super().__delitem__(i)

    def remove(self):
        for h in self: h.remove()

model = get_cnn_model(data, nfs).cuda()
learn,run = get_runner(model, data, lr=0.9, cbs=cbfs)

hooks = Hooks(model, append_stats)
hooks

hooks.remove()

x,y = next(iter(data.train_dl))
x = mnist_resize(x).cuda()

x.mean(),x.std()

p = model[0](x)
p.mean(),p.std()

for l in model:
    if isinstance(l, nn.Sequential):
        init.kaiming_normal_(l[0].weight)
        l[0].bias.data.zero_()

p = model[0](x)
p.mean(),p.std()

with Hooks(model, append_stats) as hooks:
    run.fit(2, learn)
    fig,(ax0,ax1) = plt.subplots(1,2, figsize=(10,4))
    for h in hooks:
        ms,ss = h.stats
        ax0.plot(ms[:10])
        ax1.plot(ss[:10])
    plt.legend(range(6));

    fig,(ax0,ax1) = plt.subplots(1,2, figsize=(10,4))
    for h in hooks:
        ms,ss = h.stats
        ax0.plot(ms)
        ax1.plot(ss)
    plt.legend(range(6));

def append_stats(hook, mod, inp, outp):
    if not hasattr(hook,'stats'): hook.stats = ([],[],[])
    means,stds,hists = hook.stats
    means.append(outp.data.mean().cpu())
    stds .append(outp.data.std().cpu())
    hists.append(outp.data.cpu().histc(40,0,10)) # histc isn't implemented on the GPU
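# Quick illustrative sketch of what histc(40, 0, 10) stores per batch: counts of activations
# falling into 40 equal-width bins spanning [0, 10], computed on the CPU as noted above.
# The fake post-ReLU activations below exist only for this sketch.
acts = torch.relu(torch.randn(10000))
hist = acts.histc(40, 0, 10)
hist.shape, hist[:4]   # 40 bin counts; most of the mass sits in the first few bins here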
model = get_cnn_model(data, nfs).cuda()
learn,run = get_runner(model, data, lr=0.9, cbs=cbfs)

for l in model:
    if isinstance(l, nn.Sequential):
        init.kaiming_normal_(l[0].weight)
        l[0].bias.data.zero_()

with Hooks(model, append_stats) as hooks: run.fit(1, learn)

# Thanks to @ste for initial version of histogram plotting code
def get_hist(h): return torch.stack(h.stats[2]).t().float().log1p()

fig,axes = plt.subplots(2,2, figsize=(15,6))
for ax,h in zip(axes.flatten(), hooks[:4]):
    ax.imshow(get_hist(h), origin='lower')
    ax.axis('off')
plt.tight_layout()

def get_min(h):
    h1 = torch.stack(h.stats[2]).t().float()
    return h1[:2].sum(0)/h1.sum(0)

fig,axes = plt.subplots(2,2, figsize=(15,6))
for ax,h in zip(axes.flatten(), hooks[:4]):
    ax.plot(get_min(h))
    ax.set_ylim(0,1)
plt.tight_layout()

#export
def get_cnn_layers(data, nfs, layer, **kwargs):
    nfs = [1] + nfs
    return [layer(nfs[i], nfs[i+1], 5 if i==0 else 3, **kwargs)
            for i in range(len(nfs)-1)] + [
        nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(nfs[-1], data.c)]

def conv_layer(ni, nf, ks=3, stride=2, **kwargs):
    return nn.Sequential(
        nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride), GeneralRelu(**kwargs))

class GeneralRelu(nn.Module):
    def __init__(self, leak=None, sub=None, maxv=None):
        super().__init__()
        self.leak,self.sub,self.maxv = leak,sub,maxv

    def forward(self, x):
        x = F.leaky_relu(x,self.leak) if self.leak is not None else F.relu(x)
        if self.sub  is not None: x.sub_(self.sub)
        if self.maxv is not None: x.clamp_max_(self.maxv)
        return x

def init_cnn(m, uniform=False):
    f = init.kaiming_uniform_ if uniform else init.kaiming_normal_
    for l in m:
        if isinstance(l, nn.Sequential):
            f(l[0].weight, a=0.1)
            l[0].bias.data.zero_()

def get_cnn_model(data, nfs, layer, **kwargs):
    return nn.Sequential(*get_cnn_layers(data, nfs, layer, **kwargs))

def append_stats(hook, mod, inp, outp):
    if not hasattr(hook,'stats'): hook.stats = ([],[],[])
    means,stds,hists = hook.stats
    means.append(outp.data.mean().cpu())
    stds .append(outp.data.std().cpu())
    hists.append(outp.data.cpu().histc(40,-7,7))
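# Quick illustrative check of GeneralRelu with the settings used below (leak=0.1, sub=0.4,
# maxv=6.): negatives are scaled by 0.1 instead of being zeroed, 0.4 is subtracted to pull
# the post-activation mean towards zero, and values are clamped at 6. The inputs are arbitrary.
gr = GeneralRelu(leak=0.1, sub=0.4, maxv=6.)
gr(torch.tensor([-2., 0., 1., 10.]))   # tensor([-0.6000, -0.4000,  0.6000,  6.0000])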
model = get_cnn_model(data, nfs, conv_layer, leak=0.1, sub=0.4, maxv=6.)
init_cnn(model)
learn,run = get_runner(model, data, lr=0.9, cbs=cbfs)

with Hooks(model, append_stats) as hooks:
    run.fit(1, learn)
    fig,(ax0,ax1) = plt.subplots(1,2, figsize=(10,4))
    for h in hooks:
        ms,ss,hi = h.stats
        ax0.plot(ms[:10])
        ax1.plot(ss[:10])
        h.remove()
    plt.legend(range(5));

    fig,(ax0,ax1) = plt.subplots(1,2, figsize=(10,4))
    for h in hooks:
        ms,ss,hi = h.stats
        ax0.plot(ms)
        ax1.plot(ss)
    plt.legend(range(5));

fig,axes = plt.subplots(2,2, figsize=(15,6))
for ax,h in zip(axes.flatten(), hooks[:4]):
    ax.imshow(get_hist(h), origin='lower')
    ax.axis('off')
plt.tight_layout()

def get_min(h):
    h1 = torch.stack(h.stats[2]).t().float()
    return h1[19:22].sum(0)/h1.sum(0)

fig,axes = plt.subplots(2,2, figsize=(15,6))
for ax,h in zip(axes.flatten(), hooks[:4]):
    ax.plot(get_min(h))
    ax.set_ylim(0,1)
plt.tight_layout()

#export
def get_learn_run(nfs, data, lr, layer, cbs=None, opt_func=None, uniform=False, **kwargs):
    model = get_cnn_model(data, nfs, layer, **kwargs)
    init_cnn(model, uniform=uniform)
    return get_runner(model, data, lr=lr, cbs=cbs, opt_func=opt_func)

sched = combine_scheds([0.5, 0.5], [sched_cos(0.2, 1.), sched_cos(1., 0.1)])

learn,run = get_learn_run(nfs, data, 1., conv_layer,
                          cbs=cbfs+[partial(ParamScheduler,'lr', sched)])

run.fit(8, learn)

learn,run = get_learn_run(nfs, data, 1., conv_layer, uniform=True,
                          cbs=cbfs+[partial(ParamScheduler,'lr', sched)])

run.fit(8, learn)

#export
from IPython.display import display, Javascript
def nb_auto_export():
    display(Javascript("""{
const ip = IPython.notebook
if (ip) {
    ip.save_notebook()
    console.log('a')
    const s = `!python notebook2script.py ${ip.notebook_name}`
    if (ip.kernel) { ip.kernel.execute(s) }
}
}"""))

nb_auto_export()
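# Quick illustrative look at what the uniform=True run above changes: kaiming_normal_ and
# kaiming_uniform_ target the same weight variance, so their stds come out close, while the
# uniform init has a hard bound and the normal one has tails. The conv shape is arbitrary
# and chosen only for this sketch.
w_n = nn.Conv2d(8, 16, 3).weight.data.clone()
w_u = nn.Conv2d(8, 16, 3).weight.data.clone()
init.kaiming_normal_ (w_n, a=0.1)
init.kaiming_uniform_(w_u, a=0.1)
print(w_n.std(), w_u.std())            # roughly equal standard deviations
w_n.abs().max(), w_u.abs().max()       # the normal init typically produces larger extremes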