%matplotlib inline %reload_ext autoreload %autoreload 2 from fastai.conv_learner import * PATH = Path("../data/cifar10/") classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159])) def get_data(sz,bs): tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8) return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs) size = 32 batch_size = 64 data = get_data(size,batch_size) def SimpleNet(layers): list_layers=[Flatten()] for i in range(len(layers)-1): list_layers.append(nn.Linear(layers[i], layers[i + 1])) if i < len(layers)-2: list_layers.append(nn.ReLU(inplace=True)) else: list_layers.append(nn.LogSoftmax(dim=0)) return nn.Sequential(*list_layers) learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40,10]), data) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=2, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) learn.sched.plot_lr() learn.sched.plot_lr(show_text=False) learn.sched.plot_lr(show_text=False, show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.LINEAR), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) lr_i = start_lr + (end_lr - start_lr) * i/n learn.sched.plot_lr(show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.COSINE), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) lr_i = end_lr + (start_lr - end_lr)/2 * ( 1 + np.cos( i * np.pi) / n ) learn.sched.plot_lr(show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.EXPONENTIAL), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) lr_i = start_lr * (end_lr/start_lr)**(i/n) learn.sched.plot_lr(show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=(DecayType.POLYNOMIAL, 2)), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) lr_i = end_lr + (start_lr - end_lr) * (1 - i/n) ** p learn.sched.plot_lr(show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=(DecayType.POLYNOMIAL, 0.5)), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) learn.sched.plot_lr(show_moms=False) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2, lr_decay=DecayType.COSINE), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)] learn.fit_opt_sched(phases) learn.sched.plot_lr(show_moms=False) def phases_sgdr(lr, opt_fn, num_cycle,cycle_len,cycle_mult): phases = [TrainingPhase(epochs = cycle_len/ 20, opt_fn=opt_fn, lr=lr/100), TrainingPhase(epochs = cycle_len * 19/20, opt_fn=opt_fn, lr=lr, lr_decay=DecayType.COSINE)] for i in range(1,num_cycle): phases.append(TrainingPhase(epochs = cycle_len * (cycle_mult**i), opt_fn=opt_fn, lr=lr, lr_decay=DecayType.COSINE)) return phases learn.fit_opt_sched(phases_sgdr(1e-2, optim.Adam, 2, 1, 2)) learn.sched.plot_lr(show_text=False,show_moms=False) def phases_1cycle(cycle_len,lr,div,pct,max_mom,min_mom): tri_cyc = (1-pct/100) * cycle_len return [TrainingPhase(epochs=tri_cyc/2, opt_fn=optim.SGD, lr=(lr/div,lr), lr_decay=DecayType.LINEAR, momentum=(max_mom,min_mom), momentum_decay=DecayType.LINEAR), TrainingPhase(epochs=tri_cyc/2, opt_fn=optim.SGD, lr=(lr,lr/div), lr_decay=DecayType.LINEAR, momentum=(min_mom,max_mom), momentum_decay=DecayType.LINEAR), TrainingPhase(epochs=cycle_len-tri_cyc, opt_fn=optim.SGD, lr=(lr/div,lr/(100*div)), lr_decay=DecayType.LINEAR, momentum=max_mom)] learn.fit_opt_sched(phases_1cycle(3, 1e-2, 10, 10, 0.95, 0.85)) learn.sched.plot_lr(show_text=False) learn = ConvLearner.pretrained(resnet34, data, metrics=[accuracy]) learn.unfreeze() lr = 1e-2 lrs = np.array([lr/100,lr/10,lr]) phases = [TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=(lrs/10,lrs), lr_decay=DecayType.LINEAR), TrainingPhase(epochs=2, opt_fn=optim.Adam, lr=lrs, lr_decay=DecayType.COSINE)] learn.fit_opt_sched(phases) learn.sched.plot_lr(show_text=False, show_moms=False) phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(1e-5,10), lr_decay=DecayType.EXPONENTIAL,momentum=0.9)] learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40,10]), data) learn.save('tmp') learn.fit_opt_sched(phases, stop_div=True) learn.load('tmp') learn.sched.plot() phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(0.001,0.1), lr_decay=DecayType.LINEAR,momentum=0.9)] learn.save('tmp') learn.fit_opt_sched(phases, stop_div=True) learn.load('tmp') learn.sched.plot() phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(1e-5,10), lr_decay=DecayType.EXPONENTIAL,momentum=0.9)] learn.save('tmp') learn.fit_opt_sched(phases, stop_div=True) learn.load('tmp') learn.sched.plot(linear=True) phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr=(1e-3,1e-2), lr_decay=DecayType.LINEAR, momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR), TrainingPhase(epochs=1, opt_fn=optim.SGD, lr=(1e-2,1e-3), lr_decay=DecayType.LINEAR, momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR), TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=1e-3, lr_decay=DecayType.COSINE, momentum=0.9)] learn.fit_opt_sched(phases) learn.sched.plot_lr() def ConvBN(n_in, n_out, stride): return nn.Sequential(nn.Conv2d(n_in,n_out,3,stride=stride,padding=1), nn.BatchNorm2d(n_out)) def ShallowConvNet(): listlayers = [ConvBN(3,64,20), nn.ReLU(inplace=True), ConvBN(64,128,2), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(128,10), nn.LogSoftmax(dim=0)] return nn.Sequential(*listlayers) data1 = get_data(28,batch_size) data2 = get_data(32,batch_size) learn = ConvLearner.from_model_data(ShallowConvNet(), data1) phases = [TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=1e-2, lr_decay=DecayType.COSINE), TrainingPhase(epochs=2, opt_fn=optim.Adam, lr=1e-2, lr_decay=DecayType.COSINE)] learn.fit_opt_sched(phases, data_list=[data1,data2]) learn.sched.plot_lr(show_moms=False)