%matplotlib inline
%reload_ext autoreload
%autoreload 2

from fastai.conv_learner import *
# Relative location of the CIFAR-10 image folders read by get_data.
PATH = Path("../data/cifar10/")

# The ten label names of the dataset.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
# Pair of 3-element arrays handed to tfms_from_stats
# (presumably per-channel mean and standard deviation -- confirm).
stats = (
    np.array([0.4914, 0.48216, 0.44653]),
    np.array([0.24703, 0.24349, 0.26159]),
)

def get_data(sz,bs):
    """Build the ImageClassifierData for images of size `sz` in batches of `bs`.

    sz: image size given to tfms_from_stats; the padding used is sz//8.
    bs: batch size given to ImageClassifierData.from_paths.

    Reads the module-level `stats` and `PATH`. The only augmentation is
    RandomFlip, and the folder named 'test' is used as the validation set.
    """
    augmentations = [RandomFlip()]
    transforms = tfms_from_stats(stats, sz, aug_tfms=augmentations, pad=sz//8)
    return ImageClassifierData.from_paths(
        PATH, val_name='test', tfms=transforms, bs=bs,
    )

# Image size and batch size used for the first set of experiments.
size = 32
batch_size = 64

# Data object for 32-pixel images in batches of 64 (see get_data).
data = get_data(size,batch_size)

def SimpleNet(layers):
    """Build a small fully connected classifier as an nn.Sequential.

    layers: sequence of layer widths, e.g. [32*32*3, 40, 10]. Consecutive
        pairs become nn.Linear layers.

    The model starts with Flatten(), puts a ReLU after every Linear layer
    except the last, and ends with a LogSoftmax.
    """
    list_layers = [Flatten()]
    last = len(layers) - 2  # index of the final Linear layer
    for i, (n_in, n_out) in enumerate(zip(layers[:-1], layers[1:])):
        list_layers.append(nn.Linear(n_in, n_out))
        if i < last:
            list_layers.append(nn.ReLU(inplace=True))
        else:
            # Normalise over the class dimension. The Linear output is
            # (batch, n_classes) -- assuming Flatten() yields (batch, features) --
            # so dim=0 would wrongly normalise across the samples of a batch.
            list_layers.append(nn.LogSoftmax(dim=1))
    return nn.Sequential(*list_layers)

# Learner wrapping a SimpleNet with layer widths 32*32*3 -> 40 -> 10.
learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40,10]), data)

# Two constant-lr SGD phases: 1 epoch at 1e-2, then 2 epochs at 1e-3.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), TrainingPhase(epochs=2, opt_fn=optim.SGD, lr = 1e-3)]

# Train following the list of phases.
learn.fit_opt_sched(phases)

# Plot the schedule that was just run, first with the default options ...
learn.sched.plot_lr()

# ... then with show_text=False ...
learn.sched.plot_lr(show_text=False)

# ... and with show_moms=False too (presumably drops the momentum plot -- confirm).
learn.sched.plot_lr(show_text=False, show_moms=False)

# Three 1-epoch SGD phases: constant 1e-2, LINEAR decay from 1e-2 to 1e-3,
# then constant 1e-3.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.LINEAR),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

# Linear decay: lr_i = start_lr + (end_lr - start_lr) * i/n

# Plot the learning rates of the run above (show_moms=False).
learn.sched.plot_lr(show_moms=False)

# Same three phases, but the middle one uses COSINE decay from 1e-2 to 1e-3.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.COSINE),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

# Cosine decay: lr_i = end_lr + (start_lr - end_lr)/2 * (1 + np.cos(i * np.pi / n))

# Plot the learning rates of the run above (show_moms=False).
learn.sched.plot_lr(show_moms=False)

# Same three phases, but the middle one uses EXPONENTIAL decay from 1e-2 to 1e-3.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=DecayType.EXPONENTIAL),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

# Exponential decay: lr_i = start_lr * (end_lr/start_lr)**(i/n)

# Plot the learning rates of the run above (show_moms=False).
learn.sched.plot_lr(show_moms=False)

# Same three phases, but the middle one uses POLYNOMIAL decay with the extra
# argument 2, passed as a (DecayType, value) tuple.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=(DecayType.POLYNOMIAL, 2)),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

# Polynomial decay: lr_i = end_lr + (start_lr - end_lr) * (1 - i/n) ** p

# Plot the learning rates of the run above (show_moms=False).
learn.sched.plot_lr(show_moms=False)

# POLYNOMIAL decay again, this time with the extra argument 0.5 instead of 2.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = (1e-2,1e-3), lr_decay=(DecayType.POLYNOMIAL, 0.5)),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

learn.sched.plot_lr(show_moms=False)

# The middle phase now gets a single lr (1e-2) together with COSINE decay,
# instead of a (start, end) pair.
# NOTE(review): the end value is then chosen by fastai (presumably 0) -- confirm.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2), 
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-2, lr_decay=DecayType.COSINE),
          TrainingPhase(epochs=1, opt_fn=optim.SGD, lr = 1e-3)]

learn.fit_opt_sched(phases)

learn.sched.plot_lr(show_moms=False)

def phases_sgdr(lr, opt_fn, num_cycle,cycle_len,cycle_mult):
    """Return the list of TrainingPhase objects for a run of `num_cycle` cycles.

    lr: learning rate given to every cosine-decayed phase.
    opt_fn: optimizer constructor used by every phase.
    num_cycle: total number of cycles.
    cycle_len: length of the first cycle, in epochs.
    cycle_mult: cycle i (counting from 0) lasts cycle_len * cycle_mult**i epochs.

    The first cycle is split in two: cycle_len/20 epochs at a constant lr/100,
    then cycle_len*19/20 epochs at `lr` with COSINE decay. Every later cycle
    is a single COSINE-decayed phase.
    """
    warmup = TrainingPhase(epochs = cycle_len/ 20, opt_fn=opt_fn, lr=lr/100)
    first_cycle = TrainingPhase(epochs = cycle_len * 19/20, opt_fn=opt_fn, lr=lr,
                                lr_decay=DecayType.COSINE)
    later_cycles = [
        TrainingPhase(epochs = cycle_len * (cycle_mult**k), opt_fn=opt_fn, lr=lr,
                      lr_decay=DecayType.COSINE)
        for k in range(1, num_cycle)
    ]
    return [warmup, first_cycle] + later_cycles

# Run the phases_sgdr schedule: lr 1e-2, Adam, 2 cycles, first cycle of
# 1 epoch, cycle_mult 2.
learn.fit_opt_sched(phases_sgdr(1e-2, optim.Adam, 2, 1, 2))

learn.sched.plot_lr(show_text=False,show_moms=False)

def phases_1cycle(cycle_len,lr,div,pct,max_mom,min_mom):
    """Return the three SGD TrainingPhase objects of a 1cycle-style schedule.

    cycle_len: total length of the schedule, in epochs.
    lr: highest learning rate; the lowest one of the triangle is lr/div.
    div: ratio between the highest and the lowest learning rate of the triangle.
    pct: percentage of cycle_len kept for the final phase.
    max_mom, min_mom: bounds between which the momentum moves.

    The first (1 - pct/100) * cycle_len epochs are split into two equal
    linear phases: lr goes lr/div -> lr while momentum goes max_mom -> min_mom,
    then both go back. The remaining epochs decay lr linearly from lr/div to
    lr/(100*div) with momentum fixed at max_mom.
    """
    tri_cyc = (1-pct/100) * cycle_len
    ramp_up = TrainingPhase(
        epochs=tri_cyc/2, opt_fn=optim.SGD,
        lr=(lr/div,lr), lr_decay=DecayType.LINEAR,
        momentum=(max_mom,min_mom), momentum_decay=DecayType.LINEAR,
    )
    ramp_down = TrainingPhase(
        epochs=tri_cyc/2, opt_fn=optim.SGD,
        lr=(lr,lr/div), lr_decay=DecayType.LINEAR,
        momentum=(min_mom,max_mom), momentum_decay=DecayType.LINEAR,
    )
    tail = TrainingPhase(
        epochs=cycle_len-tri_cyc, opt_fn=optim.SGD,
        lr=(lr/div,lr/(100*div)), lr_decay=DecayType.LINEAR,
        momentum=max_mom,
    )
    return [ramp_up, ramp_down, tail]

# Run the phases_1cycle schedule: 3 epochs, top lr 1e-2, div 10, pct 10,
# momentum between 0.95 and 0.85.
learn.fit_opt_sched(phases_1cycle(3, 1e-2, 10, 10, 0.95, 0.85))

learn.sched.plot_lr(show_text=False)

# Replace the learner with a pretrained resnet34 that reports accuracy.
learn = ConvLearner.pretrained(resnet34, data, metrics=[accuracy])

learn.unfreeze()
lr = 1e-2
# Array of three learning rates: lr/100, lr/10 and lr
# (presumably one per layer group of the model -- confirm).
lrs = np.array([lr/100,lr/10,lr])

# Adam schedule using the lr array: 1 epoch rising linearly from lrs/10 to lrs,
# then 2 epochs at lrs with COSINE decay.
phases = [TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=(lrs/10,lrs), lr_decay=DecayType.LINEAR),
          TrainingPhase(epochs=2, opt_fn=optim.Adam, lr=lrs, lr_decay=DecayType.COSINE)]

learn.fit_opt_sched(phases)

learn.sched.plot_lr(show_text=False, show_moms=False)

# Learning-rate sweep: a quarter epoch of SGD (momentum 0.9) with the lr
# growing exponentially from 1e-5 to 10.
phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(1e-5,10), lr_decay=DecayType.EXPONENTIAL,momentum=0.9)]

# Fresh SimpleNet learner for the sweeps.
learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40,10]), data)

# Save the model under 'tmp', run the sweep, then load 'tmp' again so the
# sweep does not change the weights kept in `learn`.
# stop_div=True presumably stops the run once the loss diverges -- confirm.
learn.save('tmp')
learn.fit_opt_sched(phases, stop_div=True)
learn.load('tmp')

learn.sched.plot()

# Same procedure with a LINEAR sweep from 0.001 to 0.1.
phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(0.001,0.1), lr_decay=DecayType.LINEAR,momentum=0.9)]
learn.save('tmp')
learn.fit_opt_sched(phases, stop_div=True)
learn.load('tmp')

learn.sched.plot()

# Exponential sweep again, this time plotted with linear=True.
phases = [TrainingPhase(epochs=0.25, opt_fn=optim.SGD, lr=(1e-5,10), lr_decay=DecayType.EXPONENTIAL,momentum=0.9)]
learn.save('tmp')
learn.fit_opt_sched(phases, stop_div=True)
learn.load('tmp')

learn.sched.plot(linear=True)

# Three 1-epoch phases that also switch optimizer:
#   1. SGD, lr rising linearly 1e-3 -> 1e-2 while momentum falls 0.95 -> 0.85;
#   2. SGD, lr falling linearly 1e-2 -> 1e-3 while momentum rises 0.85 -> 0.95;
#   3. Adam at lr 1e-3 with COSINE decay and momentum=0.9.
phases = [TrainingPhase(epochs=1, opt_fn=optim.SGD, lr=(1e-3,1e-2), lr_decay=DecayType.LINEAR, 
                          momentum=(0.95,0.85), momentum_decay=DecayType.LINEAR),
           TrainingPhase(epochs=1, opt_fn=optim.SGD, lr=(1e-2,1e-3), lr_decay=DecayType.LINEAR, 
                          momentum=(0.85,0.95), momentum_decay=DecayType.LINEAR),
           TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=1e-3, lr_decay=DecayType.COSINE, momentum=0.9)]

learn.fit_opt_sched(phases)

learn.sched.plot_lr()

def ConvBN(n_in, n_out, stride):
    """Return a 3x3 convolution (padding 1) followed by batch norm.

    n_in: number of input channels.
    n_out: number of output channels, also the BatchNorm2d feature count.
    stride: stride of the convolution.
    """
    conv = nn.Conv2d(n_in, n_out, 3, stride=stride, padding=1)
    norm = nn.BatchNorm2d(n_out)
    return nn.Sequential(conv, norm)

def ShallowConvNet():
    """Build a two-convolution classifier with 10 outputs as an nn.Sequential.

    Layers: ConvBN(3 -> 64, stride 1), ReLU, ConvBN(64 -> 128, stride 2), ReLU,
    adaptive average pooling to 1x1, Flatten, Linear(128 -> 10), LogSoftmax.
    The adaptive pooling makes the model independent of the input image size.
    """
    # The first convolution uses stride 1: a stride of 20 would shrink a
    # 28- or 32-pixel image to 2x2 in one step and discard nearly all
    # spatial information.
    # LogSoftmax normalises over the class dimension (dim=1) of the
    # (batch, 10) output; dim=0 would normalise across the batch.
    listlayers = [ConvBN(3,64,1), nn.ReLU(inplace=True), ConvBN(64,128,2), nn.ReLU(inplace=True),
                  nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(128,10), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*listlayers)

# Two data objects that differ only in image size: 28 and 32 pixels.
data1 = get_data(28,batch_size)
data2 = get_data(32,batch_size)

# Learner around the convolutional net, created on the 28-pixel data.
learn = ConvLearner.from_model_data(ShallowConvNet(), data1)

# Two Adam phases with COSINE decay from lr 1e-2: 1 epoch, then 2 epochs.
phases = [TrainingPhase(epochs=1, opt_fn=optim.Adam, lr=1e-2, lr_decay=DecayType.COSINE),
          TrainingPhase(epochs=2, opt_fn=optim.Adam, lr=1e-2, lr_decay=DecayType.COSINE)]

# data_list holds as many data objects as there are phases
# (presumably phase i trains on data_list[i] -- confirm).
learn.fit_opt_sched(phases, data_list=[data1,data2])

learn.sched.plot_lr(show_moms=False)