Important: This notebook only works with fastai 0.7.x. Do not try to run any fastai 1.x code from this path in the repository, because imports here will resolve to fastai 0.7.x.
# Notebook setup: inline plots plus auto-reload of edited modules.
%matplotlib inline
%reload_ext autoreload
%autoreload 2
# fastai 0.7 star-import brings Path, os, np, torch, num_cpus, etc. into scope.
from fastai.conv_learner import *
PATH = Path("data/cifar10/")
os.makedirs(PATH,exist_ok=True)
# Pin all work to GPU 1 — adjust to whichever GPU is free on your machine.
torch.cuda.set_device(1)
# CIFAR-10 class names (kept for reference; not used directly below).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Per-channel mean/std of CIFAR-10 used to normalize inputs.
stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159]))
num_workers = num_cpus()//2
bs=256
sz=32
# Augmentation: random horizontal flip; pad=sz//8 (4 px) enables random crops back to 32x32.
tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
# Expects data/cifar10/{train,test}/<class>/ folders; the 'test' split serves as validation.
data = ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)
def conv_layer(ni, nf, ks=3, stride=1):
    """Conv -> BatchNorm -> LeakyReLU building block.

    ni: input channels; nf: output channels; ks: square kernel size;
    stride: convolution stride. Padding of ks//2 preserves the spatial
    size when stride is 1. The conv has no bias since BatchNorm follows.
    """
    conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
    bn = nn.BatchNorm2d(nf, momentum=0.01)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, bn, act)
class ResLayer(nn.Module):
    """Residual bottleneck block: a 1x1 conv halving the channels followed
    by a 3x3 conv restoring them, with the result added to the input."""

    def __init__(self, ni):
        super().__init__()
        self.conv1 = conv_layer(ni, ni//2, ks=1)
        self.conv2 = conv_layer(ni//2, ni, ks=3)

    def forward(self, x):
        # Out-of-place add on purpose: the in-place x.add_ triggered
        # "one of the variables needed for gradient computation has been
        # modified by an inplace operation" during backprop on a single GPU.
        out = self.conv1(x)
        out = self.conv2(out)
        return x.add(out)
class Darknet(nn.Module):
    """Darknet-style CNN: a stem conv, then groups of one channel-doubling
    conv followed by residual blocks, finished by a pooled linear head."""

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        # One group = conv that doubles the channel count (downsampling when
        # stride=2) plus num_blocks residual layers at the doubled width.
        group = [conv_layer(ch_in, ch_in*2, stride=stride)]
        group.extend(ResLayer(ch_in*2) for _ in range(num_blocks))
        return group

    def __init__(self, num_blocks, num_classes, nf=32):
        super().__init__()
        layers = [conv_layer(3, nf, ks=3, stride=1)]
        for i, nb in enumerate(num_blocks):
            # stride = 2-(i==1): only the second group (i == 1) keeps
            # stride 1; every other group halves the spatial resolution.
            layers.extend(self.make_group_layer(nf, nb, stride=2-(i==1)))
            nf *= 2
        layers.extend([nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)])
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
# Darknet with residual-group sizes [1, 2, 4, 6, 3]; channel width doubles
# per group starting from nf=32.
m = Darknet([1, 2, 4, 6, 3], num_classes=10, nf=32)
# device_ids=None lets DataParallel use every visible GPU.
m = nn.DataParallel(m, device_ids=None)
# if you have several GPUs for true parallel processing enable
# m = nn.DataParallel(m, device_ids=[1, 2, 3])
lr = 1.3
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
# 30-epoch one-cycle run; use_clr_beta args are presumably
# (lr-div, warmup-pct, max-mom, min-mom) — verify against fastai 0.7 docs.
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.057594 1.163891 0.6072 1 0.791428 0.859953 0.7078 2 0.642778 0.884299 0.7168 3 0.586491 0.85431 0.7149 4 0.530411 0.724244 0.7607 5 0.492245 0.718871 0.764 6 0.465657 0.546274 0.8112 7 0.437823 0.536493 0.8182 8 0.440909 0.69369 0.7729 9 0.408925 1.115436 0.7126 10 0.401172 0.902935 0.733 11 0.397317 0.690258 0.7921 12 0.376588 0.514558 0.8287 13 0.366199 0.442919 0.8527 14 0.345316 0.796473 0.7753 15 0.333985 0.405802 0.8646 16 0.306255 0.492593 0.8455 17 0.307262 0.405131 0.8656 18 0.285951 0.504936 0.8388 19 0.262031 0.564698 0.8289 20 0.255914 0.602085 0.8234 21 0.20444 0.316775 0.9016 22 0.161276 0.281515 0.9096 23 0.110206 0.221838 0.929 24 0.071019 0.203805 0.9368 25 0.056447 0.220052 0.9346 26 0.047433 0.206167 0.9388 27 0.0333 0.212472 0.94 28 0.025766 0.209915 0.9423 29 0.021697 0.207422 0.9427 CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s Wall time: 15min 12s
[array([0.20742]), 0.9427]
# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
# Brief 1-epoch fine-tune at one tenth of the peak learning rate.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.294204 1.058497 0.6254
[array([1.0585]), 0.6254]
# Re-run the 30-epoch cycle with the same hyperparameters as above, timed.
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.057594 1.163891 0.6072 1 0.791428 0.859953 0.7078 2 0.642778 0.884299 0.7168 3 0.586491 0.85431 0.7149 4 0.530411 0.724244 0.7607 5 0.492245 0.718871 0.764 6 0.465657 0.546274 0.8112 7 0.437823 0.536493 0.8182 8 0.440909 0.69369 0.7729 9 0.408925 1.115436 0.7126 10 0.401172 0.902935 0.733 11 0.397317 0.690258 0.7921 12 0.376588 0.514558 0.8287 13 0.366199 0.442919 0.8527 14 0.345316 0.796473 0.7753 15 0.333985 0.405802 0.8646 16 0.306255 0.492593 0.8455 17 0.307262 0.405131 0.8656 18 0.285951 0.504936 0.8388 19 0.262031 0.564698 0.8289 20 0.255914 0.602085 0.8234 21 0.20444 0.316775 0.9016 22 0.161276 0.281515 0.9096 23 0.110206 0.221838 0.929 24 0.071019 0.203805 0.9368 25 0.056447 0.220052 0.9346 26 0.047433 0.206167 0.9388 27 0.0333 0.212472 0.94 28 0.025766 0.209915 0.9423 29 0.021697 0.207422 0.9427 CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s Wall time: 15min 12s
[array([0.20742]), 0.9427]
# Another short 1-epoch fine-tune at lr/10.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.290646 1.08506 0.6043
[array([1.08506]), 0.6043]
# Longer 40-epoch cycle with a different CLR shape (10, 15) — presumably a
# steeper warmup; confirm the parameter meaning against fastai 0.7.
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(10, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.084963 1.085174 0.6082 1 0.825962 1.001847 0.6643 2 0.702493 0.930496 0.6788 3 0.593327 0.76002 0.7503 4 0.543732 0.654882 0.7788 5 0.503807 0.954524 0.7058 6 0.460451 0.520503 0.8216 7 0.429511 0.73571 0.7748 8 0.419969 0.555502 0.8179 9 0.415771 0.593286 0.8055 10 0.40077 0.642513 0.8029 11 0.385437 0.965159 0.7204 12 0.383747 0.691304 0.783 13 0.35558 0.705928 0.774 14 0.355323 0.631229 0.8075 15 0.354718 0.796365 0.7543 16 0.348135 0.771605 0.7833 17 0.330313 0.57747 0.819 18 0.319511 0.58501 0.8249 19 0.305789 0.553965 0.8239 20 0.309558 0.632576 0.794 21 0.312331 0.490521 0.8399 22 0.292812 0.740069 0.7778 23 0.277894 0.671362 0.8029 24 0.273641 0.489029 0.8439 25 0.262769 0.492826 0.8458 26 0.262545 0.417226 0.8648 27 0.24185 0.522333 0.8441 28 0.235303 0.573098 0.8327 29 0.223754 0.468723 0.8581 30 0.212939 0.421378 0.8665 31 0.168992 0.31097 0.9021 32 0.162031 0.339769 0.8921 33 0.11894 0.280535 0.9163 34 0.091013 0.250175 0.9251 35 0.067888 0.2331 0.9318 36 0.047799 0.226535 0.9368 37 0.030331 0.216385 0.9405 38 0.020049 0.204373 0.9429 39 0.012407 0.20191 0.9448 CPU times: user 31min 18s, sys: 8min 10s, total: 39min 28s Wall time: 39min 28s
[array([0.20191]), 0.9448]
# Short 1-epoch fine-tune at lr/10 after the 40-epoch run.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.272734 1.096014 0.6024
[array([1.09601]), 0.6024]
# 30-epoch cycle at a lower peak learning rate of 1.0 (instead of lr=1.3).
%time learn.fit(1., 1, wds=wd, cycle_len=30, use_clr_beta=(10, 25, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.05031 1.058887 0.6364 1 0.784812 1.010626 0.6695 2 0.662347 0.962157 0.6972 3 0.575188 0.63956 0.7755 4 0.516394 0.830657 0.7348 5 0.465992 0.802316 0.7552 6 0.437692 0.666769 0.7819 7 0.421664 0.699891 0.7839 8 0.409448 0.716692 0.7687 9 0.378664 0.57033 0.8045 10 0.378573 0.713038 0.7787 11 0.358377 0.843966 0.7419 12 0.328598 0.72601 0.7865 13 0.32008 0.702174 0.7723 14 0.299527 0.478337 0.8429 15 0.292695 0.692018 0.7925 16 0.279109 0.502219 0.8366 17 0.252726 0.3686 0.8795 18 0.243707 0.375788 0.8743 19 0.220037 0.447402 0.8516 20 0.200196 0.408494 0.8703 21 0.145898 0.295205 0.9067 22 0.10881 0.255926 0.9216 23 0.086912 0.239246 0.9252 24 0.065587 0.299195 0.9173 25 0.058427 0.239054 0.9322 26 0.035484 0.233135 0.9357 27 0.026134 0.229944 0.9382 28 0.018713 0.225882 0.9424 29 0.012363 0.228542 0.9416 CPU times: user 23min 22s, sys: 6min 6s, total: 29min 28s Wall time: 29min 27s
[array([0.22854]), 0.9416]
# 40-epoch cycle with a larger first CLR parameter (100 vs 10 above).
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(100, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.428835 1.412945 0.5194 1 0.995178 2.106482 0.4173 2 0.783325 1.311001 0.6189 3 0.650752 0.948611 0.6966 4 0.572025 0.611907 0.7906 5 0.540067 0.853141 0.7339 6 0.479793 0.552199 0.8167 7 0.457727 0.951997 0.7204 8 0.429621 0.825224 0.7421 9 0.412101 1.17335 0.6826 10 0.4009 0.808628 0.7315 11 0.383221 0.60638 0.804 12 0.36901 0.647075 0.7874 13 0.368189 0.741168 0.7825 14 0.366653 0.653589 0.7973 15 0.355023 0.71174 0.7841 16 0.354656 0.581478 0.8107 17 0.335075 0.602485 0.8103 18 0.336688 0.677685 0.7901 19 0.318256 0.670932 0.7865 20 0.30799 0.627659 0.8039 21 0.298475 0.429212 0.8597 22 0.297467 0.565836 0.8317 23 0.273995 0.513813 0.8408 24 0.270703 0.557105 0.822 25 0.259869 0.370845 0.8781 26 0.242922 0.517631 0.8333 27 0.22433 0.424905 0.8628 28 0.217046 0.460085 0.8528 29 0.179096 0.329448 0.8916 30 0.15454 0.34805 0.892 31 0.125648 0.262553 0.919 32 0.0801 0.244773 0.9244 33 0.047182 0.206418 0.9396 34 0.082104 0.277417 0.9182 35 0.070456 0.292839 0.9197 36 0.049607 0.245121 0.9325 37 0.03186 0.245641 0.9357 38 0.017482 0.21458 0.9405 39 0.012877 0.207285 0.9427 CPU times: user 31min 8s, sys: 8min 12s, total: 39min 20s Wall time: 39min 22s
[array([0.20728]), 0.9427]
# darknet 2222 lr 1.3 65 cl
# Longest experiment: 65-epoch cycle at lr=1.3.
%time learn.fit(lr, 1, wds=wd, cycle_len=65, use_clr_beta=(30, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy 0 1.533084 1.725181 0.4627 1 1.224625 1.241789 0.5727 2 0.995259 1.005162 0.6476 3 0.865179 0.949657 0.6642 4 0.756122 0.854677 0.6968 5 0.692097 1.110497 0.6578 6 0.635014 0.805524 0.7227 7 0.588518 0.759759 0.7334 8 0.567764 0.868924 0.7131 9 0.547826 0.700656 0.7645 10 0.524676 1.005113 0.6889 11 0.50653 0.721323 0.7645 12 0.493718 1.125408 0.6608 13 0.479434 0.756994 0.7639 14 0.475674 0.73913 0.7589 15 0.464452 0.612312 0.7955 16 0.453685 0.772014 0.757 17 0.436029 0.60522 0.7943 18 0.437321 0.555058 0.8158 19 0.439846 0.819791 0.7449 20 0.420495 0.994983 0.719 21 0.416594 0.687188 0.7813 22 0.413399 0.714974 0.7787 23 0.421343 0.696471 0.7761 24 0.41174 0.853185 0.7445 25 0.411808 0.693145 0.7781 26 0.412166 0.847656 0.7456 27 0.402742 0.73174 0.772 28 0.391636 0.685092 0.7868 29 0.384671 0.635394 0.7931 30 0.364357 0.856764 0.7271 31 0.374435 0.490243 0.8325 32 0.364152 0.685217 0.7872 33 0.361441 0.724616 0.7843 34 0.344948 0.541638 0.8189 35 0.341661 0.604952 0.8152 36 0.337969 0.571531 0.8172 37 0.328699 0.55272 0.8177 38 0.32664 0.429266 0.8554 39 0.316233 0.424243 0.8555 40 0.302454 0.455984 0.8502 41 0.296169 0.61181 0.8123 42 0.283048 0.572225 0.8267 43 0.275228 0.453885 0.853 44 0.273048 0.408815 0.863 45 0.254404 0.397202 0.8715 46 0.219166 0.403471 0.868 47 0.215263 0.323341 0.8928 48 0.192285 0.37336 0.8824 49 0.163661 0.270863 0.9095 50 0.118515 0.269602 0.9151 51 0.089315 0.209591 0.9317 52 0.058886 0.212586 0.9339 53 0.05148 0.212392 0.9345 54 0.046729 0.232031 0.9343 55 0.038997 0.231949 0.9349 56 0.035254 0.233632 0.9349 57 0.03046 0.232361 0.937 58 0.027203 0.22916 0.94 59 0.020285 0.231641 0.9401 60 0.017448 0.23432 0.9405 61 0.016971 0.232452 0.9415 62 0.011784 0.23313 0.9416 63 0.011399 0.233199 0.9432 64 0.009589 0.233732 0.9422 CPU times: user 1h 5min 54s, sys: 16min 4s, total: 1h 21min 59s Wall time: 57min 23s
[array([0.23373]), 0.9422]