You can get the data via:
wget http://pjreddie.com/media/files/cifar.tgz
Important: Before proceeding, the student must reorganize the downloaded dataset files to match the expected directory structure, so that there is a dedicated folder for each class under 'test' and 'train', e.g.:
* test/airplane/airplane-1001.png
* test/bird/bird-1043.png
* train/bird/bird-10018.png
* train/automobile/automobile-10000.png
The filename of the image doesn't have to include its class.
from fastai.conv_learner import *
PATH = "data/cifar10/"
!ls {PATH}
if not os.path.exists(f"{PATH}/train/bird"):
raise Exception("expecting class subdirs under 'train/' and 'test/'")
!ls {PATH}/train
labels.txt test train airplane automobile bird cat deer dog frog horse ship truck
# Human-readable class names.
# NOTE(review): the dataset folders listed above are named 'airplane' and
# 'automobile', not 'plane' and 'car', and this tuple is not referenced by any
# other visible cell — confirm it is display-only.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Passed to tfms_from_stats() in get_data(). Presumably the per-channel (RGB)
# mean and standard deviation of the training images — TODO confirm.
stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159]))
def get_data(sz, bs):
    """Build the image-classification data object for the dataset at PATH.

    sz: image size handed to the transforms; the padding is derived from it
        as ``sz // 8``.
    bs: batch size forwarded to ``ImageClassifierData.from_paths``.

    The 'test' folder is used as the validation set, and the only
    augmentation requested is ``RandomFlip``.
    """
    augmentations = [RandomFlip()]
    padding = sz // 8
    transforms = tfms_from_stats(stats, sz, aug_tfms=augmentations, pad=padding)
    return ImageClassifierData.from_paths(
        PATH, val_name='test', tfms=transforms, bs=bs)
# NOTE(review): `bs` was never assigned in the visible cells, so the second
# call below raised NameError. 256 is the batch size I believe the upstream
# fast.ai lesson used — confirm, and lower it if GPU memory is tight.
bs = 256
# Tiny batch first; it is overwritten immediately by the real training data.
data = get_data(32,4)
data = get_data(32,bs)
From this notebook by our student Kerem Turgutlu:
class SimpleNet(nn.Module):
    """Fully connected classifier applied to flattened inputs.

    ``layers`` lists the feature sizes: ``layers[0]`` is the flattened input
    width, ``layers[-1]`` the number of outputs, and one ``nn.Linear`` is
    created for every consecutive pair.
    """

    def __init__(self, layers):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the ModuleList assignment below raises.
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, layers[-1])``."""
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        for l in self.layers:
            l_x = l(x)
            x = F.relu(l_x)
        # The softmax is taken over the last layer's pre-ReLU activations.
        return F.log_softmax(l_x, dim=-1)
# 32*32*3 = 3072 flattened input values, one hidden layer of 40 units, and
# 10 outputs (one per class).
learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40,10]), data)
# Display the learner together with the element count of each parameter tensor.
learn, [o.numel() for o in learn.model.parameters()]
(SimpleNet( (layers): ModuleList( (0): Linear(in_features=3072, out_features=40) (1): Linear(in_features=40, out_features=10) ) ), [122880, 40, 400, 10])
OrderedDict([('Linear-1', OrderedDict([('input_shape', [-1, 3072]), ('output_shape', [-1, 40]), ('trainable', True), ('nb_params', 122920)])), ('Linear-2', OrderedDict([('input_shape', [-1, 40]), ('output_shape', [-1, 10]), ('trainable', True), ('nb_params', 410)]))])
%time learn.fit(lr, 2)
[ 0. 1.7658 1.64148 0.42129] [ 1. 1.68074 1.57897 0.44131] CPU times: user 1min 11s, sys: 32.3 s, total: 1min 44s Wall time: 55.1 s
# Two further cycles of one epoch each (the output below lists epochs 0-1).
# cycle_len presumably turns on the per-cycle learning-rate schedule — confirm
# against the fastai docs.
%time learn.fit(lr, 2, cycle_len=1)
[ 0. 1.60857 1.51711 0.46631] [ 1. 1.59361 1.50341 0.46924] CPU times: user 1min 12s, sys: 31.8 s, total: 1min 44s Wall time: 55.3 s
class ConvNet(nn.Module):
    """Stack of stride-2 3x3 convolutions, global max pool, linear classifier.

    ``layers`` lists channel counts (``layers[0]`` is the input channel
    count); ``c`` is the number of output classes.
    """

    def __init__(self, layers, c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=2)
            for n_in, n_out in zip(layers, layers[1:])])
        self.pool = nn.AdaptiveMaxPool2d(1)
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        for l in self.layers:
            x = F.relu(l(x))
        # Collapse each feature map to a single value, then drop the 1x1 dims.
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)
# Three stride-2 convolutions taking 3 input channels to 20, 40 and 80
# channels, followed by a 10-way classifier.
learn = ConvLearner.from_model_data(ConvNet([3, 20, 40, 80], 10), data)
OrderedDict([('Conv2d-1', OrderedDict([('input_shape', [-1, 3, 32, 32]), ('output_shape', [-1, 20, 15, 15]), ('trainable', True), ('nb_params', 560)])), ('Conv2d-2', OrderedDict([('input_shape', [-1, 20, 15, 15]), ('output_shape', [-1, 40, 7, 7]), ('trainable', True), ('nb_params', 7240)])), ('Conv2d-3', OrderedDict([('input_shape', [-1, 40, 7, 7]), ('output_shape', [-1, 80, 3, 3]), ('trainable', True), ('nb_params', 28880)])), ('AdaptiveMaxPool2d-4', OrderedDict([('input_shape', [-1, 80, 3, 3]), ('output_shape', [-1, 80, 1, 1]), ('nb_params', 0)])), ('Linear-5', OrderedDict([('input_shape', [-1, 80]), ('output_shape', [-1, 10]), ('trainable', True), ('nb_params', 810)]))])
# Initial run: 2 epochs with a first argument (learning rate) of 1e-1.
%time learn.fit(1e-1, 2)
[ 0. 1.72594 1.63399 0.41338] [ 1. 1.51599 1.49687 0.45723] CPU times: user 1min 14s, sys: 32.3 s, total: 1min 46s Wall time: 56.5 s
# Four further cycles of one epoch each (the output below lists epochs 0-3).
%time learn.fit(1e-1, 4, cycle_len=1)
[ 0. 1.36734 1.28901 0.53418] [ 1. 1.28854 1.21991 0.56143] [ 2. 1.22854 1.15514 0.58398] [ 3. 1.17904 1.12523 0.59922] CPU times: user 2min 21s, sys: 1min 3s, total: 3min 24s Wall time: 1min 46s
class ConvLayer(nn.Module):
    """3x3 stride-2 convolution (padding 1) followed by ReLU.

    ``ni`` / ``nf`` are the input / output channel counts.
    """

    def __init__(self, ni, nf):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignment below raises.
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the convolution and clamp negative activations to zero."""
        return F.relu(self.conv(x))
class ConvNet2(nn.Module):
    """Stack of ``ConvLayer`` blocks, global max pool, linear classifier.

    ``layers`` lists channel counts (``layers[0]`` is the input channel
    count); ``c`` is the number of output classes.
    """

    def __init__(self, layers, c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        self.layers = nn.ModuleList([
            ConvLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        for l in self.layers:
            x = l(x)
        # Functional global max pool: one value per channel.
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)
# Same channel progression as the previous model (3 -> 20 -> 40 -> 80), now
# built from padded ConvLayer blocks.
learn = ConvLearner.from_model_data(ConvNet2([3, 20, 40, 80], 10), data)
OrderedDict([('Conv2d-1', OrderedDict([('input_shape', [-1, 3, 32, 32]), ('output_shape', [-1, 20, 16, 16]), ('trainable', True), ('nb_params', 560)])), ('ConvLayer-2', OrderedDict([('input_shape', [-1, 3, 32, 32]), ('output_shape', [-1, 20, 16, 16]), ('nb_params', 0)])), ('Conv2d-3', OrderedDict([('input_shape', [-1, 20, 16, 16]), ('output_shape', [-1, 40, 8, 8]), ('trainable', True), ('nb_params', 7240)])), ('ConvLayer-4', OrderedDict([('input_shape', [-1, 20, 16, 16]), ('output_shape', [-1, 40, 8, 8]), ('nb_params', 0)])), ('Conv2d-5', OrderedDict([('input_shape', [-1, 40, 8, 8]), ('output_shape', [-1, 80, 4, 4]), ('trainable', True), ('nb_params', 28880)])), ('ConvLayer-6', OrderedDict([('input_shape', [-1, 40, 8, 8]), ('output_shape', [-1, 80, 4, 4]), ('nb_params', 0)])), ('Linear-7', OrderedDict([('input_shape', [-1, 80]), ('output_shape', [-1, 10]), ('trainable', True), ('nb_params', 810)]))])
# Initial run: 2 epochs with a first argument (learning rate) of 1e-1.
%time learn.fit(1e-1, 2)
[ 0. 1.70151 1.64982 0.3832 ] [ 1. 1.50838 1.53231 0.44795] CPU times: user 1min 6s, sys: 28.5 s, total: 1min 35s Wall time: 48.8 s
# Two further cycles of one epoch each (the output below lists epochs 0-1).
%time learn.fit(1e-1, 2, cycle_len=1)
[ 0. 1.51605 1.42927 0.4751 ] [ 1. 1.40143 1.33511 0.51787] CPU times: user 1min 6s, sys: 27.7 s, total: 1min 34s Wall time: 48.7 s
class BnLayer(nn.Module):
    """Convolution + ReLU followed by a hand-rolled per-channel normalisation.

    ``ni`` / ``nf`` are the input / output channel counts. The convolution has
    no bias and a fixed padding of 1, so only ``kernel_size=3`` keeps the
    spatial size unchanged at ``stride=1``.

    ``m`` (initialised to ones) and ``a`` (initialised to zeros) are a learned
    per-channel scale and shift applied after normalisation.
    """

    def __init__(self, ni, nf, stride=2, kernel_size=3):
        # nn.Module must be initialised before any sub-module or parameter is
        # assigned; without this call the assignments below raise.
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=kernel_size, stride=stride,
                              bias=False, padding=1)
        self.a = nn.Parameter(torch.zeros(nf, 1, 1))
        self.m = nn.Parameter(torch.ones(nf, 1, 1))

    def forward(self, x):
        """Normalise the activations with the most recent training statistics.

        In training mode the per-channel mean and standard deviation of the
        current batch are computed and stored. In eval mode the values stored
        by the last training-mode call are reused, so calling the layer in
        eval mode before any training-mode call raises AttributeError.
        """
        x = F.relu(self.conv(x))
        if self.training:
            # Flatten to (channels, everything else) so the statistics are
            # taken over batch and spatial positions. Only needed here.
            x_chan = x.transpose(0, 1).contiguous().view(x.size(1), -1)
            self.means = x_chan.mean(1)[:, None, None]
            self.stds = x_chan.std(1)[:, None, None]
        return (x - self.means) / self.stds * self.m + self.a
class ConvBnNet(nn.Module):
    """5x5 stem convolution, a stack of ``BnLayer`` blocks, linear classifier.

    ``layers`` lists channel counts; ``layers[0]`` is the stem's output width
    and ``c`` is the number of output classes. The input is expected to have
    3 channels.
    """

    def __init__(self, layers, c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        # The stem must emit layers[0] channels for the first BnLayer to accept
        # its output, so derive the width instead of hard-coding 10.
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        x = self.conv1(x)
        for l in self.layers:
            x = l(x)
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)
# Stem output of 10 channels, then four BnLayer blocks widening to 20, 40, 80
# and 160 channels; 10 output classes.
learn = ConvLearner.from_model_data(ConvBnNet([10, 20, 40, 80, 160], 10), data)
OrderedDict([('Conv2d-1', OrderedDict([('input_shape', [-1, 3, 32, 32]), ('output_shape', [-1, 10, 32, 32]), ('trainable', True), ('nb_params', 760)])), ('Conv2d-2', OrderedDict([('input_shape', [-1, 10, 32, 32]), ('output_shape', [-1, 20, 16, 16]), ('trainable', True), ('nb_params', 1800)])), ('BnLayer-3', OrderedDict([('input_shape', [-1, 10, 32, 32]), ('output_shape', [-1, 20, 16, 16]), ('nb_params', 0)])), ('Conv2d-4', OrderedDict([('input_shape', [-1, 20, 16, 16]), ('output_shape', [-1, 40, 8, 8]), ('trainable', True), ('nb_params', 7200)])), ('BnLayer-5', OrderedDict([('input_shape', [-1, 20, 16, 16]), ('output_shape', [-1, 40, 8, 8]), ('nb_params', 0)])), ('Conv2d-6', OrderedDict([('input_shape', [-1, 40, 8, 8]), ('output_shape', [-1, 80, 4, 4]), ('trainable', True), ('nb_params', 28800)])), ('BnLayer-7', OrderedDict([('input_shape', [-1, 40, 8, 8]), ('output_shape', [-1, 80, 4, 4]), ('nb_params', 0)])), ('Conv2d-8', OrderedDict([('input_shape', [-1, 80, 4, 4]), ('output_shape', [-1, 160, 2, 2]), ('trainable', True), ('nb_params', 115200)])), ('BnLayer-9', OrderedDict([('input_shape', [-1, 80, 4, 4]), ('output_shape', [-1, 160, 2, 2]), ('nb_params', 0)])), ('Linear-10', OrderedDict([('input_shape', [-1, 160]), ('output_shape', [-1, 10]), ('trainable', True), ('nb_params', 1610)]))])
# Initial run: 2 epochs with a first argument (learning rate) of 3e-2.
%time learn.fit(3e-2, 2)
[ 0. 1.4966 1.39257 0.48965] [ 1. 1.2975 1.20827 0.57148] CPU times: user 1min 16s, sys: 32.5 s, total: 1min 49s Wall time: 54.3 s
# Four further cycles of one epoch each (the output below lists epochs 0-3),
# at the higher rate of 1e-1.
%time learn.fit(1e-1, 4, cycle_len=1)
[ 0. 1.20966 1.07735 0.61504] [ 1. 1.0771 0.97338 0.65215] [ 2. 1.00103 0.91281 0.67402] [ 3. 0.93574 0.89293 0.68135] CPU times: user 2min 34s, sys: 1min 4s, total: 3min 39s Wall time: 1min 50s
class ConvBnNet2(nn.Module):
    """Deeper ``ConvBnNet``: each stride-2 block is followed by a stride-1 one.

    ``layers`` lists channel counts; ``layers[0]`` is the stem's output width
    and ``c`` is the number of output classes. The input is expected to have
    3 channels.
    """

    def __init__(self, layers, c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        # The stem must emit layers[0] channels for the first BnLayer to accept
        # its output, so derive the width instead of hard-coding 10.
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        # Downsampling blocks (BnLayer's default stride of 2).
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        # Same-width blocks with stride 1, one after each downsampling block.
        self.layers2 = nn.ModuleList([
            BnLayer(n_out, n_out, 1) for n_out in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        x = self.conv1(x)
        for l, l2 in zip(self.layers, self.layers2):
            x = l(x)
            x = l2(x)
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)
# Same channel progression as ConvBnNet, with the extra stride-1 block per stage.
learn = ConvLearner.from_model_data(ConvBnNet2([10, 20, 40, 80, 160], 10), data)
# Initial run: 2 epochs with a first argument (learning rate) of 1e-2.
%time learn.fit(1e-2, 2)
[ 0. 1.53499 1.43782 0.47588] [ 1. 1.28867 1.22616 0.55537] CPU times: user 1min 22s, sys: 34.5 s, total: 1min 56s Wall time: 58.2 s
# Two further cycles of one epoch each (the output below lists epochs 0-1).
%time learn.fit(1e-2, 2, cycle_len=1)
[ 0. 1.10933 1.06439 0.61582] [ 1. 1.04663 0.98608 0.64609] CPU times: user 1min 21s, sys: 32.9 s, total: 1min 54s Wall time: 57.6 s
class ResnetLayer(BnLayer):
    """``BnLayer`` with an identity skip connection added to its output.

    The input is summed with the block's own output, so the two must have
    compatible shapes.
    """

    def forward(self, x):
        """Return ``x`` plus the parent block's transformation of ``x``."""
        residual = super().forward(x)
        return x + residual
class Resnet(nn.Module):
    """Residual CNN: each stage is a downsampling block plus two skip blocks.

    ``layers`` lists channel counts; ``layers[0]`` is the stem's output width
    and ``c`` is the number of output classes. The input is expected to have
    3 channels.
    """

    def __init__(self, layers, c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        # The stem must emit layers[0] channels for the first BnLayer to accept
        # its output, so derive the width instead of hard-coding 10.
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        # Downsampling blocks (BnLayer's default stride of 2), no skip.
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        # Two same-width, stride-1 residual blocks per stage.
        self.layers2 = nn.ModuleList([
            ResnetLayer(n_out, n_out, 1) for n_out in layers[1:]])
        self.layers3 = nn.ModuleList([
            ResnetLayer(n_out, n_out, 1) for n_out in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        x = self.conv1(x)
        for l, l2, l3 in zip(self.layers, self.layers2, self.layers3):
            x = l3(l2(l(x)))
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)
learn = ConvLearner.from_model_data(Resnet([10, 20, 40, 80, 160], 10), data)
%time learn.fit(1e-2, 2, wds=wd)
[ 0. 1.58191 1.40258 0.49131] [ 1. 1.33134 1.21739 0.55625] CPU times: user 1min 27s, sys: 34.3 s, total: 2min 1s Wall time: 1min 3s
# Three cycles whose length doubles each time (1 + 2 + 4 = 7 epochs, matching
# epochs 0-6 in the output below). `wds` is presumably the weight decay — confirm.
%time learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)
[ 0. 1.11534 1.05117 0.62549] [ 1. 1.06272 0.97874 0.65185] [ 2. 0.92913 0.90472 0.68154] [ 3. 0.97932 0.94404 0.67227] [ 4. 0.88057 0.84372 0.70654] [ 5. 0.77817 0.77815 0.73018] [ 6. 0.73235 0.76302 0.73633] CPU times: user 5min 2s, sys: 1min 59s, total: 7min 1s Wall time: 3min 39s
# Eight cycles of four epochs each (32 epochs, matching epochs 0-31 below).
%time learn.fit(1e-2, 8, cycle_len=4, wds=wd)
[ 0. 0.8307 0.83635 0.7126 ] [ 1. 0.74295 0.73682 0.74189] [ 2. 0.66492 0.69554 0.75996] [ 3. 0.62392 0.67166 0.7625 ] [ 4. 0.73479 0.80425 0.72861] [ 5. 0.65423 0.68876 0.76318] [ 6. 0.58608 0.64105 0.77783] [ 7. 0.55738 0.62641 0.78721] [ 8. 0.66163 0.74154 0.7501 ] [ 9. 0.59444 0.64253 0.78106] [ 10. 0.53 0.61772 0.79385] [ 11. 0.49747 0.65968 0.77832] [ 12. 0.59463 0.67915 0.77422] [ 13. 0.55023 0.65815 0.78106] [ 14. 0.48959 0.59035 0.80273] [ 15. 0.4459 0.61823 0.79336] [ 16. 0.55848 0.64115 0.78018] [ 17. 0.50268 0.61795 0.79541] [ 18. 0.45084 0.57577 0.80654] [ 19. 0.40726 0.5708 0.80947] [ 20. 0.51177 0.66771 0.78232] [ 21. 0.46516 0.6116 0.79932] [ 22. 0.40966 0.56865 0.81172] [ 23. 0.3852 0.58161 0.80967] [ 24. 0.48268 0.59944 0.79551] [ 25. 0.43282 0.56429 0.81182] [ 26. 0.37634 0.54724 0.81797] [ 27. 0.34953 0.54169 0.82129] [ 28. 0.46053 0.58128 0.80342] [ 29. 0.4041 0.55185 0.82295] [ 30. 0.3599 0.53953 0.82861] [ 31. 0.32937 0.55605 0.82227] CPU times: user 22min 52s, sys: 8min 58s, total: 31min 51s Wall time: 16min 38s
class Resnet2(nn.Module):
    """Wider residual CNN with a normalised 7x7 stem and dropout before the head.

    ``layers`` lists channel counts; ``layers[0]`` is the stem's output width,
    ``c`` is the number of output classes, and ``p`` is the dropout
    probability applied to the pooled features. The input is expected to have
    3 channels.
    """

    def __init__(self, layers, c, p=0.5):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignments below raise.
        super().__init__()
        # The stem must emit layers[0] channels for the first BnLayer to accept
        # its output, so derive the width instead of hard-coding 16.
        # NOTE(review): BnLayer pads by 1 regardless of kernel size, so this
        # 7x7 stride-1 stem shrinks each spatial dimension by 4 — confirm intended.
        self.conv1 = BnLayer(3, layers[0], stride=1, kernel_size=7)
        # Downsampling blocks (BnLayer's default stride of 2), no skip.
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        # Two same-width, stride-1 residual blocks per stage.
        self.layers2 = nn.ModuleList([
            ResnetLayer(n_out, n_out, 1) for n_out in layers[1:]])
        self.layers3 = nn.ModuleList([
            ResnetLayer(n_out, n_out, 1) for n_out in layers[1:]])
        self.out = nn.Linear(layers[-1], c)
        self.drop = nn.Dropout(p)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, c)``."""
        x = self.conv1(x)
        for l, l2, l3 in zip(self.layers, self.layers2, self.layers3):
            x = l3(l2(l(x)))
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        # Dropout acts on the pooled feature vector, just before the classifier.
        x = self.drop(x)
        return F.log_softmax(self.out(x), dim=-1)
# Wider channel progression (16 -> 32 -> 64 -> 128 -> 256), 10 classes, and a
# dropout probability of 0.2 instead of the default 0.5.
learn = ConvLearner.from_model_data(Resnet2([16, 32, 64, 128, 256], 10, 0.2), data)
# Initial run: 2 epochs with a first argument (learning rate) of 1e-2.
%time learn.fit(1e-2, 2, wds=wd)
[ 0. 1.7051 1.53364 0.46885] [ 1. 1.47858 1.34297 0.52734] CPU times: user 1min 29s, sys: 35.4 s, total: 2min 4s Wall time: 1min 6s
# Three cycles whose length doubles each time (1 + 2 + 4 = 7 epochs, matching
# epochs 0-6 in the output below).
%time learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)
[ 0. 1.29414 1.26694 0.57041] [ 1. 1.21206 1.06634 0.62373] [ 2. 1.05583 1.0129 0.64258] [ 3. 1.09763 1.11568 0.61318] [ 4. 0.97597 0.93726 0.67266] [ 5. 0.86295 0.82655 0.71426] [ 6. 0.827 0.8655 0.70244] CPU times: user 5min 11s, sys: 1min 58s, total: 7min 9s Wall time: 3min 48s
# Eight cycles of four epochs each (32 epochs, matching epochs 0-31 below).
%time learn.fit(1e-2, 8, cycle_len=4, wds=wd)
[ 0. 0.92043 0.93876 0.67685] [ 1. 0.8359 0.81156 0.72168] [ 2. 0.73084 0.72091 0.74463] [ 3. 0.68688 0.71326 0.74824] [ 4. 0.81046 0.79485 0.72354] [ 5. 0.72155 0.68833 0.76006] [ 6. 0.63801 0.68419 0.76855] [ 7. 0.59678 0.64972 0.77363] [ 8. 0.71126 0.78098 0.73828] [ 9. 0.63549 0.65685 0.7708 ] [ 10. 0.56837 0.63656 0.78057] [ 11. 0.52093 0.59159 0.79629] [ 12. 0.66463 0.69927 0.76357] [ 13. 0.58121 0.64529 0.77871] [ 14. 0.52346 0.5751 0.80293] [ 15. 0.47279 0.55094 0.80498] [ 16. 0.59857 0.64519 0.77559] [ 17. 0.54384 0.68057 0.77676] [ 18. 0.48369 0.5821 0.80273] [ 19. 0.43456 0.54708 0.81182] [ 20. 0.54963 0.65753 0.78203] [ 21. 0.49259 0.55957 0.80791] [ 22. 0.43646 0.55221 0.81309] [ 23. 0.39269 0.55158 0.81426] [ 24. 0.51039 0.61335 0.7998 ] [ 25. 0.4667 0.56516 0.80869] [ 26. 0.39469 0.5823 0.81299] [ 27. 0.36389 0.51266 0.82764] [ 28. 0.48962 0.55353 0.81201] [ 29. 0.4328 0.55394 0.81328] [ 30. 0.37081 0.50348 0.83359] [ 31. 0.34045 0.52052 0.82949] CPU times: user 23min 30s, sys: 9min 1s, total: 32min 32s Wall time: 17min 16s
# Test-time augmentation: returns log-predictions and the target labels.
# Presumably log_preds has one leading entry per augmented pass — TODO confirm.
log_preds,y = learn.TTA()
# Exponentiate the log-predictions, then average over the leading axis.
preds = np.mean(np.exp(log_preds),0)
# Display the log loss and accuracy of the averaged predictions.
metrics.log_loss(y,preds), accuracy_np(preds,y)
(0.44507397166057938, 0.84909999999999997)