%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *
path = untar_data(URLs.CAMVID)
path.ls()
[PosixPath('/home/ubuntu/.fastai/data/camvid/images'), PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels'), PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]
path_lbl = path/'labels'
path_img = path/'images'
# path = Path('./data/camvid-small')
# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name
# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])
# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))
# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))
fnames = get_image_files(path_img)
fnames[:3]
[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]
lbl_names = get_image_files(path_lbl)
lbl_names[:3]
[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))
get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)
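get_y_fn simply maps an image filename such as 0001TP_008160.png to its mask 0001TP_008160_P.png in the labels folder. As a quick sanity check (my addition, not in the original notebook), you can verify that every image has a matching label file before building the DataBunch:
# hypothetical check: list images whose derived label path does not exist
missing = [f for f in fnames if not get_y_fn(f).exists()]
print(f"{len(missing)} images without a matching label file")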
src_size = np.array(mask.shape[1:])
src_size,mask.data
(array([720, 960]),
 tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))
codes = np.loadtxt(path/'codes.txt', dtype=str); codes
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole', 'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock', 'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone', 'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')
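The mask pixels are integer class indices into codes, so the values seen above map straight to class names: 4 is Building, 17 is Road and 30 is Void. A small check along those lines (my addition, assuming the mask loaded earlier is still in scope):
# map the integer codes present in this one mask back to their class names
present = np.unique(mask.data.numpy())
print([codes[i] for i in present])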
size = src_size//2
free = gpu_mem_get_free_no_cache()
# the max batch size (bs) depends on the available GPU RAM
if free > 8200: bs=8
else: bs=4
print(f"using bs={bs}, have {free}MB of GPU RAM free")
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
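Before looking at batches, it is worth confirming (my addition) that the split and labelling produced what you expect: the number of training and validation items, and the number of classes the model head will predict.
print(len(data.train_ds), len(data.valid_ds), data.c)  # data.c should equal len(codes)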
data.show_batch(2, figsize=(10,7))
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)
name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']
def acc_camvid(input, target):
    "Pixel accuracy that ignores pixels labelled Void."
    target = target.squeeze(1)
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()
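Because Void pixels carry no useful label, acc_camvid masks them out before comparing predictions with the target. A toy call (my addition; shapes follow the (batch, classes, h, w) predictions and (batch, 1, h, w) targets the metric receives during training):
import torch
fake_pred = torch.randn(1, len(codes), 2, 2)                 # random "logits"
fake_targ = torch.tensor([[[[void_code, 17], [17, 26]]]])    # one Void pixel is ignored
print(acc_camvid(fake_pred, fake_targ))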
metrics=acc_camvid
# metrics=accuracy
wd=1e-2
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
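unet_learner builds a U-Net on top of a ResNet-34 encoder pretrained on ImageNet and freezes that encoder, so only the decoder is trained at first. A quick way to see this (my addition):
n_trainable = sum(p.numel() for p in learn.model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in learn.model.parameters())
print(f"{n_trainable:,} trainable of {n_total:,} parameters while the encoder is frozen")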
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lr=3e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 1.172738 | 0.771233 | 0.820753 |
2 | 0.799105 | 0.637969 | 0.825231 |
3 | 0.731867 | 0.628517 | 0.824456 |
4 | 0.630086 | 0.486045 | 0.860029 |
5 | 0.616145 | 0.535374 | 0.860695 |
6 | 0.588540 | 0.439988 | 0.879501 |
7 | 0.551265 | 0.412592 | 0.884478 |
8 | 0.568172 | 0.679087 | 0.832221 |
9 | 0.538392 | 0.405911 | 0.879605 |
10 | 0.451235 | 0.353972 | 0.892891 |
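pct_start=0.9 means the learning rate rises for the first 90% of the one-cycle schedule and anneals only in the final 10%. After training you can inspect what the schedule did (my addition; both recorder plots exist in fastai v1):
learn.recorder.plot_losses()  # train/valid loss curves
learn.recorder.plot_lr()      # the one-cycle learning-rate schedule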
learn.save('stage-1')
learn.load('stage-1');
learn.show_results(rows=3, figsize=(8,9))
learn.unfreeze()
lrs = slice(lr/400,lr/4)
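slice(lr/400, lr/4) gives discriminative learning rates: the earliest pretrained layer groups get the small end and the head gets the large end. To see the exact values assigned to each layer group (my addition, assuming Learner.lr_range is available in your fastai version):
print(learn.lr_range(lrs))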
learn.fit_one_cycle(12, lrs, pct_start=0.8)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.389135 | 0.334715 | 0.896700 |
2 | 0.377873 | 0.324080 | 0.900284 |
3 | 0.369020 | 0.325073 | 0.904146 |
4 | 0.355022 | 0.308820 | 0.912556 |
5 | 0.351138 | 0.313001 | 0.909351 |
6 | 0.347777 | 0.285509 | 0.920183 |
7 | 0.338683 | 0.306076 | 0.909899 |
8 | 0.318913 | 0.303712 | 0.915792 |
9 | 0.312038 | 0.276126 | 0.920137 |
10 | 0.311217 | 0.276649 | 0.925244 |
11 | 0.285135 | 0.268458 | 0.922453 |
12 | 0.256778 | 0.262011 | 0.926964 |
learn.save('stage-2');
You may have to restart your kernel and come back to this stage if you run out of memory; you may also need to decrease bs.
learn.destroy()
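learn.destroy() frees the learner's model and internals so the GPU has room for the full-size run. If your fastai version predates destroy(), a rough manual equivalent (my sketch, left commented out because the cell above already freed the memory) is:
# import gc, torch
# del learn
# gc.collect()
# torch.cuda.empty_cache()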
size = src_size
free = gpu_mem_get_free_no_cache()
# the max batch size (bs) depends on the available GPU RAM
if free > 8200: bs=3
else: bs=1
print(f"using bs={bs}, have {free}MB of GPU RAM free")
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
learn.load('stage-2');
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lr=1e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.366140 | 0.339497 | 0.908682 |
2 | 0.333278 | 0.304011 | 0.916702 |
3 | 0.326025 | 0.305404 | 0.919941 |
4 | 0.336885 | 0.321342 | 0.910933 |
5 | 0.326935 | 0.305589 | 0.919401 |
6 | 0.347779 | 0.333608 | 0.908895 |
7 | 0.328334 | 0.352358 | 0.905482 |
8 | 0.327277 | 0.387525 | 0.912187 |
9 | 0.291777 | 0.293065 | 0.918046 |
10 | 0.228348 | 0.257859 | 0.929750 |
learn.save('stage-1-big')
learn.load('stage-1-big');
learn.unfreeze()
lrs = slice(1e-6,lr/10)
learn.fit_one_cycle(10, lrs)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.225999 | 0.254650 | 0.930782 |
2 | 0.216341 | 0.256007 | 0.930751 |
3 | 0.209800 | 0.255649 | 0.930709 |
4 | 0.203509 | 0.252857 | 0.931682 |
5 | 0.202308 | 0.258498 | 0.931308 |
6 | 0.200796 | 0.257311 | 0.931915 |
7 | 0.204560 | 0.251486 | 0.933218 |
8 | 0.192893 | 0.254977 | 0.932786 |
9 | 0.189505 | 0.258091 | 0.932555 |
10 | 0.190156 | 0.256064 | 0.933315 |
learn.save('stage-2-big')
learn.load('stage-2-big');
learn.show_results(rows=3, figsize=(10,10))
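With the full-size model trained, you can also run single-image inference. A short sketch (my addition; in fastai v1, Learner.predict on a segmentation learner returns an ImageSegment you can display):
pred_mask, pred_idx, raw_out = learn.predict(img)  # img is the image opened earlier
pred_mask.show(figsize=(5,5), alpha=1)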