%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *
path = untar_data(URLs.CAMVID)
path.ls()
[PosixPath('/home/ubuntu/.fastai/data/camvid/images'), PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels'), PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]
path_lbl = path/'labels'
path_img = path/'images'
# path = Path('./data/camvid-small')
# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name
# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])
# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))
# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))
fnames = get_image_files(path_img)
fnames[:3]
[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]
lbl_names = get_image_files(path_lbl)
lbl_names[:3]
[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'), PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))
get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)
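get_y_fn simply maps an image filename such as 0001TP_008160.png to its mask 0001TP_008160_P.png in the labels folder. As a quick sanity check (my addition, not in the original notebook), you can verify that every image has a matching label file before building the DataBunch:
# hypothetical check: list images whose derived label path does not exist
missing = [f for f in fnames if not get_y_fn(f).exists()]
print(f"{len(missing)} images without a matching label file")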
src_size = np.array(mask.shape[1:])
src_size,mask.data
(array([720, 960]),
 tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))
codes = np.loadtxt(path/'codes.txt', dtype=str); codes
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole', 'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock', 'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone', 'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')
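The mask pixels are integer class indices into codes, so the values seen above map straight to class names: 4 is Building, 17 is Road and 30 is Void. A small check along those lines (my addition, assuming the mask loaded earlier is still in scope):
# map the integer codes present in this one mask back to their class names
present = np.unique(mask.data.numpy())
print([codes[i] for i in present])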
size = src_size//2
free = gpu_mem_get_free_no_cache()
# the max batch size (bs) depends on the available GPU RAM
if free > 8200: bs=8
else: bs=4
print(f"using bs={bs}, have {free}MB of GPU RAM free")
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
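Before looking at batches, it is worth confirming (my addition) that the split and labelling produced what you expect: the number of training and validation items, and the number of classes the model head will predict.
print(len(data.train_ds), len(data.valid_ds), data.c)  # data.c should equal len(codes)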
data.show_batch(2, figsize=(10,7))
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)
name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']
def acc_camvid(input, target):
    "Pixel accuracy that ignores pixels labelled Void."
    target = target.squeeze(1)
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()
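Because Void pixels carry no useful label, acc_camvid masks them out before comparing predictions with the target. A toy call (my addition; shapes follow the (batch, classes, h, w) predictions and (batch, 1, h, w) targets the metric receives during training):
import torch
fake_pred = torch.randn(1, len(codes), 2, 2)                 # random "logits"
fake_targ = torch.tensor([[[[void_code, 17], [17, 26]]]])    # one Void pixel is ignored
print(acc_camvid(fake_pred, fake_targ))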
metrics=acc_camvid
# metrics=accuracy
wd=1e-2
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
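unet_learner builds a U-Net on top of a ResNet-34 encoder pretrained on ImageNet and freezes that encoder, so only the decoder is trained at first. A quick way to see this (my addition):
n_trainable = sum(p.numel() for p in learn.model.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in learn.model.parameters())
print(f"{n_trainable:,} trainable of {n_total:,} parameters while the encoder is frozen")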
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lr=3e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 1.172738 | 0.771233 | 0.820753 |
2 | 0.799105 | 0.637969 | 0.825231 |
3 | 0.731867 | 0.628517 | 0.824456 |
4 | 0.630086 | 0.486045 | 0.860029 |
5 | 0.616145 | 0.535374 | 0.860695 |
6 | 0.588540 | 0.439988 | 0.879501 |
7 | 0.551265 | 0.412592 | 0.884478 |
8 | 0.568172 | 0.679087 | 0.832221 |
9 | 0.538392 | 0.405911 | 0.879605 |
10 | 0.451235 | 0.353972 | 0.892891 |
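pct_start=0.9 means the learning rate rises for the first 90% of the one-cycle schedule and anneals only in the final 10%. After training you can inspect what the schedule did (my addition; both recorder plots exist in fastai v1):
learn.recorder.plot_losses()  # train/valid loss curves
learn.recorder.plot_lr()      # the one-cycle learning-rate schedule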
learn.save('stage-1')
learn.load('stage-1');
learn.show_results(rows=3, figsize=(8,9))
learn.unfreeze()
lrs = slice(lr/400,lr/4)
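slice(lr/400, lr/4) gives discriminative learning rates: the earliest pretrained layer groups get the small end and the head gets the large end. To see the exact values assigned to each layer group (my addition, assuming Learner.lr_range is available in your fastai version):
print(learn.lr_range(lrs))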
learn.fit_one_cycle(12, lrs, pct_start=0.8)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.389135 | 0.334715 | 0.896700 |
2 | 0.377873 | 0.324080 | 0.900284 |
3 | 0.369020 | 0.325073 | 0.904146 |
4 | 0.355022 | 0.308820 | 0.912556 |
5 | 0.351138 | 0.313001 | 0.909351 |
6 | 0.347777 | 0.285509 | 0.920183 |
7 | 0.338683 | 0.306076 | 0.909899 |
8 | 0.318913 | 0.303712 | 0.915792 |
9 | 0.312038 | 0.276126 | 0.920137 |
10 | 0.311217 | 0.276649 | 0.925244 |
11 | 0.285135 | 0.268458 | 0.922453 |
12 | 0.256778 | 0.262011 | 0.926964 |
learn.save('stage-2');
You may have to restart your kernel and come back to this stage if you run out of memory; you may also need to decrease bs.
learn.destroy()
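learn.destroy() frees the learner's model and internals so the GPU has room for the full-size run. If your fastai version predates destroy(), a rough manual equivalent (my sketch, left commented out because the cell above already freed the memory) is:
# import gc, torch
# del learn
# gc.collect()
# torch.cuda.empty_cache()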
size = src_size
free = gpu_mem_get_free_no_cache()
# the max batch size (bs) depends on the available GPU RAM
if free > 8200: bs=3
else: bs=1
print(f"using bs={bs}, have {free}MB of GPU RAM free")
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)
learn.load('stage-2');
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lr=1e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.366140 | 0.339497 | 0.908682 |
2 | 0.333278 | 0.304011 | 0.916702 |
3 | 0.326025 | 0.305404 | 0.919941 |
4 | 0.336885 | 0.321342 | 0.910933 |
5 | 0.326935 | 0.305589 | 0.919401 |
6 | 0.347779 | 0.333608 | 0.908895 |
7 | 0.328334 | 0.352358 | 0.905482 |
8 | 0.327277 | 0.387525 | 0.912187 |
9 | 0.291777 | 0.293065 | 0.918046 |
10 | 0.228348 | 0.257859 | 0.929750 |
learn.save('stage-1-big')
learn.load('stage-1-big');
learn.unfreeze()
lrs = slice(1e-6,lr/10)
learn.fit_one_cycle(10, lrs)
epoch | train_loss | valid_loss | acc_camvid |
---|---|---|---|
1 | 0.225999 | 0.254650 | 0.930782 |
2 | 0.216341 | 0.256007 | 0.930751 |
3 | 0.209800 | 0.255649 | 0.930709 |
4 | 0.203509 | 0.252857 | 0.931682 |
5 | 0.202308 | 0.258498 | 0.931308 |
6 | 0.200796 | 0.257311 | 0.931915 |
7 | 0.204560 | 0.251486 | 0.933218 |
8 | 0.192893 | 0.254977 | 0.932786 |
9 | 0.189505 | 0.258091 | 0.932555 |
10 | 0.190156 | 0.256064 | 0.933315 |
learn.save('stage-2-big')
learn.load('stage-2-big');
learn.show_results(rows=3, figsize=(10,10))
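With the full-size model trained, you can also run single-image inference. A short sketch (my addition; in fastai v1, Learner.predict on a segmentation learner returns an ImageSegment you can display):
pred_mask, pred_idx, raw_out = learn.predict(img)  # img is the image opened earlier
pred_mask.show(figsize=(5,5), alpha=1)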