Image segmentation with CamVid

In [1]:
# Auto-reload edited source modules on every cell execution and render
# matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
# fastai v1 is designed around wildcard imports: `from fastai.vision import *`
# pulls in the vision application API (and re-exports torch, numpy, etc.),
# so the stars are the library's documented convention here.
from fastai import *
from fastai.vision import *
from fastai.callbacks.hooks import *

# Pin work to GPU 0, but only when CUDA is actually available so the
# notebook still runs (slowly) on a CPU-only machine instead of raising.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
In [3]:
# Download (first run only) and extract the CamVid dataset under
# ~/.fastai/data/camvid; `path` is the dataset root.
path = untar_data(URLs.CAMVID)
path.ls()  # top-level contents: codes.txt, valid.txt, labels/, images/
Downloading https://s3.amazonaws.com/fast-ai-imagelocal/camvid
Out[3]:
[PosixPath('/home/cedric/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/cedric/.fastai/data/camvid/valid.txt'),
 PosixPath('/home/cedric/.fastai/data/camvid/labels'),
 PosixPath('/home/cedric/.fastai/data/camvid/images')]
In [4]:
# CamVid layout: integer-coded segmentation masks live in labels/,
# the raw video frames in images/.
path_lbl = path/'labels'
path_img = path/'images'

Subset classes (alternative setup using the smaller 12-class `camvid-small` dataset — kept for reference, not executed)

In [ ]:
# Alternative (disabled): build the DataBunch from the smaller
# 'camvid-small' subset, which uses a 12-class label set and a
# <name>annot/ folder convention for masks. Kept for reference; the
# cells below use the full CamVid dataset instead.
# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data

In [5]:
# Recursively collect every image file under images/.
fnames = get_image_files(path_img)
fnames[:3]  # peek at a few paths to sanity-check the listing
Out[5]:
[PosixPath('/home/cedric/.fastai/data/camvid/images/Seq05VD_f03600.png'),
 PosixPath('/home/cedric/.fastai/data/camvid/images/0016E5_08099.png'),
 PosixPath('/home/cedric/.fastai/data/camvid/images/0001TP_007800.png')]
In [6]:
# Collect the mask files; note the '_P' suffix in their filenames,
# which get_y_fn below relies on to pair images with masks.
lbl_names = get_image_files(path_lbl)
lbl_names[:3]
Out[6]:
[PosixPath('/home/cedric/.fastai/data/camvid/labels/Seq05VD_f02670_P.png'),
 PosixPath('/home/cedric/.fastai/data/camvid/labels/Seq05VD_f03030_P.png'),
 PosixPath('/home/cedric/.fastai/data/camvid/labels/Seq05VD_f03090_P.png')]
In [7]:
# Open and display one example frame to eyeball the data.
img_f = fnames[3] # fourth image in the (unordered) listing
img = open_image(img_f)
img.show(figsize=(10,10))
In [8]:
def get_y_fn(x, lbl_dir=None):
    """Map an image path to the path of its segmentation mask.

    CamVid stores the mask for ``images/<stem><suffix>`` at
    ``labels/<stem>_P<suffix>``.

    Args:
        x: Path of the input image.
        lbl_dir: Directory holding the masks; defaults to the
            module-level ``path_lbl`` (backward compatible with the
            original one-argument lambda).

    Returns:
        Path of the corresponding mask file.
    """
    # PEP 8 (E731): a def is preferred over assigning a lambda to a name.
    if lbl_dir is None:
        lbl_dir = path_lbl
    return lbl_dir / f'{x.stem}_P{x.suffix}'
In [9]:
# Masks are integer class-id images; open_mask wraps one as an
# ImageSegment so it renders with a categorical colour map.
mask = open_mask(get_y_fn(img_f)) # type vision.image.ImageSegment
mask.show(figsize=(10,10), alpha=1)
In [10]:
# Sanity check: the mask tensor is (channels, height, width) with a
# single channel; dropping the leading dim leaves the spatial size.
full_shape = mask.shape
full_shape, full_shape[1:]
Out[10]:
(torch.Size([1, 720, 960]), torch.Size([720, 960]))
In [11]:
# Record the source (height, width) for later resizing; mask.data
# exposes the per-pixel class ids as a LongTensor.
src_size = np.array(mask.shape)[1:]
src_size, mask.data
Out[11]:
(array([720, 960]), tensor([[[21, 21, 21,  ..., 21, 21, 21],
          [21, 21, 21,  ..., 21, 21, 21],
          [21, 21, 21,  ..., 21, 21, 21],
          ...,
          [18, 18, 18,  ..., 17, 17, 17],
          [18, 18, 18,  ..., 17, 17, 17],
          [18, 18, 18,  ..., 17, 17, 17]]]))
In [12]:
# codes.txt holds one class name per line; the line index is the class
# id used as the pixel value in the masks (32 classes incl. 'Void').
codes = np.loadtxt(path/'codes.txt', dtype=str) # load as a str array
codes
Out[12]:
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets

In [13]:
# `//` is integer (floor) division — pixel sizes must be whole numbers.
# Train at half the source resolution first (progressive resizing).
size = src_size // 2
bs = 4 # original notebook used 8; reduced to fit a Tesla P4 with 8 GB of memory
In [19]:
# SegmentationItemList is ItemList suitable for segmentation tasks. It returns ImageItemList type.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt') # Split the data by using the names in valid.txt for the validation set
       .label_from_func(get_y_fn, classes=codes)) # Apply `func` to every input to get its label (mask). Call get_label_cls func which decide the type of category list
In [15]:
# Apply the default augmentations to images AND masks (tfm_y=True so
# each mask gets the same flip/warp as its image), resize to half
# resolution, batch, and normalize with ImageNet statistics (required
# for an ImageNet-pretrained encoder).
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
In [16]:
# Visual check: training images with their masks overlaid.
data.show_batch(2, figsize=(10,7))
In [17]:
# Same check on the validation split (no training-time augmentation).
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)