Important: This notebook will only work with fastai-0.7.x. Do not try to run any fastai-1.x code from this path in the repository because it will load fastai-0.7.x

In [ ]:

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [ ]:

from fastai.conv_learner import *
from fastai.dataset import *

import json, pdb
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
torch.cuda.set_device(0)

In [ ]:

torch.backends.cudnn.benchmark=True

Setup¶

In [ ]:

PATH = Path('data/pascal')
trn_j = json.load((PATH / 'pascal_train2007.json').open())
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'

cats = dict((o[ID], o['name']) for o in trn_j[CATEGORIES])
trn_fns = dict((o[ID], o[FILE_NAME]) for o in trn_j[IMAGES])
trn_ids = [o[ID] for o in trn_j[IMAGES]]

JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS

In [ ]:

def get_trn_anno():
    trn_anno = collections.defaultdict(lambda:[])
    for o in trn_j[ANNOTATIONS]:
        if not o['ignore']:
            bb = o[BBOX]
            bb = np.array([bb[1], bb[0], bb[3]+bb[1]-1, bb[2]+bb[0]-1])
            trn_anno[o[IMG_ID]].append((bb,o[CAT_ID]))
    return trn_anno

trn_anno = get_trn_anno()

In [ ]:

def show_img(im, figsize=None, ax=None):
    if not ax: fig,ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.set_xticks(np.linspace(0, 224, 8))
    ax.set_yticks(np.linspace(0, 224, 8))
    ax.grid()
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    return ax

def draw_outline(o, lw):
    o.set_path_effects([patheffects.Stroke(
        linewidth=lw, foreground='black'), patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2))
    draw_outline(patch, 4)

def draw_text(ax, xy, txt, sz=14, color='white'):
    text = ax.text(*xy, txt,
        verticalalignment='top', color=color, fontsize=sz, weight='bold')
    draw_outline(text, 1)

In [ ]:

def bb_hw(a): return np.array([a[1],a[0],a[3]-a[1]+1,a[2]-a[0]+1])

def draw_im(im, ann):
    ax = show_img(im, figsize=(16,8))
    for b,c in ann:
        b = bb_hw(b)
        draw_rect(ax, b)
        draw_text(ax, b[:2], cats[c], sz=16)

def draw_idx(i):
    im_a = trn_anno[i]
    im = open_image(IMG_PATH/trn_fns[i])
    draw_im(im, im_a)

Multi class¶

In [ ]:

MC_CSV = PATH/'tmp/mc.csv'

In [ ]:

trn_anno[12]

Out[ ]:

[(array([ 96, 155, 269, 350]), 7)]

In [ ]:

mc = [set([cats[p[1]] for p in trn_anno[o]]) for o in trn_ids]
mcs = [' '.join(str(p) for p in o) for o in mc]

In [ ]:

df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'clas': mcs}, columns=['fn','clas'])
df.to_csv(MC_CSV, index=False)

In [ ]:

f_model=resnet34
sz=224
bs=64

In [ ]:

tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, MC_CSV, tfms=tfms, bs=bs)

In [ ]:

learn = ConvLearner.pretrained(f_model, md)
learn.opt_fn = optim.Adam

In [ ]:

lrf=learn.lr_find(1e-5,100)

Failed to display Jupyter Widget of type HBox.

If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.

If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.

epoch      trn_loss   val_loss   <lambda>                                                                              
    0      1.472438   10.808105  0.608842

In [ ]:

learn.sched.plot(0)

In [ ]:

lr = 2e-2

In [ ]:

learn.fit(lr, 1, cycle_len=3, use_clr=(32,5))

A Jupyter Widget

epoch      trn_loss   val_loss   <lambda>                  
    0      0.104836   0.085015   0.972356  
    1      0.088193   0.079739   0.972461                   
    2      0.072346   0.077259   0.974114

Out[ ]:

[0.077258907, 0.9741135761141777]

In [ ]:

lrs = np.array([lr/100, lr/10, lr])

In [ ]:

learn.freeze_to(-2)

In [ ]:

learn.lr_find(lrs/1000)
learn.sched.plot(0)

Failed to display Jupyter Widget of type HBox.

If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.

If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.

 81%|█████████████████████████████████████████████████████████▋             | 26/32 [00:22<00:05,  1.15it/s, loss=0.33]

In [ ]:

learn.fit(lrs/10, 1, cycle_len=5, use_clr=(32,5))

A Jupyter Widget

epoch      trn_loss   val_loss   <lambda>                   
    0      0.063236   0.088847   0.970681  
    1      0.049675   0.079885   0.973723                   
    2      0.03693    0.076906   0.975601                   
    3      0.026645   0.075304   0.976187                   
    4      0.018805   0.074934   0.975165

Out[ ]:

[0.074934497, 0.97516526281833649]

In [ ]:

learn.save('mclas')

In [ ]:

learn.load('mclas')

In [ ]:

y = learn.predict()
x,_ = next(iter(md.val_dl))
x = to_np(x)

In [ ]:

fig, axes = plt.subplots(3, 4, figsize=(12, 8))
for i,ax in enumerate(axes.flat):
    ima=md.val_ds.denorm(x)[i]
    ya = np.nonzero(y[i]>0.4)[0]
    b = '\n'.join(md.classes[o] for o in ya)
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0,0), b)
plt.tight_layout()

Bbox per cell¶

Set up data¶

In [ ]:

CLAS_CSV = PATH/'tmp/clas.csv'
MBB_CSV = PATH/'tmp/mbb.csv'

f_model=resnet34
sz=224
bs=64

In [ ]:

mc = [[cats[p[1]] for p in trn_anno[o]] for o in trn_ids]
id2cat = list(cats.values())
cat2id = {v:k for k,v in enumerate(id2cat)}
mcs = np.array([np.array([cat2id[p] for p in o]) for o in mc]); mcs

Out[ ]:

array([array([6]), array([14, 12]), array([ 1,  1, 14, 14, 14]), ..., array([17,  8, 14, 14, 14]), array([6]),
       array([11])], dtype=object)

In [ ]:

val_idxs = get_cv_idxs(len(trn_fns))
((val_mcs,trn_mcs),) = split_by_idx(val_idxs, mcs)

In [ ]:

mbb = [np.concatenate([p[0] for p in trn_anno[o]]) for o in trn_ids]
mbbs = [' '.join(str(p) for p in o) for o in mbb]

df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'bbox': mbbs}, columns=['fn','bbox'])
df.to_csv(MBB_CSV, index=False)

In [ ]:

df.head()

Out[ ]:

	fn	bbox
0	000012.jpg	96 155 269 350
1	000017.jpg	61 184 198 278 77 89 335 402
2	000023.jpg	229 8 499 244 219 229 499 333 0 1 368 116 1 2 ...
3	000026.jpg	124 89 211 336
4	000032.jpg	77 103 182 374 87 132 122 196 179 194 228 212 ...

In [ ]:

aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
            RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
            RandomFlip(tfm_y=TfmType.COORD)]
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
md = ImageClassifierData.from_csv(PATH, JPEGS, MBB_CSV, tfms=tfms, bs=bs, continuous=True, num_workers=4)

In [ ]:

import matplotlib.cm as cmx
import matplotlib.colors as mcolors
from cycler import cycler

def get_cmap(N):
    color_norm  = mcolors.Normalize(vmin=0, vmax=N-1)
    return cmx.ScalarMappable(norm=color_norm, cmap='Set3').to_rgba

num_colr = 12
cmap = get_cmap(num_colr)
colr_list = [cmap(float(x)) for x in range(num_colr)]

In [ ]:

def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
    bb = [bb_hw(o) for o in bbox.reshape(-1,4)]
    if prs is None:  prs  = [None]*len(bb)
    if clas is None: clas = [None]*len(bb)
    ax = show_img(im, ax=ax)
    for i,(b,c,pr) in enumerate(zip(bb, clas, prs)):
        if((b[2]>0) and (pr is None or pr > thresh)):
            draw_rect(ax, b, color=colr_list[i%num_colr])
            txt = f'{i}: '
            if c is not None: txt += ('bg' if c==len(id2cat) else id2cat[c])
            if pr is not None: txt += f' {pr:.2f}'
            draw_text(ax, b[:2], txt, color=colr_list[i%num_colr])

In [ ]:

class ConcatLblDataset(Dataset):
    def __init__(self, ds, y2):
        self.ds,self.y2 = ds,y2
        self.sz = ds.sz
    def __len__(self): return len(self.ds)
    
    def __getitem__(self, i):
        x,y = self.ds[i]
        return (x, (y,self.y2[i]))

In [ ]:

trn_ds2 = ConcatLblDataset(md.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md.val_ds, val_mcs)
md.trn_dl.dataset = trn_ds2
md.val_dl.dataset = val_ds2

In [ ]:

x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)

In [ ]:

x,y=to_np(next(iter(md.trn_dl)))
x=md.trn_ds.ds.denorm(x)

In [ ]:

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for i,ax in enumerate(axes.flat):
    show_ground_truth(ax, x[i], y[0][i], y[1][i])
plt.tight_layout()

Set up model¶

We're going to make a simple first model that simply predicts what object is located in each cell of a 4x4 grid. Later on we can try to improve this.

In [ ]:

anc_grid = 4
k = 1

anc_offset = 1/(anc_grid*2)
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)

anc_ctrs = np.tile(np.stack([anc_x,anc_y], axis=1), (k,1))
anc_sizes = np.array([[1/anc_grid,1/anc_grid] for i in range(anc_grid*anc_grid)])
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()

In [ ]:

grid_sizes = V(np.array([1/anc_grid]), requires_grad=False).unsqueeze(1)

In [ ]:

plt.scatter(anc_x, anc_y)
plt.xlim(0, 1)
plt.ylim(0, 1);

In [ ]:

anchors

Out[ ]:

Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:

def hw2corners(ctr, hw): return torch.cat([ctr-hw/2, ctr+hw/2], dim=1)

In [ ]:

anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_cnr

Out[ ]:

Variable containing:
 0.0000  0.0000  0.2500  0.2500
 0.0000  0.2500  0.2500  0.5000
 0.0000  0.5000  0.2500  0.7500
 0.0000  0.7500  0.2500  1.0000
 0.2500  0.0000  0.5000  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.5000  0.5000  0.7500
 0.2500  0.7500  0.5000  1.0000
 0.5000  0.0000  0.7500  0.2500
 0.5000  0.2500  0.7500  0.5000
 0.5000  0.5000  0.7500  0.7500
 0.5000  0.7500  0.7500  1.0000
 0.7500  0.0000  1.0000  0.2500
 0.7500  0.2500  1.0000  0.5000
 0.7500  0.5000  1.0000  0.7500
 0.7500  0.7500  1.0000  1.0000
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:

n_clas = len(id2cat)+1
n_act = k*(4+n_clas)

In [ ]:

class StdConv(nn.Module):
    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, 3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)
        
    def forward(self, x): return self.drop(self.bn(F.relu(self.conv(x))))
        
def flatten_conv(x,k):
    bs,nf,gx,gy = x.size()
    x = x.permute(0,2,3,1).contiguous()
    return x.view(bs,-1,nf//k)

In [ ]:

class OutConv(nn.Module):
    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        self.oconv1 = nn.Conv2d(nin, (len(id2cat)+1)*k, 3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4*k, 3, padding=1)
        self.oconv1.bias.data.zero_().add_(bias)
        
    def forward(self, x):
        return [flatten_conv(self.oconv1(x), self.k),
                flatten_conv(self.oconv2(x), self.k)]

In [ ]:

class SSD_Head(nn.Module):
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(0.25)
        self.sconv0 = StdConv(512,256, stride=1)
#         self.sconv1 = StdConv(256,256)
        self.sconv2 = StdConv(256,256)
        self.out = OutConv(k, 256, bias)
        
    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
#         x = self.sconv1(x)
        x = self.sconv2(x)
        return self.out(x)

head_reg4 = SSD_Head(k, -3.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam
k

Out[ ]:

Train¶

In [ ]:

def one_hot_embedding(labels, num_classes):
    return torch.eye(num_classes)[labels.data.cpu()]

class BCE_Loss(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, pred, targ):
        t = one_hot_embedding(targ, self.num_classes+1)
        t = V(t[:,:-1].contiguous())#.cpu()
        x = pred[:,:-1]
        w = self.get_weight(x,t)
        return F.binary_cross_entropy_with_logits(x, t, w, size_average=False)/self.num_classes
    
    def get_weight(self,x,t): return None

loss_f = BCE_Loss(len(id2cat))

In [ ]:

def intersect(box_a, box_b):
    max_xy = torch.min(box_a[:, None, 2:], box_b[None, :, 2:])
    min_xy = torch.max(box_a[:, None, :2], box_b[None, :, :2])
    inter = torch.clamp((max_xy - min_xy), min=0)
    return inter[:, :, 0] * inter[:, :, 1]

def box_sz(b): return ((b[:, 2]-b[:, 0]) * (b[:, 3]-b[:, 1]))

def jaccard(box_a, box_b):
    inter = intersect(box_a, box_b)
    union = box_sz(box_a).unsqueeze(1) + box_sz(box_b).unsqueeze(0) - inter
    return inter / union

In [ ]:

def get_y(bbox,clas):
    bbox = bbox.view(-1,4)/sz
    bb_keep = ((bbox[:,2]-bbox[:,0])>0).nonzero()[:,0]
    return bbox[bb_keep],clas[bb_keep]

def actn_to_bb(actn, anchors):
    actn_bbs = torch.tanh(actn)
    actn_centers = (actn_bbs[:,:2]/2 * grid_sizes) + anchors[:,:2]
    actn_hw = (actn_bbs[:,2:]/2+1) * anchors[:,2:]
    return hw2corners(actn_centers, actn_hw)

def map_to_ground_truth(overlaps, print_it=False):
    prior_overlap, prior_idx = overlaps.max(1)
    if print_it: print(prior_overlap)
#     pdb.set_trace()
    gt_overlap, gt_idx = overlaps.max(0)
    gt_overlap[prior_idx] = 1.99
    for i,o in enumerate(prior_idx): gt_idx[o] = i
    return gt_overlap,gt_idx

def ssd_1_loss(b_c,b_bb,bbox,clas,print_it=False):
    bbox,clas = get_y(bbox,clas)
    a_ic = actn_to_bb(b_bb, anchors)
    overlaps = jaccard(bbox.data, anchor_cnr.data)
    gt_overlap,gt_idx = map_to_ground_truth(overlaps,print_it)
    gt_clas = clas[gt_idx]
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:,0]
    gt_clas[1-pos] = len(id2cat)
    gt_bbox = bbox[gt_idx]
    loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
    clas_loss  = loss_f(b_c, gt_clas)
    return loc_loss, clas_loss

def ssd_loss(pred,targ,print_it=False):
    lcs,lls = 0.,0.
    for b_c,b_bb,bbox,clas in zip(*pred,*targ):
        loc_loss,clas_loss = ssd_1_loss(b_c,b_bb,bbox,clas,print_it)
        lls += loc_loss
        lcs += clas_loss
    if print_it: print(f'loc: {lls.data[0]}, clas: {lcs.data[0]}')
    return lls+lcs

In [ ]:

x,y = next(iter(md.val_dl))
# x,y = V(x).cpu(),V(y)
x,y = V(x),V(y)

In [ ]:

for i,o in enumerate(y): y[i] = o.cuda()
learn.model.cuda()

In [ ]:

batch = learn.model(x)

In [ ]:

 # uncomment to debug on cpu
#anchors = anchors.cpu(); grid_sizes = grid_sizes.cpu(); anchor_cnr = anchor_cnr.cpu()

In [ ]:

ssd_loss(batch, y, True)

 0.1947
 0.1168
 0.2652
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2885
 0.0888
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.9027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1608
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3237
 0.2153
 0.2558
 0.2013
 0.2526
 0.0485
 0.0879
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3258
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2704
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.4538
 0.1897
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.1527
 0.1863
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3426
 0.3249
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.0642
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2418
 0.2337
 0.2590
[torch.cuda.FloatTensor of size 3 (GPU 0)]


1.00000e-02 *
  8.4642
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3652
 0.1377
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.5146
  5.7398
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.1148
 0.1341
 0.1650
 0.0384
 0.2213
 0.1477
 0.2520
 0.2531
 0.2129
 0.2144
 0.1795
 0.3002
 0.3057
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.2097
 0.2182
 0.2786
 0.2973
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2568
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2184
 0.2459
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1166
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0898
 0.0548
 0.4860
 0.0865
 0.1805
 0.2080
 0.2583
 0.0650
 0.0383
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2222
 0.1000
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  6.6300
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1940
 0.1498
 0.4352
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.1231
 0.2356
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2515
 0.2851
 0.2107
 0.2351
 0.2572
 0.1801
 0.2538
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2544
 0.0842
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1890
 0.2767
 0.2161
 0.2104
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.1465
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3846
 0.4679
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1677
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3781
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1589
 0.1125
 0.1994
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2164
 0.4026
 0.3522
 0.2881
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.3824
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1823
 0.0647
 0.0404
 0.1737
 0.1553
 0.3090
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2105
 0.2143
 0.1074
 0.1572
 0.1939
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.1817
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0536
 0.2392
 0.4061
 0.0804
 0.3463
 0.3876
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1975
 0.1799
 0.2146
 0.0935
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2553
 0.1721
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2017
 0.0885
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4367
 0.2400
 0.1817
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2471
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3207
 0.2089
 0.6309
 0.1183
 0.2568
[torch.cuda.FloatTensor of size 5 (GPU 0)]


1.00000e-02 *
  8.3850
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1024
 0.2968
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  8.3770
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2832
 0.1478
 0.0903
 0.3304
 0.1316
 0.1940
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.4223
 0.1600
 0.2250
 0.3211
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3666
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1067
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1610
 0.1593
 0.3415
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3255
 0.3394
 0.3390
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2139
 0.3500
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1369
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1455
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2794
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


1.00000e-02 *
  6.3919
  9.1493
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4062
 0.2180
 0.1307
 0.5762
 0.1524
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1128
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 10.175130844116211, clas: 72.91587829589844

Out[ ]:

Variable containing:
 83.0910
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:

learn.crit = ssd_loss
lr = 3e-3
lrs = np.array([lr/100,lr/10,lr])

In [ ]:

learn.lr_find(lrs/1000,1.)
learn.sched.plot(1)

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      86.852668  32587.789062

In [ ]:

learn.fit(lr, 1, cycle_len=5, use_clr=(20,10))

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      43.166077  32.56049  
    1      33.731625  28.329123                           
    2      29.498006  27.387726                           
    3      26.590789  26.043869                           
    4      24.470896  25.746592

Out[ ]:

[25.746592]

In [ ]:

learn.save('0')

In [ ]:

learn.load('0')

Testing¶

In [ ]:

x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
learn.model.eval()
batch = learn.model(x)
b_clas,b_bb = batch

In [ ]:

b_clas.size(),b_bb.size()

Out[ ]:

(torch.Size([64, 16, 21]), torch.Size([64, 16, 4]))

In [ ]:

idx=7
b_clasi = b_clas[idx]
b_bboxi = b_bb[idx]
ima=md.val_ds.ds.denorm(to_np(x))[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
bbox,clas

Out[ ]:

(Variable containing:
  0.6786  0.4866  0.9911  0.6250
  0.7098  0.0848  0.9911  0.5491
  0.5134  0.8304  0.6696  0.9063
 [torch.cuda.FloatTensor of size 3x4 (GPU 0)], Variable containing:
   8
  10
  17
 [torch.cuda.LongTensor of size 3 (GPU 0)])

In [ ]:

def torch_gt(ax, ima, bbox, clas, prs=None, thresh=0.4):
    return show_ground_truth(ax, ima, to_np((bbox*224).long()),
         to_np(clas), to_np(prs) if prs is not None else None, thresh)

In [ ]:

fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, bbox, clas)

In [ ]:

fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, anchor_cnr, b_clasi.max(1)[1])

In [ ]:

grid_sizes

Out[ ]:

Variable containing:
 0.2500
[torch.cuda.FloatTensor of size 1x1 (GPU 0)]

In [ ]:

anchors

Out[ ]:

Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:

a_ic = actn_to_bb(b_bboxi, anchors)

In [ ]:

fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, a_ic, b_clasi.max(1)[1], b_clasi.max(1)[0].sigmoid(), thresh=0.0)

In [ ]:

overlaps = jaccard(bbox.data, anchor_cnr.data)
overlaps

Out[ ]:


Columns 0 to 9 
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0091
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0356  0.0549
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 15 
 0.0922  0.0000  0.0000  0.0315  0.3985  0.0000
 0.0103  0.0000  0.2598  0.4538  0.0653  0.0000
 0.0000  0.1897  0.0000  0.0000  0.0000  0.0000
[torch.cuda.FloatTensor of size 3x16 (GPU 0)]

In [ ]:

overlaps.max(1)

Out[ ]:

(
  0.3985
  0.4538
  0.1897
 [torch.cuda.FloatTensor of size 3 (GPU 0)], 
  14
  13
  11
 [torch.cuda.LongTensor of size 3 (GPU 0)])

In [ ]:

overlaps.max(0)

Out[ ]:

(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  0.1897
  0.2598
  0.4538
  0.3985
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])

In [ ]:

gt_overlap,gt_idx = map_to_ground_truth(overlaps)
gt_overlap,gt_idx

Out[ ]:

(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  1.9900
  0.2598
  1.9900
  1.9900
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])

In [ ]:

gt_clas = clas[gt_idx]; gt_clas

Out[ ]:

Variable containing:
  8
  8
  8
  8
  8
  8
  8
  8
 10
 10
  8
 17
 10
 10
  8
  8
[torch.cuda.LongTensor of size 16 (GPU 0)]

In [ ]:

thresh = 0.5
pos = gt_overlap > thresh
pos_idx = torch.nonzero(pos)[:,0]
neg_idx = torch.nonzero(1-pos)[:,0]
pos_idx

Out[ ]:

 11
 13
 14
[torch.cuda.LongTensor of size 3 (GPU 0)]

In [ ]:

gt_clas[1-pos] = len(id2cat)
[id2cat[o] if o<len(id2cat) else 'bg' for o in gt_clas.data]

Out[ ]:

['bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'sofa',
 'bg',
 'diningtable',
 'chair',
 'bg']

In [ ]:

gt_bbox = bbox[gt_idx]
loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
clas_loss  = F.cross_entropy(b_clasi, gt_clas)
loc_loss,clas_loss

Out[ ]:

(Variable containing:
 1.00000e-02 *
   6.5691
 [torch.cuda.FloatTensor of size 1 (GPU 0)], Variable containing:
  1.1215
 [torch.cuda.FloatTensor of size 1 (GPU 0)])

In [ ]:

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=md.val_ds.ds.denorm(to_np(x))[idx]
    bbox,clas = get_y(y[0][idx], y[1][idx])
    ima=md.val_ds.ds.denorm(to_np(x))[idx]
    bbox,clas = get_y(bbox,clas); bbox,clas
    a_ic = actn_to_bb(b_bb[idx], anchors)
    torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.01)
plt.tight_layout()

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

More anchors!¶

Create anchors¶

In [ ]:

anc_grids = [4,2,1]
# anc_grids = [2]
anc_zooms = [0.7, 1., 1.3]
# anc_zooms = [1.]
anc_ratios = [(1.,1.), (1.,0.5), (0.5,1.)]
# anc_ratios = [(1.,1.)]
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
k = len(anchor_scales)
anc_offsets = [1/(o*2) for o in anc_grids]
k

Out[ ]:

In [ ]:

anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)

In [ ]:

anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids])
grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids]), requires_grad=False).unsqueeze(1)
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])

In [ ]:

anchors

Out[ ]:

Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.1250  0.2500  0.1250
 0.1250  0.1250  0.1250  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.3750  0.2500  0.1250
 0.1250  0.3750  0.1250  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.6250  0.2500  0.1250
 0.1250  0.6250  0.1250  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.1250  0.8750  0.2500  0.1250
 0.1250  0.8750  0.1250  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.1250  0.2500  0.1250
 0.3750  0.1250  0.1250  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.3750  0.2500  0.1250
 0.3750  0.3750  0.1250  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.6250  0.2500  0.1250
 0.3750  0.6250  0.1250  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.3750  0.8750  0.2500  0.1250
 0.3750  0.8750  0.1250  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.1250  0.2500  0.1250
 0.6250  0.1250  0.1250  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.3750  0.2500  0.1250
 0.6250  0.3750  0.1250  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.6250  0.2500  0.1250
 0.6250  0.6250  0.1250  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.6250  0.8750  0.2500  0.1250
 0.6250  0.8750  0.1250  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.1250  0.2500  0.1250
 0.8750  0.1250  0.1250  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.3750  0.2500  0.1250
 0.8750  0.3750  0.1250  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.6250  0.2500  0.1250
 0.8750  0.6250  0.1250  0.2500
 0.8750  0.8750  0.2500  0.2500
 0.8750  0.8750  0.2500  0.1250
 0.8750  0.8750  0.1250  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.2500  0.5000  0.2500
 0.2500  0.2500  0.2500  0.5000
 0.2500  0.7500  0.5000  0.5000
 0.2500  0.7500  0.5000  0.2500
 0.2500  0.7500  0.2500  0.5000
 0.7500  0.2500  0.5000  0.5000
 0.7500  0.2500  0.5000  0.2500
 0.7500  0.2500  0.2500  0.5000
 0.7500  0.7500  0.5000  0.5000
 0.7500  0.7500  0.5000  0.2500
 0.7500  0.7500  0.2500  0.5000
 0.5000  0.5000  1.0000  1.0000
 0.5000  0.5000  1.0000  0.5000
 0.5000  0.5000  0.5000  1.0000
[torch.cuda.FloatTensor of size 63x4 (GPU 0)]

In [ ]:

x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)

In [ ]:

a=np.reshape((to_np(anchor_cnr) + to_np(torch.randn(*anchor_cnr.size()))*0.01)*224, -1)

In [ ]:

fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

In [ ]:

fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Model¶

In [ ]:

drop=0.4

class SSD_MultiHead(nn.Module):
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(drop)
        self.sconv0 = StdConv(512,256, stride=1, drop=drop)
        self.sconv1 = StdConv(256,256, drop=drop)
        self.sconv2 = StdConv(256,256, drop=drop)
        self.sconv3 = StdConv(256,256, drop=drop)
        self.out0 = OutConv(k, 256, bias)
        self.out1 = OutConv(k, 256, bias)
        self.out2 = OutConv(k, 256, bias)
        self.out3 = OutConv(k, 256, bias)

    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
        x = self.sconv1(x)
        o1c,o1l = self.out1(x)
        x = self.sconv2(x)
        o2c,o2l = self.out2(x)
        x = self.sconv3(x)
        o3c,o3l = self.out3(x)
        return [torch.cat([o1c,o2c,o3c], dim=1),
                torch.cat([o1l,o2l,o3l], dim=1)]

head_reg4 = SSD_MultiHead(k, -4.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam

In [ ]:

learn.crit = ssd_loss
lr = 1e-2
lrs = np.array([lr/100,lr/10,lr])

In [ ]:

x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(V(x))

In [ ]:

batch[0].size(),batch[1].size()

Out[ ]:

(torch.Size([64, 189, 21]), torch.Size([64, 189, 4]))

In [ ]:

ssd_loss(batch, y, True)

 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.5301
[torch.cuda.FloatTensor of size 1 (GPU 0)]
 Variable containing:
 61.3364
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Out[ ]:

Variable containing:
 65.8664
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:

learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=2)

A Jupyter Widget

 94%|█████████▍| 30/32 [00:18<00:01,  1.65it/s, loss=89.7]

In [ ]:

learn.fit(lrs, 1, cycle_len=4, use_clr=(20,8))

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      23.020269  22.007149 
    1      19.23732   15.323267                           
    2      16.612079  13.967303                           
    3      14.706582  12.920008

Out[ ]:

[12.920008]

In [ ]:

learn.save('tmp')

In [ ]:

learn.freeze_to(-2)
learn.fit(lrs/2, 1, cycle_len=4, use_clr=(20,8))

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      14.021227  17.886932 
    1      13.386686  12.754044                           
    2      12.297876  11.913645                           
    3      11.29237   11.368293

Out[ ]:

[11.368293]

In [ ]:

learn.save('prefocal')

In [ ]:

x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=md.val_ds.ds.denorm(x)[idx]
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.21)
plt.tight_layout()

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Focal loss¶

In [ ]:

def plot_results(thresh):
    x,y = next(iter(md.val_dl))
    y = V(y)
    batch = learn.model(V(x))
    b_clas,b_bb = batch

    x = to_np(x)
    fig, axes = plt.subplots(3, 4, figsize=(16, 12))
    for idx,ax in enumerate(axes.flat):
        ima=md.val_ds.ds.denorm(x)[idx]
        bbox,clas = get_y(y[0][idx], y[1][idx])
        a_ic = actn_to_bb(b_bb[idx], anchors)
        clas_pr, clas_ids = b_clas[idx].max(1)
        clas_pr = clas_pr.sigmoid()
        torch_gt(ax, ima, a_ic, clas_ids, clas_pr, clas_pr.max().data[0]*thresh)
    plt.tight_layout()

In [ ]:

class FocalLoss(BCE_Loss):
    def get_weight(self,x,t):
        alpha,gamma = 0.25,1
        p = x.sigmoid()
        pt = p*t + (1-p)*(1-t)
        w = alpha*t + (1-alpha)*(1-t)
        return w * (1-pt).pow(gamma)

loss_f = FocalLoss(len(id2cat))

In [ ]:

x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(x)
ssd_loss(batch, y, True)

 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 3.6088805198669434, clas: 7.331346035003662

Out[ ]:

Variable containing:
 10.9402
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:

learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=1)

A Jupyter Widget

 19%|█▉        | 6/32 [00:05<00:24,  1.07it/s, loss=67.7]

Exception in thread Thread-228:
Traceback (most recent call last):
  File "/home/jhoward/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/jhoward/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/jhoward/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 72%|███████▏  | 23/32 [00:15<00:06,  1.44it/s, loss=nan]

In [ ]:

learn.fit(lrs, 1, cycle_len=10, use_clr=(20,10))

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      27.244542  32.936592 
    1      24.687115  22.024887                           
    2      22.251388  19.824711                           
    3      20.238359  18.530561                           
    4      18.613792  17.730865                           
    5      17.376519  17.323956                           
    6      16.33999   16.968851                           
    7      15.425277  16.894522                           
    8      14.683091  16.533207                           
    9      14.044275  16.332354

Out[ ]:

[16.332354]

In [ ]:

learn.save('fl0')

In [ ]:

learn.load('fl0')

In [ ]:

learn.freeze_to(-2)
learn.fit(lrs/4, 1, cycle_len=10, use_clr=(20,10))

A Jupyter Widget

epoch      trn_loss   val_loss                            
    0      13.991107  17.163681 
    1      14.275143  16.685173                           
    2      13.81701   16.067303                           
    3      13.172081  15.567028                           
    4      12.474847  15.480181                           
    5      11.774984  15.262911                           
    6      11.21864   15.10038                            
    7      10.711037  15.184849                           
    8      10.215424  14.942656                           
    9      9.871555   14.910997

Out[ ]:

[14.910997]

In [ ]:

learn.save('drop4')

In [ ]:

learn.load('drop4')

In [ ]:

plot_results(0.75)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

NMS¶

In [ ]:

def nms(boxes, scores, overlap=0.5, top_k=100):
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0: return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1: break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

In [ ]:

x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)

In [ ]:

def show_nmf(idx):
    ima=md.val_ds.ds.denorm(x)[idx]
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    clas_pr, clas_ids = b_clas[idx].max(1)
    clas_pr = clas_pr.sigmoid()

    conf_scores = b_clas[idx].sigmoid().t().data

    out1,out2,cc = [],[],[]
    for cl in range(0, len(conf_scores)-1):
        c_mask = conf_scores[cl] > 0.25
        if c_mask.sum() == 0: continue
        scores = conf_scores[cl][c_mask]
        l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
        boxes = a_ic[l_mask].view(-1, 4)
        ids, count = nms(boxes.data, scores, 0.4, 50)
        ids = ids[:count]
        out1.append(scores[ids])
        out2.append(boxes.data[ids])
        cc.append([cl]*count)
    if not cc:
        print(f"{i}: empty array")
        return
    cc = T(np.concatenate(cc))
    out1 = torch.cat(out1)
    out2 = torch.cat(out2)

    fig, ax = plt.subplots(figsize=(8,8))
    torch_gt(ax, ima, out2, cc, out1, 0.1)

In [ ]:

for i in range(12): show_nmf(i)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

End¶

In [ ]: