Important: This notebook only works with fastai 0.7.x. Do not try to run any fastai 1.x code from this path in the repository, because it will load fastai 0.7.x instead.
%matplotlib inline
%reload_ext autoreload
%autoreload 2
from fastai.conv_learner import *
from fastai.dataset import *
import json, pdb
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
torch.cuda.set_device(0)
torch.backends.cudnn.benchmark=True
PATH = Path('data/pascal')
trn_j = json.load((PATH / 'pascal_train2007.json').open())
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'
cats = dict((o[ID], o['name']) for o in trn_j[CATEGORIES])
trn_fns = dict((o[ID], o[FILE_NAME]) for o in trn_j[IMAGES])
trn_ids = [o[ID] for o in trn_j[IMAGES]]
JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS
def get_trn_anno():
trn_anno = collections.defaultdict(lambda:[])
for o in trn_j[ANNOTATIONS]:
if not o['ignore']:
bb = o[BBOX]
bb = np.array([bb[1], bb[0], bb[3]+bb[1]-1, bb[2]+bb[0]-1])
trn_anno[o[IMG_ID]].append((bb,o[CAT_ID]))
return trn_anno
trn_anno = get_trn_anno()
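The conversion above is easy to get wrong, so here is a hedged worked example (not part of the original notebook): the VOC json stores each box as [x, y, width, height], and get_trn_anno turns it into [top row, left col, bottom row, right col].
# hedged worked example, assuming a hypothetical VOC-style box [x, y, w, h]
bx, by, bw, bh = 155, 96, 196, 174
print(np.array([by, bx, by+bh-1, bx+bw-1]))   # -> [ 96 155 269 350], cf. trn_anno[12] below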
def show_img(im, figsize=None, ax=None):
if not ax: fig,ax = plt.subplots(figsize=figsize)
ax.imshow(im)
ax.set_xticks(np.linspace(0, 224, 8))
ax.set_yticks(np.linspace(0, 224, 8))
ax.grid()
ax.set_yticklabels([])
ax.set_xticklabels([])
return ax
def draw_outline(o, lw):
o.set_path_effects([patheffects.Stroke(
linewidth=lw, foreground='black'), patheffects.Normal()])
def draw_rect(ax, b, color='white'):
patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2))
draw_outline(patch, 4)
def draw_text(ax, xy, txt, sz=14, color='white'):
text = ax.text(*xy, txt,
verticalalignment='top', color=color, fontsize=sz, weight='bold')
draw_outline(text, 1)
def bb_hw(a): return np.array([a[1],a[0],a[3]-a[1]+1,a[2]-a[0]+1])
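As a quick hedged check, bb_hw undoes that conversion, returning the [x, y, width, height] form that matplotlib's Rectangle patch expects.
# hedged check: invert the stored box for image 12 back to matplotlib form
print(bb_hw(np.array([96, 155, 269, 350])))   # -> [155  96 196 174]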
def draw_im(im, ann):
ax = show_img(im, figsize=(16,8))
for b,c in ann:
b = bb_hw(b)
draw_rect(ax, b)
draw_text(ax, b[:2], cats[c], sz=16)
def draw_idx(i):
im_a = trn_anno[i]
im = open_image(IMG_PATH/trn_fns[i])
draw_im(im, im_a)
MC_CSV = PATH/'tmp/mc.csv'
trn_anno[12]
[(array([ 96, 155, 269, 350]), 7)]
mc = [set([cats[p[1]] for p in trn_anno[o]]) for o in trn_ids]
mcs = [' '.join(str(p) for p in o) for o in mc]
df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'clas': mcs}, columns=['fn','clas'])
df.to_csv(MC_CSV, index=False)
f_model=resnet34
sz=224
bs=64
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, MC_CSV, tfms=tfms, bs=bs)
learn = ConvLearner.pretrained(f_model, md)
learn.opt_fn = optim.Adam
lrf=learn.lr_find(1e-5,100)
epoch  trn_loss  val_loss   <lambda>
0      1.472438  10.808105  0.608842
learn.sched.plot(0)
lr = 2e-2
learn.fit(lr, 1, cycle_len=3, use_clr=(32,5))
epoch  trn_loss  val_loss  <lambda>
0      0.104836  0.085015  0.972356
1      0.088193  0.079739  0.972461
2      0.072346  0.077259  0.974114
[0.077258907, 0.9741135761141777]
lrs = np.array([lr/100, lr/10, lr])
learn.freeze_to(-2)
learn.lr_find(lrs/1000)
learn.sched.plot(0)
learn.fit(lrs/10, 1, cycle_len=5, use_clr=(32,5))
epoch  trn_loss  val_loss  <lambda>
0      0.063236  0.088847  0.970681
1      0.049675  0.079885  0.973723
2      0.036930  0.076906  0.975601
3      0.026645  0.075304  0.976187
4      0.018805  0.074934  0.975165
[0.074934497, 0.97516526281833649]
learn.save('mclas')
learn.load('mclas')
y = learn.predict()
x,_ = next(iter(md.val_dl))
x = to_np(x)
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
for i,ax in enumerate(axes.flat):
ima=md.val_ds.denorm(x)[i]
ya = np.nonzero(y[i]>0.4)[0]
b = '\n'.join(md.classes[o] for o in ya)
ax = show_img(ima, ax=ax)
draw_text(ax, (0,0), b)
plt.tight_layout()
CLAS_CSV = PATH/'tmp/clas.csv'
MBB_CSV = PATH/'tmp/mbb.csv'
f_model=resnet34
sz=224
bs=64
mc = [[cats[p[1]] for p in trn_anno[o]] for o in trn_ids]
id2cat = list(cats.values())
cat2id = {v:k for k,v in enumerate(id2cat)}
mcs = np.array([np.array([cat2id[p] for p in o]) for o in mc]); mcs
array([array([6]), array([14, 12]), array([ 1, 1, 14, 14, 14]), ..., array([17, 8, 14, 14, 14]), array([6]), array([11])], dtype=object)
val_idxs = get_cv_idxs(len(trn_fns))
((val_mcs,trn_mcs),) = split_by_idx(val_idxs, mcs)
mbb = [np.concatenate([p[0] for p in trn_anno[o]]) for o in trn_ids]
mbbs = [' '.join(str(p) for p in o) for o in mbb]
df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'bbox': mbbs}, columns=['fn','bbox'])
df.to_csv(MBB_CSV, index=False)
df.head()
|   | fn | bbox |
|---|---|---|
| 0 | 000012.jpg | 96 155 269 350 |
| 1 | 000017.jpg | 61 184 198 278 77 89 335 402 |
| 2 | 000023.jpg | 229 8 499 244 219 229 499 333 0 1 368 116 1 2 ... |
| 3 | 000026.jpg | 124 89 211 336 |
| 4 | 000032.jpg | 77 103 182 374 87 132 122 196 179 194 228 212 ... |
aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
RandomFlip(tfm_y=TfmType.COORD)]
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
md = ImageClassifierData.from_csv(PATH, JPEGS, MBB_CSV, tfms=tfms, bs=bs, continuous=True, num_workers=4)
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
from cycler import cycler
def get_cmap(N):
color_norm = mcolors.Normalize(vmin=0, vmax=N-1)
return cmx.ScalarMappable(norm=color_norm, cmap='Set3').to_rgba
num_colr = 12
cmap = get_cmap(num_colr)
colr_list = [cmap(float(x)) for x in range(num_colr)]
def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
bb = [bb_hw(o) for o in bbox.reshape(-1,4)]
if prs is None: prs = [None]*len(bb)
if clas is None: clas = [None]*len(bb)
ax = show_img(im, ax=ax)
for i,(b,c,pr) in enumerate(zip(bb, clas, prs)):
if((b[2]>0) and (pr is None or pr > thresh)):
draw_rect(ax, b, color=colr_list[i%num_colr])
txt = f'{i}: '
if c is not None: txt += ('bg' if c==len(id2cat) else id2cat[c])
if pr is not None: txt += f' {pr:.2f}'
draw_text(ax, b[:2], txt, color=colr_list[i%num_colr])
class ConcatLblDataset(Dataset):
def __init__(self, ds, y2):
self.ds,self.y2 = ds,y2
self.sz = ds.sz
def __len__(self): return len(self.ds)
def __getitem__(self, i):
x,y = self.ds[i]
return (x, (y,self.y2[i]))
trn_ds2 = ConcatLblDataset(md.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md.val_ds, val_mcs)
md.trn_dl.dataset = trn_ds2
md.val_dl.dataset = val_ds2
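As a hedged sanity check (not in the original notebook), each item of the concatenated datasets should now yield the image plus a (bounding boxes, classes) tuple, so a single y carries both targets through the existing dataloaders.
# hypothetical spot-check of the first validation item
x0, (bb0, c0) = val_ds2[0]
print(bb0.shape, c0)    # flattened ground-truth boxes and their class ids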
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)
x,y=to_np(next(iter(md.trn_dl)))
x=md.trn_ds.ds.denorm(x)
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for i,ax in enumerate(axes.flat):
show_ground_truth(ax, x[i], y[0][i], y[1][i])
plt.tight_layout()
We're going to start with a simple model that predicts which object is located in each cell of a 4x4 grid. Later on we can try to improve it.
anc_grid = 4
k = 1
anc_offset = 1/(anc_grid*2)
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_ctrs = np.tile(np.stack([anc_x,anc_y], axis=1), (k,1))
anc_sizes = np.array([[1/anc_grid,1/anc_grid] for i in range(anc_grid*anc_grid)])
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
grid_sizes = V(np.array([1/anc_grid]), requires_grad=False).unsqueeze(1)
plt.scatter(anc_x, anc_y)
plt.xlim(0, 1)
plt.ylim(0, 1);
anchors
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]
def hw2corners(ctr, hw): return torch.cat([ctr-hw/2, ctr+hw/2], dim=1)
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_cnr
Variable containing:
 0.0000  0.0000  0.2500  0.2500
 0.0000  0.2500  0.2500  0.5000
 0.0000  0.5000  0.2500  0.7500
 0.0000  0.7500  0.2500  1.0000
 0.2500  0.0000  0.5000  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.5000  0.5000  0.7500
 0.2500  0.7500  0.5000  1.0000
 0.5000  0.0000  0.7500  0.2500
 0.5000  0.2500  0.7500  0.5000
 0.5000  0.5000  0.7500  0.7500
 0.5000  0.7500  0.7500  1.0000
 0.7500  0.0000  1.0000  0.2500
 0.7500  0.2500  1.0000  0.5000
 0.7500  0.5000  1.0000  0.7500
 0.7500  0.7500  1.0000  1.0000
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]
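A hedged single-box example of hw2corners: a box given as centre (0.125, 0.125) with height/width 0.25 should map to corners (0, 0) and (0.25, 0.25), i.e. the first row of anchor_cnr above.
# hedged example with a single made-up centre/size box
ctr = V(torch.Tensor([[0.125, 0.125]]))
hw  = V(torch.Tensor([[0.25, 0.25]]))
print(hw2corners(ctr, hw))    # expected: 0.0000 0.0000 0.2500 0.2500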
n_clas = len(id2cat)+1
n_act = k*(4+n_clas)
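A hedged arithmetic check of the head size: with k=1 anchor per cell and 21 classes (20 foreground plus background), the custom head must emit 4 + 21 = 25 activations for each of the 16 grid cells.
print(n_clas, n_act)    # expected: 21 25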
class StdConv(nn.Module):
def __init__(self, nin, nout, stride=2, drop=0.1):
super().__init__()
self.conv = nn.Conv2d(nin, nout, 3, stride=stride, padding=1)
self.bn = nn.BatchNorm2d(nout)
self.drop = nn.Dropout(drop)
def forward(self, x): return self.drop(self.bn(F.relu(self.conv(x))))
def flatten_conv(x,k):
bs,nf,gx,gy = x.size()
x = x.permute(0,2,3,1).contiguous()
return x.view(bs,-1,nf//k)
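A hedged shape check of flatten_conv (the tensor below is made up for illustration): it reorders a (bs, channels, 4, 4) convolutional output into one row of activations per grid cell.
# e.g. the class-output conv below: 21 channels = 20 classes + background, k=1
t = torch.randn(64, 21, 4, 4)
print(flatten_conv(t, 1).size())    # expected: torch.Size([64, 16, 21])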
class OutConv(nn.Module):
def __init__(self, k, nin, bias):
super().__init__()
self.k = k
self.oconv1 = nn.Conv2d(nin, (len(id2cat)+1)*k, 3, padding=1)
self.oconv2 = nn.Conv2d(nin, 4*k, 3, padding=1)
self.oconv1.bias.data.zero_().add_(bias)
def forward(self, x):
return [flatten_conv(self.oconv1(x), self.k),
flatten_conv(self.oconv2(x), self.k)]
class SSD_Head(nn.Module):
def __init__(self, k, bias):
super().__init__()
self.drop = nn.Dropout(0.25)
self.sconv0 = StdConv(512,256, stride=1)
# self.sconv1 = StdConv(256,256)
self.sconv2 = StdConv(256,256)
self.out = OutConv(k, 256, bias)
def forward(self, x):
x = self.drop(F.relu(x))
x = self.sconv0(x)
# x = self.sconv1(x)
x = self.sconv2(x)
return self.out(x)
head_reg4 = SSD_Head(k, -3.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam
k
1
def one_hot_embedding(labels, num_classes):
return torch.eye(num_classes)[labels.data.cpu()]
class BCE_Loss(nn.Module):
def __init__(self, num_classes):
super().__init__()
self.num_classes = num_classes
def forward(self, pred, targ):
t = one_hot_embedding(targ, self.num_classes+1)
t = V(t[:,:-1].contiguous())#.cpu()
x = pred[:,:-1]
w = self.get_weight(x,t)
return F.binary_cross_entropy_with_logits(x, t, w, size_average=False)/self.num_classes
def get_weight(self,x,t): return None
loss_f = BCE_Loss(len(id2cat))
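A hedged illustration with hypothetical targets: one_hot_embedding builds num_classes+1 columns and BCE_Loss drops the last (background) column, so background is represented by no class firing at all.
targ = V(torch.LongTensor([2, 20]))   # class 2, and 20 = background
t = one_hot_embedding(targ, 21)
print(t[:, :-1].sum(1))               # expected: 1 for the real class, 0 for background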
def intersect(box_a, box_b):
max_xy = torch.min(box_a[:, None, 2:], box_b[None, :, 2:])
min_xy = torch.max(box_a[:, None, :2], box_b[None, :, :2])
inter = torch.clamp((max_xy - min_xy), min=0)
return inter[:, :, 0] * inter[:, :, 1]
def box_sz(b): return ((b[:, 2]-b[:, 0]) * (b[:, 3]-b[:, 1]))
def jaccard(box_a, box_b):
inter = intersect(box_a, box_b)
union = box_sz(box_a).unsqueeze(1) + box_sz(box_b).unsqueeze(0) - inter
return inter / union
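A hedged worked example of jaccard on two made-up boxes in (top, left, bottom, right) form on the 0-1 scale: the intersection is 0.0625, the union 0.4375, so the IoU is about 0.14.
box_a = torch.Tensor([[0., 0., 0.5, 0.5]])
box_b = torch.Tensor([[0.25, 0.25, 0.75, 0.75]])
print(jaccard(box_a, box_b))    # expected: ~0.1429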
def get_y(bbox,clas):
bbox = bbox.view(-1,4)/sz
bb_keep = ((bbox[:,2]-bbox[:,0])>0).nonzero()[:,0]
return bbox[bb_keep],clas[bb_keep]
def actn_to_bb(actn, anchors):
actn_bbs = torch.tanh(actn)
actn_centers = (actn_bbs[:,:2]/2 * grid_sizes) + anchors[:,:2]
actn_hw = (actn_bbs[:,2:]/2+1) * anchors[:,2:]
return hw2corners(actn_centers, actn_hw)
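A hedged sketch of what actn_to_bb does (not in the original notebook): tanh squashes the raw activations into (-1, 1), so each predicted centre can move at most half a grid cell from its anchor and each side is scaled to between 0.5x and 1.5x of the anchor size. With all-zero activations we should therefore get the anchor corners back.
zero_act = V(torch.zeros(16, 4))    # hypothetical all-zero activations for the 16 anchors
print((actn_to_bb(zero_act, anchors) - anchor_cnr).abs().max())   # expected: ~0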
def map_to_ground_truth(overlaps, print_it=False):
prior_overlap, prior_idx = overlaps.max(1)
if print_it: print(prior_overlap)
# pdb.set_trace()
gt_overlap, gt_idx = overlaps.max(0)
gt_overlap[prior_idx] = 1.99
for i,o in enumerate(prior_idx): gt_idx[o] = i
return gt_overlap,gt_idx
def ssd_1_loss(b_c,b_bb,bbox,clas,print_it=False):
bbox,clas = get_y(bbox,clas)
a_ic = actn_to_bb(b_bb, anchors)
overlaps = jaccard(bbox.data, anchor_cnr.data)
gt_overlap,gt_idx = map_to_ground_truth(overlaps,print_it)
gt_clas = clas[gt_idx]
pos = gt_overlap > 0.4
pos_idx = torch.nonzero(pos)[:,0]
gt_clas[1-pos] = len(id2cat)
gt_bbox = bbox[gt_idx]
loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
clas_loss = loss_f(b_c, gt_clas)
return loc_loss, clas_loss
def ssd_loss(pred,targ,print_it=False):
lcs,lls = 0.,0.
for b_c,b_bb,bbox,clas in zip(*pred,*targ):
loc_loss,clas_loss = ssd_1_loss(b_c,b_bb,bbox,clas,print_it)
lls += loc_loss
lcs += clas_loss
if print_it: print(f'loc: {lls.data[0]}, clas: {lcs.data[0]}')
return lls+lcs
x,y = next(iter(md.val_dl))
# x,y = V(x).cpu(),V(y)
x,y = V(x),V(y)
for i,o in enumerate(y): y[i] = o.cuda()
learn.model.cuda()
batch = learn.model(x)
# uncomment to debug on cpu
#anchors = anchors.cpu(); grid_sizes = grid_sizes.cpu(); anchor_cnr = anchor_cnr.cpu()
ssd_loss(batch, y, True)
(per-image prior_overlap tensors printed by map_to_ground_truth; output truncated)
loc: 10.175130844116211, clas: 72.91587829589844
Variable containing: 83.0910 [torch.cuda.FloatTensor of size 1 (GPU 0)]
learn.crit = ssd_loss
lr = 3e-3
lrs = np.array([lr/100,lr/10,lr])
learn.lr_find(lrs/1000,1.)
learn.sched.plot(1)
epoch  trn_loss   val_loss
0      86.852668  32587.789062
learn.fit(lr, 1, cycle_len=5, use_clr=(20,10))
epoch  trn_loss   val_loss
0      43.166077  32.560490
1      33.731625  28.329123
2      29.498006  27.387726
3      26.590789  26.043869
4      24.470896  25.746592
[25.746592]
learn.save('0')
learn.load('0')
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
learn.model.eval()
batch = learn.model(x)
b_clas,b_bb = batch
b_clas.size(),b_bb.size()
(torch.Size([64, 16, 21]), torch.Size([64, 16, 4]))
idx=7
b_clasi = b_clas[idx]
b_bboxi = b_bb[idx]
ima=md.val_ds.ds.denorm(to_np(x))[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
bbox,clas
(Variable containing:
 0.6786  0.4866  0.9911  0.6250
 0.7098  0.0848  0.9911  0.5491
 0.5134  0.8304  0.6696  0.9063
 [torch.cuda.FloatTensor of size 3x4 (GPU 0)],
 Variable containing: 8 10 17 [torch.cuda.LongTensor of size 3 (GPU 0)])
def torch_gt(ax, ima, bbox, clas, prs=None, thresh=0.4):
return show_ground_truth(ax, ima, to_np((bbox*224).long()),
to_np(clas), to_np(prs) if prs is not None else None, thresh)
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, bbox, clas)
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, anchor_cnr, b_clasi.max(1)[1])
grid_sizes
Variable containing: 0.2500 [torch.cuda.FloatTensor of size 1x1 (GPU 0)]
anchors
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]
a_ic = actn_to_bb(b_bboxi, anchors)
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, a_ic, b_clasi.max(1)[1], b_clasi.max(1)[0].sigmoid(), thresh=0.0)
overlaps = jaccard(bbox.data, anchor_cnr.data)
overlaps
Columns 0 to 9
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0091
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0356  0.0549
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
Columns 10 to 15
 0.0922  0.0000  0.0000  0.0315  0.3985  0.0000
 0.0103  0.0000  0.2598  0.4538  0.0653  0.0000
 0.0000  0.1897  0.0000  0.0000  0.0000  0.0000
[torch.cuda.FloatTensor of size 3x16 (GPU 0)]
overlaps.max(1)
( 0.3985 0.4538 0.1897 [torch.cuda.FloatTensor of size 3 (GPU 0)], 14 13 11 [torch.cuda.LongTensor of size 3 (GPU 0)])
overlaps.max(0)
( 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0356 0.0549 0.0922 0.1897 0.2598 0.4538 0.3985 0.0000 [torch.cuda.FloatTensor of size 16 (GPU 0)], 0 0 0 0 0 0 0 0 1 1 0 2 1 1 0 0 [torch.cuda.LongTensor of size 16 (GPU 0)])
gt_overlap,gt_idx = map_to_ground_truth(overlaps)
gt_overlap,gt_idx
( 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0356 0.0549 0.0922 1.9900 0.2598 1.9900 1.9900 0.0000 [torch.cuda.FloatTensor of size 16 (GPU 0)], 0 0 0 0 0 0 0 0 1 1 0 2 1 1 0 0 [torch.cuda.LongTensor of size 16 (GPU 0)])
gt_clas = clas[gt_idx]; gt_clas
Variable containing: 8 8 8 8 8 8 8 8 10 10 8 17 10 10 8 8 [torch.cuda.LongTensor of size 16 (GPU 0)]
thresh = 0.5
pos = gt_overlap > thresh
pos_idx = torch.nonzero(pos)[:,0]
neg_idx = torch.nonzero(1-pos)[:,0]
pos_idx
11 13 14 [torch.cuda.LongTensor of size 3 (GPU 0)]
gt_clas[1-pos] = len(id2cat)
[id2cat[o] if o<len(id2cat) else 'bg' for o in gt_clas.data]
['bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'sofa', 'bg', 'diningtable', 'chair', 'bg']
gt_bbox = bbox[gt_idx]
loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
clas_loss = F.cross_entropy(b_clasi, gt_clas)
loc_loss,clas_loss
(Variable containing: 1.00000e-02 * 6.5691 [torch.cuda.FloatTensor of size 1 (GPU 0)], Variable containing: 1.1215 [torch.cuda.FloatTensor of size 1 (GPU 0)])
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=md.val_ds.ds.denorm(to_np(x))[idx]
    bbox,clas = get_y(y[0][idx], y[1][idx])
a_ic = actn_to_bb(b_bb[idx], anchors)
torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.01)
plt.tight_layout()
anc_grids = [4,2,1]
# anc_grids = [2]
anc_zooms = [0.7, 1., 1.3]
# anc_zooms = [1.]
anc_ratios = [(1.,1.), (1.,0.5), (0.5,1.)]
# anc_ratios = [(1.,1.)]
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
k = len(anchor_scales)
anc_offsets = [1/(o*2) for o in anc_grids]
k
9
anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
for ao,ag in zip(anc_offsets,anc_grids)])
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)
anc_sizes = np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
for ag in anc_grids])
grid_sizes = V(np.concatenate([np.array([ 1/ag for i in range(ag*ag) for o,p in anchor_scales])
for ag in anc_grids]), requires_grad=False).unsqueeze(1)
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchors
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.1250  0.2500  0.1250
 0.1250  0.1250  0.1250  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.3750  0.2500  0.1250
 0.1250  0.3750  0.1250  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.6250  0.2500  0.1250
 0.1250  0.6250  0.1250  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.1250  0.8750  0.2500  0.1250
 0.1250  0.8750  0.1250  0.2500
 ... (48 rows omitted) ...
 0.5000  0.5000  1.0000  1.0000
 0.5000  0.5000  1.0000  0.5000
 0.5000  0.5000  0.5000  1.0000
[torch.cuda.FloatTensor of size 63x4 (GPU 0)]
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)
a=np.reshape((to_np(anchor_cnr) + to_np(torch.randn(*anchor_cnr.size()))*0.01)*224, -1)
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)
drop=0.4
class SSD_MultiHead(nn.Module):
def __init__(self, k, bias):
super().__init__()
self.drop = nn.Dropout(drop)
self.sconv0 = StdConv(512,256, stride=1, drop=drop)
self.sconv1 = StdConv(256,256, drop=drop)
self.sconv2 = StdConv(256,256, drop=drop)
self.sconv3 = StdConv(256,256, drop=drop)
self.out0 = OutConv(k, 256, bias)
self.out1 = OutConv(k, 256, bias)
self.out2 = OutConv(k, 256, bias)
self.out3 = OutConv(k, 256, bias)
def forward(self, x):
x = self.drop(F.relu(x))
x = self.sconv0(x)
x = self.sconv1(x)
o1c,o1l = self.out1(x)
x = self.sconv2(x)
o2c,o2l = self.out2(x)
x = self.sconv3(x)
o3c,o3l = self.out3(x)
return [torch.cat([o1c,o2c,o3c], dim=1),
torch.cat([o1l,o2l,o3l], dim=1)]
head_reg4 = SSD_MultiHead(k, -4.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam
learn.crit = ssd_loss
lr = 1e-2
lrs = np.array([lr/100,lr/10,lr])
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(V(x))
batch[0].size(),batch[1].size()
(torch.Size([64, 189, 21]), torch.Size([64, 189, 4]))
ssd_loss(batch, y, True)
(per-image prior_overlap tensors printed by map_to_ground_truth; output truncated)
loc: 4.5301, clas: 61.3364
Variable containing: 65.8664 [torch.cuda.FloatTensor of size 1 (GPU 0)]
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=2)
learn.fit(lrs, 1, cycle_len=4, use_clr=(20,8))
epoch  trn_loss   val_loss
0      23.020269  22.007149
1      19.237320  15.323267
2      16.612079  13.967303
3      14.706582  12.920008
[12.920008]
learn.save('tmp')
learn.freeze_to(-2)
learn.fit(lrs/2, 1, cycle_len=4, use_clr=(20,8))
epoch  trn_loss   val_loss
0      14.021227  17.886932
1      13.386686  12.754044
2      12.297876  11.913645
3      11.292370  11.368293
[11.368293]
learn.save('prefocal')
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
ima=md.val_ds.ds.denorm(x)[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
a_ic = actn_to_bb(b_bb[idx], anchors)
torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.21)
plt.tight_layout()
def plot_results(thresh):
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
ima=md.val_ds.ds.denorm(x)[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
a_ic = actn_to_bb(b_bb[idx], anchors)
clas_pr, clas_ids = b_clas[idx].max(1)
clas_pr = clas_pr.sigmoid()
torch_gt(ax, ima, a_ic, clas_ids, clas_pr, clas_pr.max().data[0]*thresh)
plt.tight_layout()
class FocalLoss(BCE_Loss):
def get_weight(self,x,t):
alpha,gamma = 0.25,1
p = x.sigmoid()
pt = p*t + (1-p)*(1-t)
w = alpha*t + (1-alpha)*(1-t)
return w * (1-pt).pow(gamma)
loss_f = FocalLoss(len(id2cat))
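A hedged numeric check of the weighting above (alpha=0.25, gamma=1): for a positive target the weight is alpha*(1-p), so an easy prediction (p around 0.9) is down-weighted far more than a hard one (p around 0.1).
for p in (0.9, 0.1):
    print(p, 0.25 * (1 - p))    # expected: 0.9 -> 0.025, 0.1 -> ~0.225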
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(x)
ssd_loss(batch, y, True)
(per-image prior_overlap tensors printed by map_to_ground_truth; output truncated)
loc: 3.6088805198669434, clas: 7.331346035003662
Variable containing: 10.9402 [torch.cuda.FloatTensor of size 1 (GPU 0)]
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=1)
learn.fit(lrs, 1, cycle_len=10, use_clr=(20,10))
epoch  trn_loss   val_loss
0      27.244542  32.936592
1      24.687115  22.024887
2      22.251388  19.824711
3      20.238359  18.530561
4      18.613792  17.730865
5      17.376519  17.323956
6      16.339990  16.968851
7      15.425277  16.894522
8      14.683091  16.533207
9      14.044275  16.332354
[16.332354]
learn.save('fl0')
learn.load('fl0')
learn.freeze_to(-2)
learn.fit(lrs/4, 1, cycle_len=10, use_clr=(20,10))
epoch  trn_loss   val_loss
0      13.991107  17.163681
1      14.275143  16.685173
2      13.817010  16.067303
3      13.172081  15.567028
4      12.474847  15.480181
5      11.774984  15.262911
6      11.218640  15.100380
7      10.711037  15.184849
8      10.215424  14.942656
9       9.871555  14.910997
[14.910997]
learn.save('drop4')
learn.load('drop4')
plot_results(0.75)
def nms(boxes, scores, overlap=0.5, top_k=100):
keep = scores.new(scores.size(0)).zero_().long()
if boxes.numel() == 0: return keep
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
area = torch.mul(x2 - x1, y2 - y1)
v, idx = scores.sort(0) # sort in ascending order
idx = idx[-top_k:] # indices of the top-k largest vals
xx1 = boxes.new()
yy1 = boxes.new()
xx2 = boxes.new()
yy2 = boxes.new()
w = boxes.new()
h = boxes.new()
count = 0
while idx.numel() > 0:
i = idx[-1] # index of current largest val
keep[count] = i
count += 1
if idx.size(0) == 1: break
idx = idx[:-1] # remove kept element from view
# load bboxes of next highest vals
torch.index_select(x1, 0, idx, out=xx1)
torch.index_select(y1, 0, idx, out=yy1)
torch.index_select(x2, 0, idx, out=xx2)
torch.index_select(y2, 0, idx, out=yy2)
# store element-wise max with next highest score
xx1 = torch.clamp(xx1, min=x1[i])
yy1 = torch.clamp(yy1, min=y1[i])
xx2 = torch.clamp(xx2, max=x2[i])
yy2 = torch.clamp(yy2, max=y2[i])
w.resize_as_(xx2)
h.resize_as_(yy2)
w = xx2 - xx1
h = yy2 - yy1
# check sizes of xx1 and xx2.. after each iteration
w = torch.clamp(w, min=0.0)
h = torch.clamp(h, min=0.0)
inter = w*h
# IoU = i / (area(a) + area(b) - i)
rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
union = (rem_areas - inter) + area[i]
IoU = inter/union # store result in iou
# keep only elements with an IoU <= overlap
idx = idx[IoU.le(overlap)]
return keep, count
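A hedged toy example of nms with made-up boxes and scores: two heavily-overlapping boxes and one separate box; with overlap=0.5 the lower-scoring duplicate should be suppressed.
boxes  = torch.Tensor([[0,0,1,1], [0,0,0.9,0.9], [2,2,3,3]])
scores = torch.Tensor([0.9, 0.8, 0.7])
keep, count = nms(boxes, scores, overlap=0.5, top_k=10)
print(keep[:count])    # expected: indices 0 and 2; box 1 is suppressed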
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)
def show_nmf(idx):
ima=md.val_ds.ds.denorm(x)[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
a_ic = actn_to_bb(b_bb[idx], anchors)
clas_pr, clas_ids = b_clas[idx].max(1)
clas_pr = clas_pr.sigmoid()
conf_scores = b_clas[idx].sigmoid().t().data
out1,out2,cc = [],[],[]
for cl in range(0, len(conf_scores)-1):
c_mask = conf_scores[cl] > 0.25
if c_mask.sum() == 0: continue
scores = conf_scores[cl][c_mask]
l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
boxes = a_ic[l_mask].view(-1, 4)
ids, count = nms(boxes.data, scores, 0.4, 50)
ids = ids[:count]
out1.append(scores[ids])
out2.append(boxes.data[ids])
cc.append([cl]*count)
if not cc:
print(f"{i}: empty array")
return
cc = T(np.concatenate(cc))
out1 = torch.cat(out1)
out2 = torch.cat(out2)
fig, ax = plt.subplots(figsize=(8,8))
torch_gt(ax, ima, out2, cc, out1, 0.1)
for i in range(12): show_nmf(i)