#!/usr/bin/env python
# coding: utf-8

# # Bounding Boxes and Anchor Boxes

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import d2l
from mxnet import image, nd, contrib, np, npx

d2l.set_figsize()
img = image.imread('catdog.jpg').asnumpy()
d2l.plt.imshow(img)
npx.set_np()


# ## Bounding Boxes
#
# A bounding box is defined by
# (upper-left x, upper-left y, lower-right x, lower-right y).

# In[2]:

dog_bbox = [60, 45, 378, 516]
cat_bbox = [400, 112, 655, 493]


def bbox_to_rect(bbox, color):
    """Build an unfilled matplotlib rectangle outlining ``bbox``.

    Args:
        bbox: Indexable as (upper-left x, upper-left y, lower-right x,
            lower-right y).
        color: Edge color of the rectangle.

    Returns:
        A ``d2l.plt.Rectangle`` in matplotlib's
        ((upper-left x, upper-left y), width, height) format.
    """
    left, top = bbox[0], bbox[1]
    box_width = bbox[2] - left
    box_height = bbox[3] - top
    return d2l.plt.Rectangle(
        xy=(left, top), width=box_width, height=box_height,
        fill=False, edgecolor=color, linewidth=2)


fig = d2l.plt.imshow(img)
for outline in (bbox_to_rect(dog_bbox, 'blue'),
                bbox_to_rect(cat_bbox, 'red')):
    fig.axes.add_patch(outline)


# ## Anchor Boxes
#
# Define a function that draws several boxes on a single image.

# In[3]:

def show_bboxes(axes, bboxes, labels=None, colors=None):
    """Draw every box in ``bboxes`` on ``axes``, optionally with labels.

    Args:
        axes: Matplotlib axes that receive the rectangle patches and texts.
        bboxes: Iterable of boxes; each must provide ``.asnumpy()``.
        labels: Optional label or list/tuple of labels; box ``i`` gets
            ``labels[i]`` when that entry exists.
        colors: Optional color or list/tuple of colors, cycled over the
            boxes. Defaults to ``['b', 'g', 'r', 'm', 'c']``.
    """
    def _make_list(obj, default_values=None):
        # Normalize: None -> defaults, scalar -> one-element list.
        if obj is None:
            return default_values
        if isinstance(obj, (list, tuple)):
            return obj
        return [obj]

    labels = _make_list(labels)
    colors = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])

    for idx, bbox in enumerate(bboxes):
        color = colors[idx % len(colors)]
        rect = d2l.bbox_to_rect(bbox.asnumpy(), color)
        axes.add_patch(rect)

        # Nothing to write when no label exists for this box.
        if not labels or len(labels) <= idx:
            continue

        # White text by default; black text on a white box stays readable.
        if color == 'w':
            text_color = 'k'
        else:
            text_color = 'w'
        anchor_x, anchor_y = rect.xy[0], rect.xy[1]
        axes.text(anchor_x, anchor_y, labels[idx],
                  va='center', ha='center', fontsize=9, color=text_color,
                  bbox=dict(facecolor=color, lw=0))


# Anchor boxes centered at (250, 250)

# In[4]:

h, w = img.shape[0:2]
X = np.random.uniform(size=(1, 3, h, w))  # Construct input data.
Y = npx.multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
boxes = Y.reshape((h, w, 5, 4))
print(boxes[250, 250, 0, :])

# Scale factors that map box coordinates onto the image's pixel grid.
bbox_scale = np.array((w, h, w, h))
fig = d2l.plt.imshow(img)
show_bboxes(fig.axes, boxes[250, 250, :, :] * bbox_scale,
            ['s=0.75, r=1', 's=0.5, r=1', 's=0.25, r=1',
             's=0.75, r=2', 's=0.75, r=0.5'])


# ## Labeling Anchor Boxes in the Training Set

# In[5]:

ground_truth = np.array([[0, 0.1, 0.08, 0.52, 0.92],
                         [1, 0.55, 0.2, 0.9, 0.88]])
anchors = np.array([[0, 0.1, 0.2, 0.3],
                    [0.15, 0.2, 0.4, 0.4],
                    [0.63, 0.05, 0.88, 0.98],
                    [0.66, 0.45, 0.8, 0.8],
                    [0.57, 0.3, 0.92, 0.9]])

fig = d2l.plt.imshow(img)
# Column 0 of ground_truth is skipped; the remaining four columns are drawn.
show_bboxes(fig.axes, ground_truth[:, 1:] * bbox_scale, ['dog', 'cat'], 'k')
show_bboxes(fig.axes, anchors * bbox_scale, ['0', '1', '2', '3', '4'])


# Each anchor box is labeled with a class or as background.

# In[6]:

labels = npx.multibox_target(np.expand_dims(anchors, axis=0),
                             np.expand_dims(ground_truth, axis=0),
                             np.zeros((1, 3, 5)))

# assigned labels: (batch_size, #anchors)
print(labels[2])
# masks: (batch_size, 4 x #anchors), 0 for background, 1 for object
print(labels[1])
# offset to bounding boxes: (batch_size, 4 x #anchors)
print(labels[0])


# ## Output Predicted Bounding Boxes

# In[7]:

anchors = np.array([[0.1, 0.08, 0.52, 0.92],
                    [0.08, 0.2, 0.56, 0.95],
                    [0.15, 0.3, 0.62, 0.91],
                    [0.55, 0.2, 0.9, 0.88]])
offset_preds = np.array([0] * anchors.size)
cls_probs = np.array([[0] * 4,               # Predicted probability for background
                      [0.9, 0.8, 0.7, 0.1],  # Predicted probability for dog
                      [0.1, 0.2, 0.3, 0.9]])  # Predicted probability for cat

fig = d2l.plt.imshow(img)
show_bboxes(fig.axes, anchors * bbox_scale,
            ['dog=0.9', 'dog=0.8', 'dog=0.7', 'cat=0.9'])


# Non-maximum suppression (NMS):

# In[8]:

output = npx.multibox_detection(
    np.expand_dims(cls_probs, axis=0),
    np.expand_dims(offset_preds, axis=0),
    np.expand_dims(anchors, axis=0),
    nms_threshold=0.5)
output


# Visualize the results

# In[9]:

fig = d2l.plt.imshow(img)
for i in output[0].asnumpy():
    # Rows whose first entry is -1 are skipped and not drawn.
    if i[0] == -1:
        continue
    label = ('dog=', 'cat=')[int(i[0])] + str(i[1])
    show_bboxes(fig.axes, [np.array(i[2:]) * bbox_scale], label)


# ## Multiscale Object Detection

# In[10]:

def display_anchors(fmap_w, fmap_h, s):
    """Draw the anchor boxes generated for a feature map over ``img``.

    Args:
        fmap_w: Width of the feature map.
        fmap_h: Height of the feature map.
        s: Anchor sizes forwarded to ``npx.multibox_prior`` as ``sizes``.
    """
    # The values from the first two dimensions will not affect the output.
    # The tensor layout is (batch, channel, height, width), so the height
    # must come before the width; the two were previously swapped, which only
    # went unnoticed because every call below uses a square feature map.
    fmap = np.zeros((1, 10, fmap_h, fmap_w))
    anchors = npx.multibox_prior(fmap, sizes=s, ratios=[1, 2, 0.5])
    bbox_scale = np.array((w, h, w, h))
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * bbox_scale)


display_anchors(fmap_w=4, fmap_h=4, s=[0.15])


# In[11]:

display_anchors(fmap_w=2, fmap_h=2, s=[0.4])


# In[12]:

display_anchors(fmap_w=1, fmap_h=1, s=[0.8])


# In[ ]: