#!/usr/bin/env python
# coding: utf-8

# # 边界框与锚框 

# In[1]:


try:
    # Enable inline matplotlib rendering when running under IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython is only injected by IPython; when this file is executed as
    # a plain Python script (see the shebang) there is no magic to run.
    pass
import d2l
from mxnet import image, nd, contrib, np, npx

# Configure the figure size through the d2l helper (called with its defaults).
d2l.set_figsize()
# Read the example image (path is relative to the working directory) and
# convert it with asnumpy() so it can be handed to matplotlib.
img = image.imread('catdog.jpg').asnumpy()
d2l.plt.imshow(img)

# Switch MXNet to its NumPy-compatible mode; the `np` / `npx` calls below
# presumably rely on this being active -- confirm against the MXNet docs.
npx.set_np()


# ## 边界框
# 
# 一个边界框可由（左上角x，左上角y，右下角x，右下角y）来定义。

# In[2]:


# Hand-labelled boxes for the two animals in the image, each given as
# (upper-left x, upper-left y, lower-right x, lower-right y).
dog_bbox = [60, 45, 378, 516]
cat_bbox = [400, 112, 655, 493]

def bbox_to_rect(bbox, color):
    """Build an unfilled rectangle patch from a corner-format bounding box.

    Args:
        bbox: Indexable holding (upper-left x, upper-left y, lower-right x,
            lower-right y).
        color: Edge color forwarded to the patch.

    Returns:
        A ``d2l.plt.Rectangle`` anchored at the upper-left corner, with the
        width and height derived from the two corners and a line width of 2.
    """
    left, top = bbox[0], bbox[1]
    # matplotlib wants ((upper-left x, upper-left y), width, height).
    box_width = bbox[2] - left
    box_height = bbox[3] - top
    return d2l.plt.Rectangle(
        xy=(left, top), width=box_width, height=box_height,
        fill=False, edgecolor=color, linewidth=2)

# Show the image and outline the dog in blue and the cat in red.
fig = d2l.plt.imshow(img)
for box, edge in ((dog_bbox, 'blue'), (cat_bbox, 'red')):
    fig.axes.add_patch(bbox_to_rect(box, edge))


# ## 锚框
# 
# 定义一个在一张图里画多个框的函数 

# In[3]:


def show_bboxes(axes, bboxes, labels=None, colors=None):
    """Draw several bounding boxes, optionally labelled, on the given axes.

    Args:
        axes: Object providing ``add_patch`` and ``text`` (matplotlib axes).
        bboxes: Iterable of boxes; each box must expose ``asnumpy()`` and is
            converted to a rectangle with ``d2l.bbox_to_rect``.
        labels: Optional label, or list/tuple of labels, matched to boxes by
            position. Boxes without a corresponding label get no text.
        colors: Optional color, or list/tuple of colors, cycled over the
            boxes. Defaults to ``['b', 'g', 'r', 'm', 'c']``.
    """
    def _make_list(obj, default_values=None):
        # Normalise a scalar-or-sequence argument into a sequence.
        if obj is None:
            return default_values
        if isinstance(obj, (list, tuple)):
            return obj
        return [obj]

    label_seq = _make_list(labels)
    palette = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])
    for idx, box in enumerate(bboxes):
        edge = palette[idx % len(palette)]
        patch = d2l.bbox_to_rect(box.asnumpy(), edge)
        axes.add_patch(patch)
        if not label_seq or idx >= len(label_seq):
            continue
        # Keep the caption readable: black text on a white box, white otherwise.
        if edge == 'w':
            caption_color = 'k'
        else:
            caption_color = 'w'
        caption_style = dict(facecolor=edge, lw=0)
        axes.text(patch.xy[0], patch.xy[1], label_seq[idx],
                  va='center', ha='center', fontsize=9, color=caption_color,
                  bbox=caption_style)


# 中心位于 (250, 250) 的锚框

# In[4]:


# Image height and width, taken from the first two axes of the image array.
h, w = img.shape[0:2]
# Random input with one spatial position per image pixel; only its shape is
# of interest here.
X = np.random.uniform(size=(1, 3, h, w))  # Construct input data.
# Generate anchor boxes for X from three sizes and three aspect ratios.
Y = npx.multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
# View the anchors as (row, column, anchor index, 4 coordinates); the reshape
# implies 5 anchors per pixel.
boxes = Y.reshape((h, w, 5, 4))
# Coordinates of the first anchor at row 250, column 250.
print(boxes[250, 250, 0, :])

# Multiplying by (w, h, w, h) maps box coordinates to pixel units --
# presumably the anchors are expressed as fractions of the image size.
bbox_scale = np.array((w, h, w, h))
fig = d2l.plt.imshow(img)
# Draw all five anchors at that pixel, labelled with their size/ratio pair.
show_bboxes(fig.axes, boxes[250, 250, :, :] * bbox_scale,
            ['s=0.75, r=1', 's=0.5, r=1', 's=0.25, r=1', 's=0.75, r=2',
             's=0.75, r=0.5'])


# ## 标记训练集的锚框

# In[5]:


# Each ground-truth row is (class index, x1, y1, x2, y2); the class column is
# sliced off with [:, 1:] before plotting. Rows are labelled 'dog' and 'cat'.
ground_truth = np.array([[0, 0.1, 0.08, 0.52, 0.92],
                         [1, 0.55, 0.2, 0.9, 0.88]])
# Five hand-picked anchor boxes, four coordinates each.
anchors = np.array([[0, 0.1, 0.2, 0.3], [0.15, 0.2, 0.4, 0.4],
                    [0.63, 0.05, 0.88, 0.98], [0.66, 0.45, 0.8, 0.8],
                    [0.57, 0.3, 0.92, 0.9]])
fig = d2l.plt.imshow(img)
# Ground-truth boxes in black ('k'); anchors in the default color cycle,
# labelled by their index.
show_bboxes(fig.axes, ground_truth[:, 1:] * bbox_scale, ['dog', 'cat'], 'k')
show_bboxes(fig.axes, anchors * bbox_scale, ['0', '1', '2', '3', '4']);


# 每个锚框都被标记为一个类别或者是背景

# In[6]:


# Label the anchors against the ground truth. expand_dims adds a leading axis
# of size 1 (the batch axis, per the shape comments below) to both inputs;
# the third argument is an all-zero array of shape (1, 3, 5).
labels = npx.multibox_target(np.expand_dims(anchors, axis=0),
                                   np.expand_dims(ground_truth, axis=0),
                                   np.zeros((1, 3, 5)))
# The result is indexed as three outputs, printed here from last to first.
# assigned labels: (batch_size, #anchors)
print(labels[2])
# masks: (batch_size, 4 x #anchors), 0 for background, 1 for object
print(labels[1])
# offset to bounding boxes: (batch_size, 4 x #anchors)
print(labels[0])


# ## 输出预测的边界框

# In[7]:


# Four anchor boxes, four coordinates each (rebinds the module-level
# `anchors` used by the earlier cells).
anchors = np.array([[0.1, 0.08, 0.52, 0.92], [0.08, 0.2, 0.56, 0.95],
                    [0.15, 0.3, 0.62, 0.91], [0.55, 0.2, 0.9, 0.88]])
# All-zero predicted offsets, one value per anchor coordinate.
offset_preds = np.array([0] * anchors.size)
# One row per class, one column per anchor.
cls_probs = np.array([[0] * 4,  # Predicted probability for background
                      [0.9, 0.8, 0.7, 0.1],  # Predicted probability for dog
                      [0.1, 0.2, 0.3, 0.9]])  # Predicted probability for cat
fig = d2l.plt.imshow(img)
# Label each anchor with its highest-probability class from cls_probs.
show_bboxes(fig.axes, anchors * bbox_scale,
            ['dog=0.9', 'dog=0.8', 'dog=0.7', 'cat=0.9'])


# 非极大值抑制（Non-maximum suppression）:

# In[8]:


# Run detection on the class probabilities, offsets and anchors, each given a
# leading axis of size 1 via expand_dims, with an NMS threshold of 0.5.
output = npx.multibox_detection(
    np.expand_dims(cls_probs, axis=0), np.expand_dims(offset_preds, axis=0),
    np.expand_dims(anchors, axis=0), nms_threshold=0.5)
# Bare expression: displays the result when run as a notebook cell and has no
# effect when the file is run as a script.
output


# 结果可视化

# In[9]:


fig = d2l.plt.imshow(img)
class_prefixes = ('dog=', 'cat=')
# Each detection row is read as (class index, score, four box coordinates);
# rows whose class index is -1 are not drawn.
for det in output[0].asnumpy():
    if det[0] != -1:
        label = class_prefixes[int(det[0])] + str(det[1])
        show_bboxes(fig.axes, [np.array(det[2:]) * bbox_scale], label)


# ## 多尺度目标检测

# In[10]:


def display_anchors(fmap_w, fmap_h, s):
    """Overlay the anchor boxes generated for a feature map on the image.

    Relies on the module-level ``img``, ``w`` and ``h`` defined earlier in
    this file.

    Args:
        fmap_w: Feature map width (number of columns).
        fmap_h: Feature map height (number of rows).
        s: List of anchor sizes forwarded to ``npx.multibox_prior``.
    """
    # Layout is (batch, channels, height, width), the same order used for the
    # multibox_prior input built from (1, 3, h, w) earlier in this file, so
    # the height must come before the width. The values of the first two
    # dimensions will not affect the output.
    fmap = np.zeros((1, 10, fmap_h, fmap_w))
    anchors = npx.multibox_prior(fmap, sizes=s, ratios=[1, 2, 0.5])
    # Scale the anchor coordinates by the image width/height for plotting.
    bbox_scale = np.array((w, h, w, h))
    d2l.show_bboxes(d2l.plt.imshow(img).axes, anchors[0] * bbox_scale)
    
# 4x4 feature map with anchor size 0.15.
display_anchors(fmap_w=4, fmap_h=4, s=[0.15])


# In[11]:


# 2x2 feature map with a larger anchor size of 0.4.
display_anchors(fmap_w=2, fmap_h=2, s=[0.4])


# In[12]:


# 1x1 feature map with anchor size 0.8.
display_anchors(fmap_w=1, fmap_h=1, s=[0.8])


# In[ ]: