锚框生成算法、repeat_interleave,repeat区别

最新推荐文章于 2024-08-02 18:01:08 发布

暄染落墨

最新推荐文章于 2024-08-02 18:01:08 发布

阅读量427

点赞数 3

分类专栏：目标检测文章标签：深度学习

本文链接：https://blog.csdn.net/qq_36136196/article/details/118854158

版权

目标检测专栏收录该内容

16 篇文章 2 订阅

订阅专栏

理论

在这里插入图片描述
IoU（交并比）：两个框的交集面积除以并集面积，取值范围为 0～1。

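锚框的宽和高分别为 $ws\sqrt{r}$ 和 $hs/\sqrt{r}$（$w$、$h$ 为图像的宽和高，$s$ 为缩放比，$r$ 为宽高比）。之所以一个乘以 $\sqrt{r}$、一个除以 $\sqrt{r}$，是因为两者相乘得到 $ws\sqrt{r}\cdot hs/\sqrt{r}=whs^2$，与 $r$ 无关；这样对于同一个 $s$，不同宽高比 $r$ 的锚框面积是一样的。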

代码

import torch
from d2l import torch as d2l
from torchvision.transforms import transforms

# 用于生成锚框
# 输入是data：[batch_size,channel,h,w],sizes:list,ratios:list
# 输出：[1, H*W*(sizesNum+ratiosNum-1), 4]，最后一维是每个锚框的(左上x, 左上y, 右下x, 右下y)
def multibox_prior(data, sizes, ratios, resize=(300, 400)):
    """Generate anchor boxes of different shapes centred on every pixel.

    Only the spatial shape and the device of ``data`` are read; its pixel
    values are never used. For that reason the image is not actually
    interpolated: the anchor grid is simply laid out at the ``resize``
    resolution, which yields the same anchors as resizing first.

    Args:
        data: Tensor whose last two dimensions are (height, width).
        sizes: Sequence of anchor scales, as fractions of the image size.
        ratios: Sequence of anchor width/height aspect ratios.
        resize: ``(height, width)`` of the grid on which anchors are
            generated. Defaults to ``(300, 400)``. Pass ``None`` to use the
            spatial shape of ``data`` itself.

    Returns:
        Tensor of shape ``[1, H * W * (len(sizes) + len(ratios) - 1), 4]``
        holding ``(x_min, y_min, x_max, y_max)`` for every anchor, in
        coordinates normalised by the grid width/height. Anchors are ordered
        pixel by pixel in row-major order; within one pixel, all sizes paired
        with ``ratios[0]`` come first, then ``sizes[0]`` paired with
        ``ratios[1:]``.
    """
    if resize is None:
        in_height, in_width = data.shape[-2:]
    else:
        in_height, in_width = (int(v) for v in resize)
    device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)
    # Number of anchors generated for each pixel.
    boxes_per_pixel = num_sizes + num_ratios - 1
    # Keep everything on the same device as the input.
    size_tensor = torch.tensor(sizes, device=device)
    ratio_tensor = torch.tensor(ratios, device=device)

    # Offsets that move an anchor to the centre of its pixel.
    offset_h, offset_w = 0.5, 0.5
    # Scaling steps that normalise pixel coordinates to the range 0-1.
    steps_h = 1.0 / in_height
    steps_w = 1.0 / in_width

    # Normalised centre coordinates: center_h has H entries, center_w has W.
    center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
    center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w

    # Flattened row-major grid of centres, each of length H * W. This is the
    # same as a row/column-indexed meshgrid followed by reshape(-1): y stays
    # constant along a row (each value repeated W times in place), while x
    # cycles through all columns (the whole vector tiled H times).
    shift_y = center_h.repeat_interleave(in_width)
    shift_x = center_w.repeat(in_height)

    # Widths and heights of the anchors of one pixel, in normalised units.
    # The in_height / in_width factor compensates for x and y being
    # normalised by different lengths, so that r == 1 is square in pixels.
    w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
                   sizes[0] * torch.sqrt(ratio_tensor[1:]))) * in_height / in_width
    h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
                   sizes[0] / torch.sqrt(ratio_tensor[1:])))

    # [4, boxes_per_pixel] -> [boxes_per_pixel, 4]: per-shape corner offsets.
    corner_offsets = torch.stack((-w, -h, w, h)).T
    # Tile the whole set once per pixel; / 2 turns full extents into
    # half-extents measured from the centre.
    anchor_manipulations = corner_offsets.repeat(in_height * in_width, 1) / 2

    # [H*W, 4] centres (x, y, x, y), each row repeated in place once per
    # anchor shape so it lines up with anchor_manipulations.
    out_grid = torch.stack(
        [shift_x, shift_y, shift_x, shift_y], dim=1
    ).repeat_interleave(boxes_per_pixel, dim=0)

    # Centre plus corner offsets, with a leading batch dimension of 1.
    return (out_grid + anchor_manipulations).unsqueeze(0)

# Load the demo picture and report its height and width in pixels.
img = d2l.plt.imread('../catdog.jpg')
h, w = img.shape[0], img.shape[1]
print(h, w)
# A random batch with the picture's spatial size stands in for real data.
X = torch.rand(1, 3, h, w)
# Three sizes and three ratios give 3 + 3 - 1 = 5 anchors per pixel.
Y = multibox_prior(X, ratios=[1, 2, 0.5], sizes=[0.75, 0.5, 0.25])
print(Y.shape)

demo

torch.Tensor.repeat() #
>>> x = torch.tensor([1, 2, 3])->[3]
>>> x.repeat(4, 2) ->[4,6] #2表示一维扩充2倍
tensor([[ 1,  2,  3,  1,  2,  3],
        [ 1,  2,  3,  1,  2,  3],
        [ 1,  2,  3,  1,  2,  3],
        [ 1,  2,  3,  1,  2,  3]])
>>> x.repeat(4, 2, 1).size()#1表示原始一维不变
torch.Size([4, 2, 3])
>>> x.repeat(4,2,2).shape
>torch.Size([4, 2, 6])
==================================
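torch.repeat_interleave 与上面的 repeat 不同：repeat 是把整个张量作为一个整体复制后拼接（1,2,3,1,2,3），而 repeat_interleave 是沿指定维度把每个元素（或每行/每列）就地连续重复（1,1,2,2,3,3）。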
>>>x = torch.tensor([[1,2,3,4],[5,6,7,8]])
>>>x.repeat_interleave(3,dim=1)#在1维度，也就是不扩充0维了
>tensor([[1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
        [5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8]])
>>>x.repeat_interleave(3,dim=0)#在0维也就是行那一维进行repeat
>tensor([[1, 2, 3, 4],
        [1, 2, 3, 4],
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [5, 6, 7, 8],
        [5, 6, 7, 8]])

x = torch.tensor([[1,2,3,4],[5,6,7,8]])
x.repeat(2,1)

tensor([[1, 2, 3, 4],
[5, 6, 7, 8],
[1, 2, 3, 4],
[5, 6, 7, 8]])

x.repeat(1,2)

tensor([[1, 2, 3, 4, 1, 2, 3, 4],
[5, 6, 7, 8, 5, 6, 7, 8]])

在图片上生成锚框

#@save
def show_bboxes(axes, bboxes, labels=None, colors=None):
    """Draw every bounding box in ``bboxes`` on ``axes``.

    Args:
        axes: Object providing ``add_patch`` and ``text`` to draw on.
        bboxes: Iterable of tensors, each passed to ``d2l.bbox_to_rect``
            after conversion to a NumPy array.
        labels: Optional label or list/tuple of labels; box ``i`` is captioned
            with ``labels[i]`` when that entry exists.
        colors: Optional colour or list/tuple of colours, cycled over the
            boxes. Defaults to ``['b', 'g', 'r', 'm', 'c']``.
    """
    def _as_sequence(value, fallback=None):
        # None -> fallback; list/tuple -> unchanged; anything else -> [value].
        if value is None:
            return fallback
        if isinstance(value, (list, tuple)):
            return value
        return [value]

    captions = _as_sequence(labels)
    palette = _as_sequence(colors, ['b', 'g', 'r', 'm', 'c'])
    for idx, box in enumerate(bboxes):
        edge = palette[idx % len(palette)]
        patch = d2l.bbox_to_rect(box.detach().numpy(), edge)
        axes.add_patch(patch)
        # Skip the caption when no label is available for this box.
        if not captions or idx >= len(captions):
            continue
        # White text everywhere except on a white box, where it would vanish.
        font_color = 'w'
        if edge == 'w':
            font_color = 'k'
        anchor_x, anchor_y = patch.xy[0], patch.xy[1]
        axes.text(anchor_x, anchor_y, captions[idx],
                  va='center', ha='center', fontsize=9, color=font_color,
                  bbox={'facecolor': edge, 'lw': 0})
d2l.set_figsize()
# Anchor coordinates are normalised to 0-1; scaling by (w, h, w, h) converts
# them to pixel coordinates of the displayed image.
bbox_scale = torch.tensor((w, h, w, h))
# `boxes` was used below without ever being defined. multibox_prior lays its
# anchors on a 300 x 400 (rows x columns) grid, so reshape the flat anchor
# list to [rows, columns, anchors_per_pixel, 4] to index anchors by pixel.
boxes = Y.reshape(300, 400, -1, 4)
fig = d2l.plt.imshow(img)
# Draw the five anchors centred on grid position (row 250, column 250).
show_bboxes(fig.axes, boxes[250, 250, :, :] * bbox_scale, [
    's=0.75, r=1', 's=0.5, r=1', 's=0.25, r=1', 's=0.75, r=2', 's=0.75, r=0.5'
])