理论
IoU(交并比):两个框的交集面积除以并集面积,取值范围为 0~1,值越大表示两个框重合度越高。
锚框的宽为 $ws\sqrt{r}$,高为 $hs/\sqrt{r}$。之所以一个乘以 $\sqrt{r}$、另一个除以 $\sqrt{r}$,是因为两者相乘得到 $ws\sqrt{r}\cdot hs/\sqrt{r}=whs^{2}$,与 $r$ 无关;这样对于同一个 $s$,不同宽高比 $r$ 的锚框面积是一样的。
代码
import torch
from d2l import torch as d2l
from torchvision.transforms import transforms
def multibox_prior(data, sizes, ratios, *, target_size=(300, 400)):
    """Generate anchor boxes of several shapes centred on every grid pixel.

    The anchors are laid out on a ``target_size`` grid (300 x 400 by
    default). Only the shape and device of ``data`` are read, so the
    image content is never resampled.

    Args:
        data: Tensor whose last two dimensions are (height, width), e.g.
            ``[batch_size, channel, h, w]``.
        sizes: List of anchor scales ``s``.
        ratios: List of anchor aspect ratios ``r``.
        target_size: ``(height, width)`` of the grid the anchors are
            generated on. Pass ``None`` to use the height and width of
            ``data`` itself.

    Returns:
        Tensor of shape ``[1, H * W * (len(sizes) + len(ratios) - 1), 4]``.
        Each row is ``(xmin, ymin, xmax, ymax)`` expressed as a fraction
        of the image width/height; boxes may extend outside ``[0, 1]``.
    """
    if target_size is None:
        in_height, in_width = data.shape[-2:]
    else:
        in_height, in_width = target_size
    device = data.device
    num_sizes, num_ratios = len(sizes), len(ratios)
    # Only sizes[0] is paired with ratios[1:], hence n + m - 1 anchors
    # per pixel instead of n * m.
    boxes_per_pixel = num_sizes + num_ratios - 1
    # Keep every tensor on the same device as the input.
    size_tensor = torch.tensor(sizes, device=device)
    ratio_tensor = torch.tensor(ratios, device=device)

    # Shift by half a pixel so each anchor is centred on its pixel.
    offset_h, offset_w = 0.5, 0.5
    # Step sizes that normalise pixel indices into the 0-1 range.
    steps_h = 1.0 / in_height
    steps_w = 1.0 / in_width

    # Normalised centre coordinates: center_h is [H], center_w is [W].
    center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
    center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w
    # 'ij' indexing gives [H, W] grids: shift_y is constant along each
    # row and shift_x is constant along each column.
    shift_y, shift_x = torch.meshgrid(center_h, center_w, indexing='ij')
    # Flatten to [H * W] so entry k of both tensors describes pixel k.
    shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)

    # Normalised widths and heights of the anchors of one pixel. The
    # in_height / in_width factor compensates for the two axes being
    # normalised by different lengths, so that r describes the aspect
    # ratio in pixels.
    w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
                   sizes[0] * torch.sqrt(ratio_tensor[1:]))) \
        * in_height / in_width
    h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
                   sizes[0] / torch.sqrt(ratio_tensor[1:])))

    # [4, boxes_per_pixel] -> [boxes_per_pixel, 4]: one row per anchor
    # shape, holding its corner offsets before halving.
    corner_offsets = torch.stack((-w, -h, w, h)).T
    # Tile the per-pixel offsets over all pixels; / 2 turns full widths
    # and heights into offsets measured from the centre.
    anchor_manipulations = corner_offsets.repeat(in_height * in_width, 1) / 2

    # [H * W, 4] centres (x, y, x, y), each repeated boxes_per_pixel
    # times so the rows line up with anchor_manipulations.
    out_grid = torch.stack(
        [shift_x, shift_y, shift_x, shift_y], dim=1
    ).repeat_interleave(boxes_per_pixel, dim=0)

    output = out_grid + anchor_manipulations
    # Add a leading batch dimension: [N, 4] -> [1, N, 4].
    return output.unsqueeze(0)
# Load the sample picture and record its pixel height and width.
img = d2l.plt.imread('../catdog.jpg')
h, w = img.shape[0], img.shape[1]
print(h, w)

# A random batch with the picture's height and width stands in for real
# image data when calling multibox_prior.
X = torch.rand(1, 3, h, w)
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
print(Y.shape)
demo
torch.Tensor.repeat(*sizes)  # 把整个张量沿各个维度整体平铺,sizes 依次给出每个维度的重复次数
>>> x = torch.tensor([1, 2, 3])  # 形状为 [3]
>>> x.repeat(4, 2)  # 结果形状为 [4, 6]:新增的第0维重复4次,原有的那一维重复2次
tensor([[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3]])
>>> x.repeat(4, 2, 1).size()#1表示原始一维不变
torch.Size([4, 2, 3])
>>> x.repeat(4,2,2).shape
>torch.Size([4, 2, 6])
==================================
torch.repeat_interleave 与上面的 repeat 不同:repeat 是把整个张量整体平铺,repeat_interleave 是沿指定维度把每个元素连续重复若干次
>>>x = torch.tensor([[1,2,3,4],[5,6,7,8]])
>>>x.repeat_interleave(3,dim=1)  # 沿第1维(列方向)把每个元素连续重复3次,第0维保持不变
>tensor([[1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
[5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8]])
>>>x.repeat_interleave(3,dim=0)#在0维也就是行那一维进行repeat
>tensor([[1, 2, 3, 4],
[1, 2, 3, 4],
[1, 2, 3, 4],
[5, 6, 7, 8],
[5, 6, 7, 8],
[5, 6, 7, 8]])
x = torch.tensor([[1,2,3,4],[5,6,7,8]])
x.repeat(2,1)
tensor([[1, 2, 3, 4],
[5, 6, 7, 8],
[1, 2, 3, 4],
[5, 6, 7, 8]])
x.repeat(1,2)
tensor([[1, 2, 3, 4, 1, 2, 3, 4],
[5, 6, 7, 8, 5, 6, 7, 8]])
在图片上生成锚框
#@save
def show_bboxes(axes, bboxes, labels=None, colors=None):
    """Draw every bounding box in ``bboxes`` on ``axes``.

    Args:
        axes: Axes object the rectangles and labels are added to.
        bboxes: Iterable of tensors, one per box, passed to
            ``d2l.bbox_to_rect``.
        labels: Optional label or list/tuple of labels; box ``i`` gets
            ``labels[i]`` when it exists.
        colors: Optional colour or list/tuple of colours, cycled over
            the boxes. Defaults to ``['b', 'g', 'r', 'm', 'c']``.
    """
    def _make_list(obj, default_values=None):
        # Normalise None / scalar / sequence arguments to a list or tuple.
        if obj is None:
            obj = default_values
        elif not isinstance(obj, (list, tuple)):
            obj = [obj]
        return obj

    labels = _make_list(labels)
    colors = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])
    for i, bbox in enumerate(bboxes):
        color = colors[i % len(colors)]
        # .cpu() is required because Tensor.numpy() only works on CPU
        # tensors; it is a no-op for boxes that already live on the CPU.
        rect = d2l.bbox_to_rect(bbox.detach().cpu().numpy(), color)
        axes.add_patch(rect)
        if labels and len(labels) > i:
            # White text on coloured boxes, black text on a white box.
            text_color = 'k' if color == 'w' else 'w'
            axes.text(rect.xy[0], rect.xy[1], labels[i], va='center',
                      ha='center', fontsize=9, color=text_color,
                      bbox=dict(facecolor=color, lw=0))
d2l.set_figsize()
# multibox_prior lays its anchors out on a 300 x 400 grid, so Y is viewed
# as (grid row, grid column, anchor index, 4 coordinates) for indexing.
boxes = Y.reshape(300, 400, -1, 4)
# Anchor coordinates are fractions of the image size; multiplying by
# (w, h, w, h) converts them to pixel coordinates of the loaded picture.
bbox_scale = torch.tensor((w, h, w, h))
fig = d2l.plt.imshow(img)
# Draw all anchors centred on grid position (250, 250).
show_bboxes(fig.axes, boxes[250, 250, :, :] * bbox_scale, [
    's=0.75, r=1', 's=0.5, r=1', 's=0.25, r=1', 's=0.75, r=2', 's=0.75, r=0.5'
])