def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
"""
x = torch.nn.Conv2d(x.shape[1], (5+num_classes)*len(anchors), kernel_size=1, stride=1, padding=0)(x)
https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-3/
:param prediction: [2, 255, 13, 13]. 这个255=(1+4+80)*3
:param inp_dim: 416. 图片原始大小
:param anchors: [(116, 90), (156, 198), (373, 326)]. 在原图上的预设anchors大小hw
:param num_classes: 80. coco类别数
:param CUDA: bool. 是否加速
:return: [b, bbox_num, 85]. bbox_num = 13*13*3 / 26*26*3 / 52*52*3. 85=x, y, h, w, cls_0, cls_1, ..., cls_79.
back to origin size.
"""
    batch_size = prediction.size(0)  # 2, the batch size
    stride = inp_dim // prediction.size(2)  # 32, the downsampling factor of this head
    grid_size = inp_dim // stride  # 13, the prediction feature map is 13x13; each pixel is one grid cell
    bbox_attrs = 5 + num_classes  # 85, attributes per predicted box: 1 + 4 + 80
    num_anchors = len(anchors)  # 3, anchors predicted per grid cell at this scale
    # [2, 255, 13, 13] -> [2, 255, 13*13], i.e. [b, all attributes of all anchors, all cells]
    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    # -> [b, 13*13, 85*3], i.e. [b, all cells, all attributes]; one row holds every prediction of one cell
    prediction = prediction.transpose(1, 2).contiguous()
    # [b, 13*13, 85*3] -> [b, 13*13*3, 85], i.e. one row holds all attributes of one predicted box
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)
    # rescale the anchors from input-image pixels to feature-map (grid) units
    anchors = [(a[0] / stride, a[1] / stride) for a in anchors]
    # Sigmoid the centre_X, centre_Y and object confidence -> [0, 1]
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])  # centre_X
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])  # centre_Y
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])  # object confidence
# Add the center offsets
    grid = np.arange(grid_size)  # [0, 1, 2, ..., 12]
    a, b = np.meshgrid(grid, grid)  # x, y indices of the feature map, scanned left to right, top to bottom
"""
a = [
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12],
[ 0 1 2 3 4 5 6 7 8 9 10 11 12]]
b = [
[ 0 0 0 0 0 0 0 0 0 0 0 0 0],
[ 1 1 1 1 1 1 1 1 1 1 1 1 1],
[ 2 2 2 2 2 2 2 2 2 2 2 2 2],
[ 3 3 3 3 3 3 3 3 3 3 3 3 3],
[ 4 4 4 4 4 4 4 4 4 4 4 4 4],
[ 5 5 5 5 5 5 5 5 5 5 5 5 5],
[ 6 6 6 6 6 6 6 6 6 6 6 6 6],
[ 7 7 7 7 7 7 7 7 7 7 7 7 7],
[ 8 8 8 8 8 8 8 8 8 8 8 8 8],
[ 9 9 9 9 9 9 9 9 9 9 9 9 9],
[10 10 10 10 10 10 10 10 10 10 10 10 10],
[11 11 11 11 11 11 11 11 11 11 11 11 11],
[12 12 12 12 12 12 12 12 12 12 12 12 12]]
"""
    x_offset = torch.FloatTensor(a).view(-1, 1)  # [13, 13] -> [169, 1], x index of each cell
    y_offset = torch.FloatTensor(b).view(-1, 1)  # [13, 13] -> [169, 1], y index of each cell
"""
x_offset = [[ 0.], [ 1.], [ 2.], [ 3.], [ 4.], [ 5.], [ 6.], [ 7.], [ 8.], [ 9.], [10.], [11.], [12.], [ 0.], [ 1.],
[ 2.], ..., [12.], [12.]]
y_offset = [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [1.], [1.], [1.], [1.],
[1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.], [2.], [2.], ..., [12.], [12.]]
"""
if CUDA:
x_offset = x_offset.cuda()
y_offset = y_offset.cuda()
prediction = prediction.cuda()
    # -> [1, 13*13*3, 2], i.e. [1, all anchor boxes, (x, y) index of the cell each box belongs to];
    # the scan order is left to right, top to bottom
x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
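    # Optional sanity check (not in the original tutorial): each cell's (x, y) index must be
    # repeated once per anchor, giving one offset row per predicted box.
    assert x_y_offset.shape == (1, grid_size * grid_size * num_anchors, 2)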
"""
    Scan order: left to right, top to bottom.
    x_y_offset[0, :, :] = [
[ 0. 0.],
[ 0. 0.],
[ 0. 0.],
[ 1. 0.],
[ 1. 0.],
[ 1. 0.],
[ 2. 0.],
...
[11. 12.],
[11. 12.],
[11. 12.],
[12. 12.],
[12. 12.],
        [12. 12.]]
"""
    # 1) x, y: add each cell's offset to the (already sigmoided) centre predictions
    """
    x = x_pred + x_cell
    y = y_pred + y_cell
    where (x_cell, y_cell) is the index of the grid cell and x_pred, y_pred are the
    sigmoided centre offsets within that cell.
    """
prediction[:, :, :2] += x_y_offset
    # log-space transform of the width and height, scaled by the anchor priors
anchors = torch.FloatTensor(anchors)
if CUDA:
anchors = anchors.cuda()
    anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)  # [3, 2] -> [1, 507, 2], i.e. [1, all anchor boxes, anchor size in grid units]
"""
anchors = [
[ 3.625 2.8125 ],
[ 4.875 6.1875 ],
[11.65625 10.1875 ],
[ 3.625 2.8125 ],
[ 4.875 6.1875 ],
[11.65625 10.1875 ],
...
"""
    # 2) w, h: scale the predicted sizes by the preset anchors
    # attribute layout: x, y, w, h, obj, cls_0, cls_1, ..., cls_79
    # prediction: [2, 507, 85], where 507 is the number of predicted boxes; prediction[:, :, 2:4] has shape [2, 13*13*3, 2]
prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors # anchors.shape: [1, 13*13*3, 2]
    # 3) class scores: sigmoid the 80 per-class confidences
    prediction[:, :, 5: 5 + num_classes] = torch.sigmoid(prediction[:, :, 5: 5 + num_classes])
    # 4) map x, y, w, h from grid units back to the input-image scale
prediction[:, :, :4] *= stride
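    # Optional sanity check (not in the original tutorial): one row per anchor box, 85 attributes each.
    assert prediction.shape == (batch_size, grid_size * grid_size * num_anchors, bbox_attrs)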
return prediction
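

# Minimal usage sketch (assumes `import torch` and `import numpy as np` at the top of this
# file, as the function above requires; the random input below is made up purely to
# illustrate the expected shapes of the 13x13 / stride-32 head).
if __name__ == "__main__":
    num_classes = 80
    anchors = [(116, 90), (156, 198), (373, 326)]  # anchors of the 13x13 head, in input-image pixels
    inp_dim = 416

    # Fake detection-head output: [batch, (5 + num_classes) * num_anchors, 13, 13]
    raw = torch.randn(2, (5 + num_classes) * len(anchors), 13, 13)

    boxes = predict_transform(raw, inp_dim, anchors, num_classes, CUDA=False)
    print(boxes.shape)  # torch.Size([2, 507, 85]): x, y, w, h, obj, 80 class scores per box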