_generate_anchors

呆呆囧想学C++

已于 2024-01-10 15:43:36 修改

阅读量366

点赞数

分类专栏：代码笔记文章标签：笔记

于 2023-12-27 12:12:37 首次发布

本文链接：https://blog.csdn.net/m0_46011550/article/details/135241849

版权

代码笔记专栏收录该内容

3 篇文章 0 订阅

订阅专栏

我这里只是自己阅读代码时的笔记，这里举例的是一组size生成的锚框，比如FPN针对RPN的改进是将网络头部应用到每一个P层。由于每个P层相对于原始图片具有不同的尺度信息，因此作者将原始RPN中的尺度信息分离，让每个P层只处理单一的尺度信息。

代码首先将 (32, 64, 128) 中的每个尺寸分别乘以 2 ** (k / 3)（k = 0, 1, 2），结果如下：

条件：假设size=((32, 40.3, 50.8), (64.0, 80.6, 101.5), (128.0, 161.2, 203.1))，
stride=(8,16,32)
这里len(size)=len(stride)是正确的

1.举例值：假如遍历到 stride=16、sizes=(64.0, 80.6, 101.5) 这一层，3 个宽高比 × 3 个尺度会生成 9 个锚框。stride 越大（特征图越小）的层对应的 size 越大，也就是说 size 是按特征层的步幅（分辨率）分配的。

通常情况下，stride 反映了在特征图上每个像素对应原始输入图像中的多少像素。因此，使用 stride 作为 base_size 的值可以保证在不同尺寸的特征图上生成的锚框能够适应不同尺寸的输入图像。如果你在使用这个函数时将 base_size 设置为其他值，那么生成的锚框将是相对于这个新的基础尺寸的。一般而言，我们会根据具体的任务和模型设计选择合适的 base_size，或者使用默认值 stride 以保持相对一致性。

2.源代码

#################make_anchor_generator_retinanet  rpn开始函数#######
def make_anchor_generator_retinanet(config):
    """Build the RetinaNet anchor generator from ``config.MODEL.RETINANET``.

    Config keys read (example values are the ones given in the original notes):
        ANCHOR_SIZES: base anchor size per feature level, e.g. [32, 64, 128].
        ASPECT_RATIOS: anchor aspect ratios, e.g. [0.5, 1.0, 2.0].
        ANCHOR_STRIDES: stride per feature level, e.g. [4, 8, 16].
        STRADDLE_THRESH: forwarded unchanged to ``AnchorGenerator``, e.g. 0
            (per the original notes: a threshold used to decide whether an
            anchor crosses the image border).
        OCTAVE: base of the per-level scale factor, e.g. 2.
        SCALES_PER_OCTAVE: number of sizes generated per base size, e.g. 3.

    Every base size ``s`` is expanded into
    ``s * OCTAVE ** (k / SCALES_PER_OCTAVE)`` for
    ``k = 0 .. SCALES_PER_OCTAVE - 1``.  With ``OCTAVE = 2`` and
    ``SCALES_PER_OCTAVE = 3`` the base sizes ``[32, 64]`` become roughly
    ``((32, 40.3, 50.8), (64, 80.6, 101.6))`` -- one tuple per feature level.

    Returns:
        The ``AnchorGenerator`` built from the expanded sizes, the aspect
        ratios, the strides and the straddle threshold.

    Raises:
        AssertionError: if ANCHOR_STRIDES and ANCHOR_SIZES differ in length.
    """
    retinanet_cfg = config.MODEL.RETINANET
    anchor_sizes = retinanet_cfg.ANCHOR_SIZES
    aspect_ratios = retinanet_cfg.ASPECT_RATIOS
    anchor_strides = retinanet_cfg.ANCHOR_STRIDES
    straddle_thresh = retinanet_cfg.STRADDLE_THRESH
    octave = retinanet_cfg.OCTAVE
    scales_per_octave = retinanet_cfg.SCALES_PER_OCTAVE

    assert len(anchor_strides) == len(anchor_sizes), "Only support FPN now"

    # One inner tuple per feature level: the base size scaled by each
    # fractional power of ``octave``.
    sizes_per_level = tuple(
        tuple(
            octave ** (step / float(scales_per_octave)) * base_size
            for step in range(scales_per_octave)
        )
        for base_size in anchor_sizes
    )

    return AnchorGenerator(
        sizes_per_level, aspect_ratios, anchor_strides, straddle_thresh
    )

class AnchorGenerator(nn.Module):
    """
    For a set of image sizes and feature maps, computes a set
    of anchors

    Author's notes (translated): ``strides`` usually means the distance
    between neighbouring feature-map pixels, while ``sizes`` presumably
    refers to the anchor sizes.
    The stride/size length check in the full class definition exists to make
    sure the parameters supplied for an FPN setup are sensible, i.e. that a
    stride and a size are defined for every scale.
    """


###################AnchorGenerator   跳到第二个函数#############
class AnchorGenerator(nn.Module):
    """
    For a set of image sizes and feature maps, computes a set
    of anchors
    """

    def __init__(
        self,
        sizes=(128, 256, 512),
        aspect_ratios=(0.5, 1.0, 2.0),
        anchor_strides=(8, 16, 32),
        straddle_thresh=0,
    ):
        """Precompute one float tensor of cell anchors per stride.

        With a single stride, all ``sizes`` are used for that one level.
        Otherwise strides and sizes are paired element-wise, and each entry
        of ``sizes`` may be a scalar or a tuple/list of sizes for its level.

        Raises:
            RuntimeError: if several strides are given and their count
                differs from the number of entries in ``sizes``.
        """
        super().__init__()

        if len(anchor_strides) == 1:
            # Single feature map: every size belongs to the only stride.
            level_specs = [(anchor_strides[0], sizes)]
        else:
            if len(anchor_strides) != len(sizes):
                raise RuntimeError("FPN should have #anchor_strides == #sizes")
            # Wrap a scalar size so generate_anchors always gets a sequence.
            level_specs = [
                (
                    level_stride,
                    level_sizes
                    if isinstance(level_sizes, (tuple, list))
                    else (level_sizes,),
                )
                for level_stride, level_sizes in zip(anchor_strides, sizes)
            ]

        cell_anchors = [
            generate_anchors(level_stride, level_sizes, aspect_ratios).float()
            for level_stride, level_sizes in level_specs
        ]

        self.strides = anchor_strides
        self.cell_anchors = BufferList(cell_anchors)
        self.straddle_thresh = straddle_thresh

###########生成锚框的函数############################
def generate_anchors(
    stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.

    ``stride`` is forwarded to ``_generate_anchors`` as its ``base_size`` and
    ``sizes`` are divided by ``stride`` to obtain the scales, so the anchors
    are expressed relative to the stride: ``base_size * scale`` gives back
    each requested size.

    Author's notes (translated): the stride usually reflects how many input
    image pixels one feature-map pixel corresponds to, so using it as the
    base size is meant to keep anchors consistent across feature maps of
    different resolution.

    Args:
        stride: stride of the feature level; used as the base window size.
        sizes: anchor sizes for this level.
        aspect_ratios: aspect ratios to enumerate.

    Returns:
        Whatever ``_generate_anchors`` returns for these arguments.
    """
    # np.float64 replaces the ``np.float`` alias (same dtype); the alias was
    # removed in NumPy 1.24 and raises AttributeError there.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64),
    )

##########################注意下面函数的传入值################
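# Why stride is passed as base_size: generate_anchors() calls this function with base_size=stride and scales=sizes / stride, so base_size * scale gives back each anchor size and the result adapts to the stride of each feature level. (Author's note: if the stride were only applied at the very end, the anchors would have to be moved step by step in a loop.)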

def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    Args:
        base_size: side length of the reference window.
        scales: array of scale factors applied to every ratio anchor.
        aspect_ratios: array of aspect ratios passed to ``_ratio_enum``.

    Returns:
        A torch tensor created from the stacked NumPy anchors, one row per
        (aspect ratio, scale) pair, grouped by aspect ratio.
    """
    # np.float64 replaces the ``np.float`` alias (same dtype); the alias was
    # removed in NumPy 1.24 and raises AttributeError there.
    reference = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    ratio_anchors = _ratio_enum(reference, aspect_ratios)

    # Stack the per-ratio blocks of scaled anchors row-wise.
    anchors = np.vstack(
        [_scale_enum(ratio_anchor, scales) for ratio_anchor in ratio_anchors]
    )
    return torch.from_numpy(anchors)

3.举例子

import numpy as np
def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window).

    ``anchor`` is indexed as (x1, y1, x2, y2) with inclusive corners, hence
    the ``+ 1`` when computing width and height.
    """
    x1, y1 = anchor[0], anchor[1]
    width = anchor[2] - x1 + 1
    height = anchor[3] - y1 + 1
    center_x = x1 + 0.5 * (width - 1)
    center_y = y1 + 0.5 * (height - 1)
    return width, height, center_x, center_y


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).

    Each (width, height) pair yields one row (x1, y1, x2, y2) whose corners
    are placed symmetrically around the center, so scaled boxes keep the
    same center point.
    """
    # Turn the 1-D vectors into columns so hstack yields one row per anchor.
    half_w = 0.5 * (ws[:, np.newaxis] - 1)
    half_h = 0.5 * (hs[:, np.newaxis] - 1)
    return np.hstack(
        (
            x_ctr - half_w,
            y_ctr - half_h,
            x_ctr + half_w,
            y_ctr + half_h,
        )
    )
def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor.

    The anchor's width and height are multiplied by every entry of
    ``scales`` while its center is kept, giving one output row per scale.
    """
    width, height, center_x, center_y = _whctrs(anchor)
    return _mkanchors(width * scales, height * scales, center_x, center_y)
################## Inline walk-through of the _ratio_enum step ###############
# Reference window (0, 0, 15, 15), i.e. base_size = 16.
anchor=np.array([1, 1, 16, 16], dtype=np.float64) - 1
ratios=np.array((0.5,1,2), dtype=np.float64)
w, h, x_ctr, y_ctr = _whctrs(anchor)
# Area of the reference window; dividing it by each ratio gives the squared
# width for that ratio.
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))   # widths derived from the ratio-adjusted areas
hs = np.round(ws * ratios)            # heights follow from width * ratio
# One anchor per aspect ratio, all sharing the reference window's center.
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
print("_ratio_enum函数的生成")
print(anchors)

################## Inline walk-through of the _scale_enum step ###############
# (64.0, 80.6, 101.5) are the author's octave-scaled sizes for this level
# (approximately 64 * 2 ** (k / 3) for k = 0, 1, 2); dividing by the
# stride 16 turns them into scale factors.
scales= np.array((64.0, 80.6, 101.5), dtype=np.float64) / 16
# Scale every ratio anchor from the previous step and stack the results
# row-wise (np.vstack stacks arrays vertically, along the row axis).
anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
print("_scale_enum函数的生成")
print(anchors)

4.举例值理解（这里就是一层FPN层的锚框）

_ratio_enum函数的生成
[[-3.5  2.  18.5 13. ]
 [ 0.   0.  15.  15. ]
 [ 2.5 -3.  12.5 18. ]]
_scale_enum函数的生成
[[-38.       -16.        53.        31.      ]
 [-49.93125  -22.225     64.93125   37.225   ]
 [-64.953125 -30.0625    79.953125  45.0625  ]
 [-24.       -24.        39.        39.      ]
 [-32.3      -32.3       47.3       47.3     ]
 [-42.75     -42.75      57.75      57.75    ]
 [-14.       -36.        29.        51.      ]
 [-19.70625  -47.4125    34.70625   62.4125  ]
 [-26.890625 -61.78125   41.890625  76.78125 ]]