CV学习2：Faster-RCNN代码学习记录

最新推荐文章于 2024-03-14 14:41:13 发布

吾系桉宁

最新推荐文章于 2024-03-14 14:41:13 发布

阅读量154

点赞数

分类专栏：笔记　　文章标签：python、pytorch、深度学习

本文链接：https://blog.csdn.net/weixin_39298885/article/details/104732146

版权

笔记专栏收录该内容

9 篇文章 0 订阅

订阅专栏

文章目录

生成anchor框，并剔除超出原图范围的无效框

def init_anchor(img_size=800, sub_sample=16):
    """Generate every anchor box for a square image and pick the in-image ones.

    The image is split into a ``feature_size x feature_size`` grid of
    ``sub_sample``-pixel cells, where ``feature_size = img_size // sub_sample``.
    Nine anchors (3 aspect ratios x 3 scales) are centred on each cell.

    Anchors are ordered cell by cell (x index outer, y index inner); within a
    cell the aspect ratio is the outer loop and the scale the inner loop.

    Args:
        img_size: Side length of the square input image, in pixels.
        sub_sample: Down-sampling factor between image and feature map.

    Returns:
        A tuple ``(anchors, valid_anchor_boxes, valid_anchor_index)``:
        ``anchors`` holds all boxes as ``(y1, x1, y2, x2)`` rows,
        ``valid_anchor_index`` holds the row indices of boxes lying entirely
        inside ``[0, img_size]`` on both axes, and ``valid_anchor_boxes`` is
        ``anchors[valid_anchor_index]``.
    """
    ratios = np.array([0.5, 1., 2.])
    anchor_scales = np.array([8, 16, 32])  # measured in feature-map cells

    feature_size = img_size // sub_sample

    # Centre of each grid cell: half a cell in from the cell's top-left corner.
    # (Previously a fixed offset of 8, which is only right for sub_sample=16.)
    centers = np.arange(feature_size) * sub_sample + sub_sample / 2.
    grid_x, grid_y = np.meshgrid(centers, centers, indexing='ij')
    ctr_x = grid_x.ravel()
    ctr_y = grid_y.ravel()

    # Scales are given on the feature map, so multiply by sub_sample to get
    # pixel sizes. Flattened with ratio as the outer and scale as the inner axis.
    base = sub_sample * anchor_scales
    half_h = (base[None, :] * np.sqrt(ratios)[:, None]).ravel() / 2.
    half_w = (base[None, :] * np.sqrt(1. / ratios)[:, None]).ravel() / 2.

    n_cells = ctr_y.shape[0]
    n_per_cell = half_h.shape[0]
    anchors = np.stack(
        [
            ctr_y[:, None] - half_h[None, :],
            ctr_x[:, None] - half_w[None, :],
            ctr_y[:, None] + half_h[None, :],
            ctr_x[:, None] + half_w[None, :],
        ],
        axis=-1,
    ).reshape(n_cells * n_per_cell, 4)

    # Keep only boxes fully inside the image. The bound is img_size rather
    # than a literal 800 so that non-default image sizes are filtered correctly.
    inside = (
        (anchors[:, 0] >= 0) &
        (anchors[:, 1] >= 0) &
        (anchors[:, 2] <= img_size) &
        (anchors[:, 3] <= img_size)
    )
    valid_anchor_index = np.where(inside)[0]
    valid_anchor_boxes = anchors[valid_anchor_index]

    return anchors, valid_anchor_boxes, valid_anchor_index

计算每个valid anchor boxes与每个bbox的IoU

# 计算有效anchor框"valid_anchor_boxes"与目标框"bbox"的IOU
def compute_iou(valid_anchor_boxes, bbox):
    """Compute the IoU between every anchor box and every ground-truth box.

    Both inputs hold boxes as ``(y1, x1, y2, x2)`` rows.

    Args:
        valid_anchor_boxes: Array-like of anchor boxes, one per row.
        bbox: Array-like of ground-truth boxes, one per row. Any number of
            boxes is accepted (the result is no longer fixed at two columns).

    Returns:
        A float32 array of shape ``(len(valid_anchor_boxes), len(bbox))``
        whose entry ``[i, j]`` is the IoU of anchor ``i`` and box ``j``.
        Pairs that do not overlap with positive area get 0.
    """
    anchors = np.asarray(valid_anchor_boxes, dtype=np.float64).reshape(-1, 4)
    boxes = np.asarray(bbox, dtype=np.float64).reshape(-1, 4)

    anchor_area = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    box_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Corners of the intersection rectangle for every (anchor, box) pair.
    top_left = np.maximum(anchors[:, None, :2], boxes[None, :, :2])
    bottom_right = np.minimum(anchors[:, None, 2:], boxes[None, :, 2:])

    # Strict inequality: boxes that merely touch do not count as overlapping.
    overlaps = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2)
    union_area = anchor_area[:, None] + box_area[None, :] - inter_area

    ious = np.zeros(inter_area.shape, dtype=np.float32)
    # Divide only where the pair overlaps; every other entry stays 0, which
    # also avoids dividing by a zero union for degenerate boxes.
    np.divide(inter_area, union_area, out=ious, where=overlaps)

    return ious
# ious.shape = (有效anchor数, bbox数)，本例中为 (8940, 2)；每一行是一个anchor与所有bbox的IoU

enumerate使用

https://blog.csdn.net/churximi/article/details/51648388?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task

分配正负样本

def get_pos_neg_sample(ious, valid_anchor_len, pos_iou_threshold=0.7, neg_iou_threshold=0.3, pos_ratio=0.5, n_sample=256):
    """Label anchors as positive (1), negative (0) or ignored (-1).

    Args:
        ious: IoU matrix with one row per anchor and one column per
            ground-truth box.
        valid_anchor_len: Number of anchors (rows of ``ious``).
        pos_iou_threshold: Anchors whose best IoU is at least this value are
            labelled positive.
        neg_iou_threshold: Anchors whose best IoU is below this value are
            labelled negative.
        pos_ratio: Fraction of ``n_sample`` reserved for positive anchors.
        n_sample: Maximum total number of labelled (0 or 1) anchors.

    Returns:
        A tuple ``(label, argmax_ious)``: ``label`` is an int32 array of
        length ``valid_anchor_len`` with values in {-1, 0, 1}, and
        ``argmax_ious`` gives, for each anchor, the index of the ground-truth
        box it overlaps most. If fewer than ``n_sample`` anchors qualify,
        the sample is not padded.
    """
    # Highest IoU reached by any anchor, per ground-truth box.
    gt_max_ious = ious.max(axis=0)
    # Best-matching ground-truth box and its IoU, per anchor.
    argmax_ious = ious.argmax(axis=1)
    max_ious = ious[np.arange(valid_anchor_len), argmax_ious]

    # Several anchors may tie for the best IoU of a box; collect all of them.
    gt_argmax_ious = np.where(ious == gt_max_ious)[0]

    label = np.full((valid_anchor_len,), -1, dtype=np.int32)
    # Assignment order matters: positives overwrite negatives.
    label[max_ious < neg_iou_threshold] = 0
    label[gt_argmax_ious] = 1
    label[max_ious >= pos_iou_threshold] = 1

    # Must be an integer: it is used to compute the `size` argument of
    # np.random.choice, which rejects floats.
    n_pos = int(pos_ratio * n_sample)

    # Randomly drop surplus positives.
    pos_index = np.where(label == 1)[0]
    if len(pos_index) > n_pos:
        disable_index = np.random.choice(pos_index, size=len(pos_index) - n_pos, replace=False)
        label[disable_index] = -1

    # Negatives fill whatever room the kept positives leave; drop the surplus.
    n_neg = n_sample - int(np.sum(label == 1))
    neg_index = np.where(label == 0)[0]
    if len(neg_index) > n_neg:
        disable_index = np.random.choice(neg_index, size=len(neg_index) - n_neg, replace=False)
        label[disable_index] = -1

    return label, argmax_ious

np.argmax使用

https://blog.csdn.net/zjm750617105/article/details/51318248?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522158364929319725256755105%2522%252C%2522scm%2522%253A%252220140713.130056874…%2522%257D&request_id=158364929319725256755105&biz_id=0&utm_source=distribute.pc_search_result.none-task

np.where()使用

https://blog.csdn.net/sinat_41939868/article/details/89469803?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522158365022819724846419015%2522%252C%2522scm%2522%253A%252220140713.130056874…%2522%257D&request_id=158365022819724846419015&biz_id=0&utm_source=distribute.pc_search_result.none-task

边框回归系数计算

def get_coefficient(anchor, bbox):
    """Compute the box-regression targets that map each anchor onto its box.

    Both inputs hold boxes as ``(y1, x1, y2, x2)`` rows, and row ``i`` of
    ``bbox`` is the target for row ``i`` of ``anchor``.

    Args:
        anchor: 2-D array of anchor boxes.
        bbox: 2-D array of target boxes, paired row by row with ``anchor``.

    Returns:
        An array with one row ``(dy, dx, dh, dw)`` per anchor: ``dy``/``dx``
        are the centre offsets divided by the anchor height/width, and
        ``dh``/``dw`` are the logs of the target-to-anchor size ratios.
    """
    # Anchor geometry; centres are taken from the unclamped sizes.
    anchor_h = anchor[:, 2] - anchor[:, 0]
    anchor_w = anchor[:, 3] - anchor[:, 1]
    anchor_cy = anchor[:, 0] + 0.5 * anchor_h
    anchor_cx = anchor[:, 1] + 0.5 * anchor_w

    # Target box geometry.
    target_h = bbox[:, 2] - bbox[:, 0]
    target_w = bbox[:, 3] - bbox[:, 1]
    target_cy = bbox[:, 0] + 0.5 * target_h
    target_cx = bbox[:, 1] + 0.5 * target_w

    # Clamp anchor sizes to machine epsilon so the divisions below never
    # divide by zero.
    tiny = np.finfo(anchor_h.dtype).eps
    anchor_h = np.maximum(anchor_h, tiny)
    anchor_w = np.maximum(anchor_w, tiny)

    offsets = (
        (target_cy - anchor_cy) / anchor_h,  # dy
        (target_cx - anchor_cx) / anchor_w,  # dx
        np.log(target_h / anchor_h),         # dh
        np.log(target_w / anchor_w),         # dw
    )

    # Stack the four coefficient rows, then transpose to one row per anchor.
    return np.vstack(offsets).transpose()

np.vstack()和np.hstack()

https://blog.csdn.net/nanhuaibeian/article/details/100597342?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522158366297919724846406649%2522%252C%2522scm%2522%253A%252220140713.130056874…%2522%257D&request_id=158366297919724846406649&biz_id=0&utm_source=distribute.pc_search_result.none-task

网络搭建：_make_layers()

def _make_layers(self, cfg):
    """Build a sequential conv stack from a layer configuration list.

    Args:
        cfg: Iterable of layer specs. The string ``'M'`` adds a 2x2 max-pool
            with stride 2; any other entry is used as an output channel count
            and adds a 3x3 convolution (padding 1), batch norm and in-place
            ReLU.

    Returns:
        An ``nn.Sequential`` holding the layers in configuration order. The
        first convolution takes 3 input channels.
    """
    modules = []
    prev_channels = 3
    for spec in cfg:
        if spec != 'M':
            modules.extend((
                nn.Conv2d(prev_channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ))
            # The next convolution consumes what this one produced.
            prev_channels = spec
            continue
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))

    # nn.Sequential takes modules as separate positional arguments, so the
    # list has to be unpacked.
    return nn.Sequential(*modules)

nn.Sequential(*layers)

https://blog.csdn.net/u013548568/article/details/80294708

吾系桉宁

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
CV学习2：Faster-RCNN代码学习记录

文章目录生成anchor框，并剔除超出原图范围的无效框计算每个valid anchor boxes与每个bbox的IoUenumerate使用分配正负样本np.argmax使用np.where()使用边框回归系数计算np.vstack()和np.hstack()make_layers()nn.Sequential(*layers)生成anchor框，并剔除超出原图范围的无效框def init_...
复制链接

扫一扫