关于 YOLOv3 的锚框处理函数

最新推荐文章于 2024-08-01 15:50:23 发布

舒溶

最新推荐文章于 2024-08-01 15:50:23 发布

阅读量211

点赞数

文章标签：深度学习机器学习神经网络

本文链接：https://blog.csdn.net/weixin_42975688/article/details/120381434

版权

关于 YOLOv3 的锚框处理函数

计算iou

def box_iou_xywh(box1, box2):
    """Return the intersection-over-union of two centre-format boxes.

    Args:
        box1: Indexable ``(x_center, y_center, width, height)``.
        box2: Indexable ``(x_center, y_center, width, height)``.

    Returns:
        The IoU value. It is 0 when the boxes do not overlap and 1 when
        they coincide. If both boxes have zero area the division is 0/0
        and the result is NaN.
    """
    x1min, y1min = box1[0] - box1[2] / 2.0, box1[1] - box1[3] / 2.0
    x1max, y1max = box1[0] + box1[2] / 2.0, box1[1] + box1[3] / 2.0
    s1 = box1[2] * box1[3]

    x2min, y2min = box2[0] - box2[2] / 2.0, box2[1] - box2[3] / 2.0
    x2max, y2max = box2[0] + box2[2] / 2.0, box2[1] + box2[3] / 2.0
    s2 = box2[2] * box2[3]

    # The intersection rectangle starts at the larger of the two minima and
    # ends at the smaller of the two maxima.
    xmin = np.maximum(x1min, x2min)
    ymin = np.maximum(y1min, y2min)
    xmax = np.minimum(x1max, x2max)
    ymax = np.minimum(y1max, y2max)

    # A negative extent means the boxes are disjoint on that axis.
    inter_h = np.maximum(ymax - ymin, 0.)
    inter_w = np.maximum(xmax - xmin, 0.)
    inters = inter_w * inter_h

    union = s1 + s2 - inters
    iou = inters / union
    return iou

根据已经标注的真实框，在真实框的中心点处，生成以该点为中心点的候选框，并且对候选框进行标注

def get_objectness_label(img, gt_boxes, gt_labels, iou_threshold=0.7,
                         anchors=(116, 90, 156, 198, 373, 326),
                         num_classes=7, downsample=32):
    """Build per-anchor training labels for one feature-map level.

    For every non-padding ground-truth box, the grid cell containing the box
    centre is located, the anchor whose shape best matches the box (highest
    IoU with both boxes centred at the origin) is selected, and that
    (anchor, cell) slot is marked as a positive sample.

    Args:
        img: Input batch; only ``img.shape`` is read, documented as
            ``[N, C, H, W]``.
        gt_boxes: Ground-truth boxes, documented as ``[N, 50, 4]`` in
            relative ``xywh`` (centre) format. Rows whose width or height is
            below ``1e-3`` are treated as padding and skipped.
        gt_labels: Class index of every ground-truth box, documented as
            ``[N, 50]``.
        iou_threshold: Not used by this function; kept so existing callers
            keep working.
        anchors: Flat sequence ``(w0, h0, w1, h1, ...)`` of anchor sizes in
            input-image pixels.
        num_classes: Number of object classes.
        downsample: Ratio between the input image size and the feature-map
            size.

    Returns:
        A 4-tuple of ``float32`` arrays:
        ``label_objectness`` ``[N, A, rows, cols]`` (1 at positive slots),
        ``label_location`` ``[N, A, 4, rows, cols]`` (dx, dy, dw, dh),
        ``label_classification`` ``[N, A, num_classes, rows, cols]`` (one-hot),
        ``scale_location`` ``[N, A, rows, cols]`` (``2 - w * h`` at positive
        slots, 1 elsewhere, so small boxes get a larger location weight).
    """
    img_shape = img.shape
    batchsize = img_shape[0]
    num_anchors = len(anchors) // 2
    input_h = img_shape[2]
    input_w = img_shape[3]
    # num_rows / num_cols are the feature-map grid height and width.
    num_rows = input_h // downsample
    num_cols = input_w // downsample

    label_objectness = np.zeros([batchsize, num_anchors, num_rows, num_cols])
    label_classification = np.zeros([batchsize, num_anchors, num_classes, num_rows, num_cols])
    label_location = np.zeros([batchsize, num_anchors, 4, num_rows, num_cols])
    scale_location = np.ones([batchsize, num_anchors, num_rows, num_cols])

    # Anchor shapes relative to the input size, centred at the origin. They do
    # not depend on the ground-truth box, so build them once.
    anchor_boxes = [
        [0., 0., anchors[ka * 2] / float(input_w), anchors[ka * 2 + 1] / float(input_h)]
        for ka in range(num_anchors)
    ]

    for n in range(batchsize):
        for n_gt, gt in enumerate(gt_boxes[n]):
            gt_center_x = gt[0]
            gt_center_y = gt[1]
            gt_width = gt[2]
            gt_height = gt[3]
            if gt_width < 1e-3 or gt_height < 1e-3:
                # Padding entry (or degenerate box): nothing to label.
                continue

            # Grid cell holding the box centre. Clamp so that a centre lying
            # exactly on the right/bottom image edge (relative coordinate 1.0)
            # falls into the last cell instead of indexing out of range.
            i = min(max(int(gt_center_y * num_rows), 0), num_rows - 1)
            j = min(max(int(gt_center_x * num_cols), 0), num_cols - 1)

            # Compare shapes only: both boxes are centred at the origin.
            gt_shape = [0., 0., float(gt_width), float(gt_height)]
            ious = np.array([box_iou_xywh(gt_shape, anchor_box)
                             for anchor_box in anchor_boxes])
            # Index of the best-matching anchor.
            k = np.argsort(ious)[-1]

            label_objectness[n, k, i, j] = 1
            # Labels may arrive as floats; array indices must be integers.
            c = int(gt_labels[n][n_gt])
            label_classification[n, k, c, i, j] = 1

            # Offsets of the centre inside its cell, and log-scale size
            # relative to the chosen anchor.
            label_location[n, k, 0, i, j] = gt_center_x * num_cols - j
            label_location[n, k, 1, i, j] = gt_center_y * num_rows - i
            label_location[n, k, 2, i, j] = np.log(gt_width * input_w / anchors[k * 2])
            label_location[n, k, 3, i, j] = np.log(gt_height * input_h / anchors[k * 2 + 1])
            scale_location[n, k, i, j] = 2.0 - gt_width * gt_height

    return label_objectness.astype('float32'), \
           label_location.astype('float32'), \
           label_classification.astype('float32'), \
           scale_location.astype('float32')

将图像经过 Darknet53 得到的特征图再做进一步处理，使输出结果能够与候选框（锚框）的标注一一对应

class YoloDetectionBlock(paddle.nn.Layer):
    """Stack of alternating 1x1 and 3x3 ``ConvBNLayer`` modules.

    The input is passed through ``conv0`` to ``conv3`` (1x1 -> 3x3 -> 1x1 ->
    3x3), then through the 1x1 ``route`` layer and finally the 3x3 ``tip``
    layer. The 1x1 layers output ``ch_out`` channels and the 3x3 layers output
    ``ch_out * 2`` channels.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 is_test=True):
        """Create the six convolution layers.

        Args:
            ch_in: Number of input channels.
            ch_out: Number of channels produced by the 1x1 layers; must be
                even.
            is_test: Accepted but not used by this block.
        """
        super().__init__()
        assert ch_out % 2 == 0, "channel {} cannot be divided by 2".format(ch_out)

        doubled = ch_out * 2

        def squeeze(in_channels):
            # 1x1 layer reducing to ch_out channels.
            return ConvBNLayer(
                ch_in=in_channels,
                ch_out=ch_out,
                kernel_size=1,
                stride=1,
                padding=0)

        def expand():
            # 3x3 layer expanding ch_out to ch_out * 2 channels.
            return ConvBNLayer(
                ch_in=ch_out,
                ch_out=doubled,
                kernel_size=3,
                stride=1,
                padding=1)

        # Layers are created in the same order as they are applied.
        self.conv0 = squeeze(ch_in)
        self.conv1 = expand()
        self.conv2 = squeeze(doubled)
        self.conv3 = expand()
        self.route = squeeze(doubled)
        self.tip = expand()

    def forward(self, inputs):
        """Return ``(route, tip)``, where ``tip`` is computed from ``route``."""
        hidden = self.conv1(self.conv0(inputs))
        hidden = self.conv3(self.conv2(hidden))
        route = self.route(hidden)
        return route, self.tip(route)

将预测框由 xywh（中心点坐标和宽高）格式转换为 xyxy（x_min, y_min, x_max, y_max）格式

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    return 1. / (1.0 + np.exp(-x))


def get_yolo_box_xxyy(pred, anchors, num_classes, downsample):
    """Decode raw YOLO head output into corner-format boxes.

    Args:
        pred: NumPy array whose last two axes are the feature-map rows and
            columns and which can be reshaped to
            ``[N, num_anchors, 5 + num_classes, rows, cols]``.
        anchors: Flat sequence ``(w0, h0, w1, h1, ...)`` of anchor sizes in
            input-image pixels for this level.
        num_classes: Number of object classes.
        downsample: Ratio between the input image size and the feature-map
            size.

    Returns:
        Array of shape ``[N, rows, cols, num_anchors, 4]`` holding
        ``(x_min, y_min, x_max, y_max)`` relative to the input image size,
        clipped to ``[0, 1]``.
    """
    num_rows = pred.shape[-2]
    num_cols = pred.shape[-1]

    input_h = num_rows * downsample
    input_w = num_cols * downsample
    num_anchors = len(anchors) // 2

    # `pred` is the raw network output for this level (named p0 in the
    # original notes).
    pred = pred.reshape([-1, num_anchors, 5 + num_classes, num_rows, num_cols])
    pred_location = pred[:, :, 0:4, :, :]
    # Move the grid axes forward: [N, A, 4, H, W] -> [N, H, W, A, 4], so each
    # grid cell is followed by its anchors and their 4 location values.
    pred_location = np.transpose(pred_location, (0, 3, 4, 1, 2))

    anchors_this = np.array(
        [[anchors[ind * 2], anchors[ind * 2 + 1]] for ind in range(num_anchors)]
    ).astype('float32')

    # pred_box has shape [N, H, W, NUM_ANCHORS, 4]; the last axis holds the
    # 4 box coordinates. It is first filled with the per-cell offsets and the
    # anchor sizes via broadcasting.
    pred_box = np.zeros(pred_location.shape)
    # The x offset is the COLUMN index and the y offset is the ROW index.
    pred_box[:, :, :, :, 0] = np.arange(num_cols).reshape(1, 1, num_cols, 1)
    pred_box[:, :, :, :, 1] = np.arange(num_rows).reshape(1, num_rows, 1, 1)
    pred_box[:, :, :, :, 2] = anchors_this[:, 0]
    pred_box[:, :, :, :, 3] = anchors_this[:, 1]

    # Decode to relative centre/size (xywh).
    pred_box[:, :, :, :, 0] = (sigmoid(pred_location[:, :, :, :, 0]) + pred_box[:, :, :, :, 0]) / num_cols
    pred_box[:, :, :, :, 1] = (sigmoid(pred_location[:, :, :, :, 1]) + pred_box[:, :, :, :, 1]) / num_rows
    pred_box[:, :, :, :, 2] = (np.exp(pred_location[:, :, :, :, 2]) * pred_box[:, :, :, :, 2]) / input_w
    pred_box[:, :, :, :, 3] = (np.exp(pred_location[:, :, :, :, 3]) * pred_box[:, :, :, :, 3]) / input_h

    # Convert xywh to corners. Order matters: the min corner is written
    # first, then the max corner is derived from it and the still-intact size.
    pred_box[:, :, :, :, 0] = pred_box[:, :, :, :, 0] - pred_box[:, :, :, :, 2] / 2
    pred_box[:, :, :, :, 1] = pred_box[:, :, :, :, 1] - pred_box[:, :, :, :, 3] / 2
    pred_box[:, :, :, :, 2] = pred_box[:, :, :, :, 0] + pred_box[:, :, :, :, 2]
    pred_box[:, :, :, :, 3] = pred_box[:, :, :, :, 1] + pred_box[:, :, :, :, 3]
    pred_box = np.clip(pred_box, 0., 1.0)
    return pred_box

找出与真实框的 IoU 大于指定阈值的候选框，并将其中不是正样本的候选框标记为忽略（objectness 标签设为 -1）

def get_iou_above_threds_inds(pred_box,gt_boxes,iou_threshold):
    """Flag predicted boxes that overlap any ground-truth box strongly.

    Args:
        pred_box: Array indexed as ``[N, rows, cols, anchors, 4]`` with
            ``(x_min, y_min, x_max, y_max)`` on the last axis.
        gt_boxes: Per-sample ground-truth boxes in centre ``xywh`` format.
            Boxes narrower or shorter than ``1e-3`` are skipped.
        iou_threshold: A prediction is flagged when its IoU with some
            ground-truth box is strictly greater than this value.

    Returns:
        Boolean array of shape ``[N, anchors, rows, cols]``.
    """
    n_batch, n_rows, n_cols, n_anchors = pred_box.shape[:4]
    flags = np.zeros([n_batch, n_rows, n_cols, n_anchors])

    for sample in range(n_batch):
        boxes = pred_box[sample]
        left, top = boxes[:, :, :, 0], boxes[:, :, :, 1]
        right, bottom = boxes[:, :, :, 2], boxes[:, :, :, 3]
        # The predicted-box areas do not depend on the ground-truth box.
        pred_area = (right - left) * (bottom - top)

        for gt in gt_boxes[sample]:
            gt_left = gt[0] - gt[2] / 2
            gt_top = gt[1] - gt[3] / 2
            gt_right = gt[0] + gt[2] / 2
            gt_bottom = gt[1] + gt[3] / 2
            too_small = (gt_right - gt_left < 1e-3) or (gt_bottom - gt_top < 1e-3)
            if too_small:
                continue

            overlap_w = np.maximum(
                np.minimum(right, gt_right) - np.maximum(left, gt_left), 0)
            overlap_h = np.maximum(
                np.minimum(bottom, gt_bottom) - np.maximum(top, gt_top), 0)
            intersection = overlap_w * overlap_h

            gt_area = (gt_bottom - gt_top) * (gt_right - gt_left)
            union = gt_area + pred_area - intersection
            iou = intersection / union
            flags[sample][iou > iou_threshold] = 1

    # Reorder to [N, anchors, rows, cols].
    flags = np.transpose(flags, (0, 3, 1, 2))
    return flags.astype('bool')
def label_objectness_ignore(label_objectness,iou_above_thresh_indices):
    """Mark high-IoU negative samples as ignored (label ``-1``).

    Entries of ``label_objectness`` that are below 0.5 and whose matching
    entry in ``iou_above_thresh_indices`` is set are overwritten with ``-1``.
    The array is modified in place and also returned.
    """
    is_negative = label_objectness < 0.5
    label_objectness[is_negative * iou_above_thresh_indices] = -1
    return label_objectness

损失函数

def get_loss(output,
             label_objectness,
             label_location,
             label_classification,
             scales,num_anchors=3,
             num_classes=7):
    """Compute the YOLOv3 loss for one feature-map level.

    Args:
        output: Raw head output; reshaped here to
            ``[-1, num_anchors, num_classes + 5, H, W]`` using its axes 2
            and 3 as H and W.
        label_objectness: Objectness targets, one value per (anchor, cell).
            Positive samples are > 0. Negative values (the -1 "ignore"
            marker) are excluded from the objectness loss.
        label_location: Location targets indexed as ``[:, :, 0..3, :, :]``
            for dx, dy, dw, dh.
        label_classification: Class targets matching the class logits of the
            reshaped output.
        scales: Per-sample weights applied to the location loss.
        num_anchors: Number of anchors at this level.
        num_classes: Number of object classes.

    Returns:
        Scalar tensor: per-image sum over anchors and grid cells of the
        objectness, location and classification losses, averaged over the
        batch.
    """
    reshaped_output = paddle.reshape(
        output,
        [-1, num_anchors, num_classes + 5, output.shape[2], output.shape[3]])

    # Channel 4 alone is the objectness logit. Taking a single index (not a
    # slice) drops that axis, so the prediction lines up with label_objectness.
    pred_objectness = reshaped_output[:, :, 4, :, :]
    loss_objectness = F.binary_cross_entropy_with_logits(
        pred_objectness, label_objectness, reduction='none')

    # Entries labelled -1 are "ignore" samples: they must contribute neither
    # loss nor gradient, and -1 is not a valid BCE target anyway.
    valid_samples = paddle.cast(label_objectness >= 0, 'float32')
    valid_samples.stop_gradient = True
    loss_objectness = loss_objectness * valid_samples

    # pos_samples is 1. at positive samples and 0. everywhere else.
    pos_objectness = label_objectness > 0
    pos_samples = paddle.cast(pos_objectness, 'float32')
    pos_samples.stop_gradient = True

    # Location predictions.
    tx = reshaped_output[:, :, 0, :, :]
    ty = reshaped_output[:, :, 1, :, :]
    tw = reshaped_output[:, :, 2, :, :]
    th = reshaped_output[:, :, 3, :, :]

    # Location targets.
    dx_label = label_location[:, :, 0, :, :]
    dy_label = label_location[:, :, 1, :, :]
    tw_label = label_location[:, :, 2, :, :]
    th_label = label_location[:, :, 3, :, :]

    # Centre offsets use sigmoid cross-entropy; sizes use an L1 loss.
    loss_location_x = F.binary_cross_entropy_with_logits(tx, dx_label, reduction="none")
    loss_location_y = F.binary_cross_entropy_with_logits(ty, dy_label, reduction="none")
    loss_location_w = paddle.abs(tw - tw_label)
    loss_location_h = paddle.abs(th - th_label)

    loss_location = loss_location_x + loss_location_y + loss_location_h + loss_location_w
    # Apply the per-sample weights, then keep only the positive samples.
    loss_location = loss_location * scales
    loss_location = loss_location * pos_samples

    # Class logits follow the 5 box/objectness channels.
    pred_classification = reshaped_output[:, :, 5:5 + num_classes, :, :]
    loss_classification = F.binary_cross_entropy_with_logits(
        pred_classification, label_classification, reduction="none")
    # Sum over the class axis, then keep only the positive samples.
    loss_classification = paddle.sum(loss_classification, axis=2)
    loss_classification = loss_classification * pos_samples

    total_loss = loss_objectness + loss_location + loss_classification
    # Sum over all predicted boxes of an image, then average over the batch.
    total_loss = paddle.sum(total_loss, axis=[1, 2, 3])
    total_loss = paddle.mean(total_loss)

    return total_loss