yolov3的整体网络结构代码实现

最新推荐文章于 2024-08-05 23:17:10 发布

舒溶

最新推荐文章于 2024-08-05 23:17:10 发布

阅读量233

点赞数

文章标签：神经网络、深度学习、机器学习

本文链接：https://blog.csdn.net/weixin_42975688/article/details/120383593

版权

yolov3的整体网络结构代码实现

损失函数

def get_loss(output,
             label_objectness,
             label_location,
             label_classification,
             scales, num_anchors=3,
             num_classes=7):
    """Compute the YOLOv3 training loss for a single output feature map.

    The loss is the sum of three terms: an objectness loss evaluated at every
    prediction, plus location and classification losses that are only counted
    where ``label_objectness > 0`` (positive samples).

    Args:
        output: Raw network output. It is reshaped to
            ``[-1, num_anchors, num_classes + 5, H, W]`` where ``H`` and ``W``
            are ``output.shape[2]`` and ``output.shape[3]``.
        label_objectness: Objectness targets; must match the shape of the
            objectness logits, i.e. ``[N, num_anchors, H, W]``.
        label_location: Location targets indexed as
            ``[:, :, k, :, :]`` with ``k`` in 0..3 for (dx, dy, tw, th).
        label_classification: Class targets; must match the shape of the
            class logits, i.e. ``[N, num_anchors, num_classes, H, W]``.
        scales: Per-box weight multiplied into the location loss
            (must broadcast against ``[N, num_anchors, H, W]``).
        num_anchors: Number of anchors predicted per grid cell.
        num_classes: Number of object classes.

    Returns:
        A scalar tensor: per-sample losses summed over all predictions and
        then averaged over the batch.
    """
    # Split the channel axis into (anchor, per-anchor prediction vector).
    reshaped_output = paddle.reshape(
        output,
        [-1, num_anchors, num_classes + 5, output.shape[2], output.shape[3]])

    # Channel 4 of each prediction vector is the objectness logit. Index it
    # with a scalar (not a slice) so the result drops axis 2 and lines up
    # with label_objectness.
    pred_objectness = reshaped_output[:, :, 4, :, :]
    loss_objectness = F.binary_cross_entropy_with_logits(
        pred_objectness, label_objectness, reduction='none')

    # pos_samples is 1.0 where the label marks a positive sample, else 0.0.
    # It is a mask only, so gradients must not flow through it.
    pos_objectness = label_objectness > 0
    pos_samples = paddle.cast(pos_objectness, 'float32')
    pos_samples.stop_gradient = True

    # Location predictions: channels 0..3 of each prediction vector.
    tx = reshaped_output[:, :, 0, :, :]
    ty = reshaped_output[:, :, 1, :, :]
    tw = reshaped_output[:, :, 2, :, :]
    th = reshaped_output[:, :, 3, :, :]

    # Matching location targets.
    dx_label = label_location[:, :, 0, :, :]
    dy_label = label_location[:, :, 1, :, :]
    tw_label = label_location[:, :, 2, :, :]
    th_label = label_location[:, :, 3, :, :]

    # x/y offsets use sigmoid cross-entropy; width/height use L1 distance.
    loss_location_x = F.binary_cross_entropy_with_logits(
        tx, dx_label, reduction="none")
    loss_location_y = F.binary_cross_entropy_with_logits(
        ty, dy_label, reduction="none")
    loss_location_w = paddle.abs(tw - tw_label)
    loss_location_h = paddle.abs(th - th_label)

    loss_location = (loss_location_x + loss_location_y
                     + loss_location_h + loss_location_w)

    # Apply the per-box weight, then keep the location loss only at
    # positive samples.
    loss_location = loss_location * scales
    loss_location = loss_location * pos_samples

    # Class logits: channels 5 .. 5 + num_classes of each prediction vector.
    pred_classification = reshaped_output[:, :, 5:5 + num_classes, :, :]
    loss_classification = F.binary_cross_entropy_with_logits(
        pred_classification, label_classification, reduction="none")

    # Sum over the class axis so the result has one value per prediction.
    loss_classification = paddle.sum(loss_classification, axis=2)

    # Classification loss only counts at positive samples.
    loss_classification = loss_classification * pos_samples

    total_loss = loss_objectness + loss_location + loss_classification
    # Sum over anchors and spatial positions, then average over the batch.
    total_loss = paddle.sum(total_loss, axis=[1, 2, 3])
    total_loss = paddle.mean(total_loss)

    return total_loss

将特征图放大（上采样）

class Upsample(paddle.nn.Layer):
    """Upsampling layer that enlarges its input by a fixed factor.

    This layer has no learnable parameters; it forwards to
    ``paddle.nn.functional.interpolate`` with ``mode='NEAREST'``.

    Args:
        scale: Factor passed to ``interpolate`` as ``scale_factor``.
    """

    def __init__(self, scale=2):
        super(Upsample, self).__init__()
        self.scale = scale

    def forward(self, inputs):
        """Return ``inputs`` resized by ``self.scale``."""
        # The output size is derived from scale_factor by interpolate itself,
        # so no explicit output-shape computation is needed here.
        return paddle.nn.functional.interpolate(
            x=inputs, scale_factor=self.scale, mode='NEAREST'
        )

整体结构。

损失函数的计算过程是：先将骨干网络输出的 C0、C1、C2 分别转化为预测输出 P0、P1、P2，再将这三个 P 分别与 gtbox（即真实框）计算损失值，最后把三个损失值相加。这里直接调用了 API（paddle.vision.ops.yolo_loss），其原理相当于把上面的损失函数在三个尺度上各使用一次。

class YOLOV3(paddle.nn.Layer):
    """YOLOv3 detector: backbone plus three detection heads.

    For each of the three feature maps produced by the backbone
    (``DarkNet53_conv_body``), the model builds:

    * a ``YoloDetectionBlock`` that returns ``(route, tip)``,
    * a 1x1 convolution mapping ``tip`` to ``3 * (num_classes + 5)`` channels,
    * for the first two levels, a 1x1 ``ConvBNLayer`` followed by upsampling
      that feeds ``route`` into the next level.

    Args:
        num_classes: Number of object classes.
    """

    def __init__(self, num_classes=7):
        super(YOLOV3, self).__init__()
        self.num_classes = num_classes
        self.block = DarkNet53_conv_body()
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks_2 = []
        # Each head predicts 3 anchors, each with 4 box values, 1 objectness
        # value and num_classes class scores.
        num_filters = 3 * (self.num_classes + 5)
        for i in range(3):
            ch = 512 // (2 ** i)

            # Block that turns C_i into (route r_i, tip t_i). From the second
            # level on, the input is C_i concatenated with the upsampled route,
            # hence the extra `ch` input channels.
            yolo_block = self.add_sublayer(
                "yolo_detection_block_%d" % (i),
                YoloDetectionBlock(
                    ch_in=ch * 2 if i == 0 else ch * 2 + ch,
                    ch_out=ch
                ))
            self.yolo_blocks.append(yolo_block)

            # 1x1 convolution from t_i to the prediction map P_i. It needs its
            # own sublayer name; reusing the detection block's name would
            # overwrite that registration.
            block_out = self.add_sublayer(
                "block_out_%d" % (i),
                paddle.nn.Conv2D(
                    in_channels=ch * 2,
                    out_channels=num_filters,
                    kernel_size=1,
                    stride=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Normal(0., 0.02)),
                    bias_attr=paddle.ParamAttr(
                        initializer=paddle.nn.initializer.Constant(0.0),
                        regularizer=paddle.regularizer.L2Decay(0.))
                ))
            self.block_outputs.append(block_out)

            if i < 2:
                # 1x1 convolution applied to r_i before it is upsampled and
                # merged with C_{i+1}. Integer division keeps the channel
                # count an int.
                route = self.add_sublayer(
                    "route2_%d" % (i),
                    ConvBNLayer(ch_in=ch,
                                ch_out=256 // (2 ** i),
                                kernel_size=1,
                                stride=1, padding=0))
                self.route_blocks_2.append(route)

        # A single parameter-free upsampling layer instance shared by all
        # levels.
        self.upsample = Upsample()

    def forward(self, inputs):
        """Run the backbone and heads; return the list of prediction maps.

        Args:
            inputs: Input batch passed to the backbone.

        Returns:
            A list with one prediction tensor per backbone feature map, in
            the order the backbone yields them.
        """
        outputs = []
        # The backbone is expected to return the feature maps C0, C1, C2
        # (at most three, since only three heads are built).
        blocks = self.block(inputs)
        route = None
        for i, block in enumerate(blocks):
            if i > 0:
                # Concatenate the processed, upsampled route from the previous
                # level with this level's feature map along the channel axis.
                block = paddle.concat([route, block], axis=1)
            # Produce route r_i and tip t_i from the (possibly merged) C_i.
            route, tip = self.yolo_blocks[i](block)
            block_out = self.block_outputs[i](tip)
            outputs.append(block_out)
            if i < 2:
                # Prepare the route for the next level: 1x1 conv, then enlarge.
                route = self.route_blocks_2[i](route)
                route = self.upsample(route)
        return outputs

    def get_loss(self, outputs, gtbox, gtlabel, gtscore=None,
                 anchors=[10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 ignore_thresh=0.7,
                 use_label_smooth=False):
        """Sum ``paddle.vision.ops.yolo_loss`` over all prediction maps.

        Args:
            outputs: Prediction maps as returned by ``forward``.
            gtbox: Ground-truth boxes, forwarded as ``gt_box``.
            gtlabel: Ground-truth class labels, forwarded as ``gt_label``.
            gtscore: Optional ground-truth scores, forwarded as ``gt_score``.
            anchors: Flat list of anchor sizes (read only, never modified).
            anchor_masks: For each prediction map, the anchor indices it uses
                (read only, never modified).
            ignore_thresh: Forwarded to ``yolo_loss``.
            use_label_smooth: Forwarded to ``yolo_loss``.

        Returns:
            The sum of the per-map losses. The individual losses are also
            stored in ``self.losses``.
        """
        self.losses = []
        # The first map uses a downsample ratio of 32; each following map
        # halves it (32, 16, 8).
        downsample = 32
        for i, out in enumerate(outputs):
            anchor_masks_i = anchor_masks[i]
            loss = paddle.vision.ops.yolo_loss(
                x=out,
                gt_box=gtbox,
                gt_label=gtlabel,
                gt_score=gtscore,
                anchors=anchors,
                anchor_mask=anchor_masks_i,
                class_num=self.num_classes,
                ignore_thresh=ignore_thresh,
                downsample_ratio=downsample,
                # Honour the caller's choice instead of a hard-coded False.
                use_label_smooth=use_label_smooth
            )
            self.losses.append(loss)
            downsample = downsample // 2
        return sum(self.losses)

舒溶

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
yolov3的整体网络结构代码实现

yolov3的整体网络结构代码实现损失函数def get_loss(output, label_objectness, label_location, label_classification, scales,num_anchors=3, num_classes=7): """设计损失函数""" reshaped_output=paddle.reshape(o
复制链接

扫一扫