2.7.1 损失函数LOSS的详解

一、损失函数原理

二、代码注释

代码注释如下:

import math
from copy import deepcopy
from functools import partial

import numpy as np
import torch
import torch.nn as nn


class YOLOLoss(nn.Module):
    """Per-feature-layer YOLO training loss (GIoU box + class BCE + objectness BCE).

    ``forward`` evaluates the loss for one feature layer at a time; the caller
    selects the layer with the index ``l``.

    NOTE(review): ``forward`` calls ``self.get_pred_boxes``, which is not
    defined in this class as shown here. It has to be supplied elsewhere
    (e.g. a part of the class that is not visible) before ``forward`` can run.
    """

    def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask=None, label_smoothing=0):
        """Store the loss configuration.

        Args:
            anchors: iterable of ``(width, height)`` anchor pairs, in input-image pixels.
            num_classes: number of object classes.
            input_shape: ``(height, width)`` of the network input.
            cuda: when truthy, ``forward`` casts ``y_true`` to the prediction's tensor type.
            anchors_mask: for each feature layer, the indices of the anchors it uses.
                ``None`` selects ``[[6, 7, 8], [3, 4, 5], [0, 1, 2]]``.
            label_smoothing: smoothing factor applied to the class targets.
        """
        super(YOLOLoss, self).__init__()
        # -----------------------------------------------------------#
        #   Anchors per feature layer, as listed by the original author
        #   (presumably for a 640x640 input - confirm against the anchor file):
        #   20x20 layer: [116,90], [156,198], [373,326]
        #   40x40 layer: [30,61], [62,45], [59,119]
        #   80x80 layer: [10,13], [16,30], [33,23]
        # -----------------------------------------------------------#
        if anchors_mask is None:
            # A fresh list per instance: a list literal used as a default
            # argument would be shared (and mutable) across every instance.
            anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

        self.anchors = anchors
        self.num_classes = num_classes
        # Each anchor predicts 4 box values + 1 objectness + num_classes scores.
        self.bbox_attrs = 5 + num_classes
        self.input_shape = input_shape
        self.anchors_mask = anchors_mask
        self.label_smoothing = label_smoothing

        # Not read anywhere in the visible part of this class.
        self.threshold = 4

        # Per-layer weight of the objectness loss, indexed by ``l`` in forward().
        self.balance = [0.4, 1.0, 4]
        # Weights of the box, objectness and class terms. The objectness
        # weight scales with the input area relative to 640x640, the class
        # weight with the class count relative to 80.
        self.box_ratio = 0.05
        self.obj_ratio = 1 * (input_shape[0] * input_shape[1]) / (640 ** 2)
        self.cls_ratio = 0.5 * (num_classes / 80)
        # NOTE(review): this instance attribute shadows nn.Module.cuda(), so
        # calling ``loss.cuda()`` on an instance fails. Kept as-is because the
        # attribute name is part of the existing interface.
        self.cuda = cuda

    def clip_by_tensor(self, t, t_min, t_max):
        """Return ``t`` as float32 with every element limited to ``[t_min, t_max]``.

        The lower bound is applied first, then the upper bound. Uses
        ``Tensor.clamp`` so that infinite inputs are clipped to the bounds
        (mask arithmetic of the form ``0 * inf`` would produce NaN instead).
        """
        return t.float().clamp(min=t_min).clamp(max=t_max)

    def MSELoss(self, pred, target):
        """Element-wise squared error ``(pred - target) ** 2`` (no reduction)."""
        return torch.pow(pred - target, 2)

    def BCELoss(self, pred, target):
        """Element-wise binary cross entropy (no reduction).

        Computes ``-target * log(pred) - (1 - target) * log(1 - pred)`` after
        clipping ``pred`` into ``[1e-7, 1 - 1e-7]`` so both logarithms stay finite.
        """
        epsilon = 1e-7
        pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
        return -target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)

    def box_giou(self, b1, b2):
        """Compute the generalized IoU (GIoU) between two sets of boxes.

        Args:
            b1: tensor of shape ``(..., 4)``; the last axis is
                ``(center_x, center_y, width, height)`` (predicted boxes in forward()).
            b2: tensor broadcastable against ``b1`` with the same last-axis
                layout (ground-truth boxes in forward()).

        Returns:
            Tensor of shape ``(...)`` (the last axis is reduced away) holding
            ``iou - (enclose_area - union_area) / enclose_area``.

        No epsilon is added to the denominators, so boxes whose union or
        enclosing area is zero yield NaN/inf.
        """
        # ----------------------------------------------------#
        #   Corners (top-left / bottom-right) of the first boxes
        # ----------------------------------------------------#
        b1_xy = b1[..., :2]
        b1_wh = b1[..., 2:4]
        b1_wh_half = b1_wh / 2.
        b1_mins = b1_xy - b1_wh_half
        b1_maxes = b1_xy + b1_wh_half
        # ----------------------------------------------------#
        #   Corners (top-left / bottom-right) of the second boxes
        # ----------------------------------------------------#
        b2_xy = b2[..., :2]
        b2_wh = b2[..., 2:4]
        b2_wh_half = b2_wh / 2.
        b2_mins = b2_xy - b2_wh_half
        b2_maxes = b2_xy + b2_wh_half

        # ----------------------------------------------------#
        #   IoU = intersection area / union area
        # ----------------------------------------------------#
        intersect_mins = torch.max(b1_mins, b2_mins)
        intersect_maxes = torch.min(b1_maxes, b2_maxes)
        # Negative extents mean the boxes do not overlap on that axis.
        intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes))
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        b1_area = b1_wh[..., 0] * b1_wh[..., 1]
        b2_area = b2_wh[..., 0] * b2_wh[..., 1]
        union_area = b1_area + b2_area - intersect_area
        iou = intersect_area / union_area

        # ----------------------------------------------------#
        #   Smallest axis-aligned box enclosing both boxes
        # ----------------------------------------------------#
        enclose_mins = torch.min(b1_mins, b2_mins)
        enclose_maxes = torch.max(b1_maxes, b2_maxes)
        enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes))
        # ----------------------------------------------------#
        #   GIoU penalizes the part of the enclosing box that the
        #   union does not cover
        # ----------------------------------------------------#
        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
        giou = iou - (enclose_area - union_area) / enclose_area

        return giou

    # ---------------------------------------------------#
    #   Label smoothing
    # ---------------------------------------------------#
    def smooth_labels(self, y_true, label_smoothing, num_classes):
        """Soften hard targets so the network is trained to be less over-confident.

        Returns ``y_true * (1 - label_smoothing) + label_smoothing / num_classes``.
        Example with ``label_smoothing=0.1`` and ``num_classes=2``:
        ``[1, 0] -> [0.95, 0.05]``.
        """
        return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes

    def forward(self, l, input, targets=None, y_true=None):
        """Compute the loss of feature layer ``l``.

        Args:
            l: index of the feature layer; selects ``anchors_mask[l]`` and ``balance[l]``.
            input: raw head output of shape
                ``(bs, len(anchors_mask[l]) * (5 + num_classes), in_h, in_w)``.
            targets: ground-truth boxes, passed through to ``get_pred_boxes``
                (the original comments describe it as ``[batch_size, num_gt, 5]``).
            y_true: encoded targets, indexed here as ``[..., :4]`` box,
                ``[..., 4]`` objectness and ``[..., 5:]`` class targets.
                Presumably shaped ``(bs, len(anchors_mask[l]), in_h, in_w,
                5 + num_classes)`` and built by the data loader - confirm
                against the caller. Required despite the ``None`` default.

        Returns:
            Scalar tensor: weighted sum of the box (GIoU), class (BCE) and
            objectness (BCE) losses.

        Raises:
            ValueError: if ``y_true`` is ``None``.
        """
        if y_true is None:
            raise ValueError("y_true is required to compute the loss")

        # --------------------------------#
        #   Batch size and feature-map height / width
        # --------------------------------#
        bs = input.size(0)
        in_h = input.size(2)
        in_w = input.size(3)
        # -----------------------------------------------------------------------#
        #   Stride: how many input pixels one feature-map cell covers,
        #   e.g. 640 / 20 = 32, 640 / 40 = 16, 640 / 80 = 8.
        # -----------------------------------------------------------------------#
        stride_h = self.input_shape[0] / in_h
        stride_w = self.input_shape[1] / in_w
        # -------------------------------------------------#
        #   Anchors rescaled from pixels to feature-map cells
        # -------------------------------------------------#
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
        # -----------------------------------------------#
        #   (bs, A * attrs, h, w) -> (bs, A, attrs, h, w) -> (bs, A, h, w, attrs)
        #   with A = len(anchors_mask[l]) and attrs = 5 + num_classes
        # -----------------------------------------------#
        prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()

        # -----------------------------------------------#
        #   Box-center adjustment parameters
        # -----------------------------------------------#
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        # -----------------------------------------------#
        #   Box width / height adjustment parameters
        # -----------------------------------------------#
        w = torch.sigmoid(prediction[..., 2])
        h = torch.sigmoid(prediction[..., 3])
        # -----------------------------------------------#
        #   Objectness confidence
        # -----------------------------------------------#
        conf = torch.sigmoid(prediction[..., 4])
        # -----------------------------------------------#
        #   Per-class confidence
        # -----------------------------------------------#
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # ---------------------------------------------------------------#
        #   Decode the raw predictions into boxes so they can be compared
        #   with the ground truth.
        #   NOTE(review): get_pred_boxes is not defined in the visible
        #   class; note the (h, w) argument order used by this call.
        # ---------------------------------------------------------------#
        pred_boxes = self.get_pred_boxes(l, x, y, h, w, targets, scaled_anchors, in_h, in_w)

        if self.cuda:
            y_true = y_true.type_as(x)

        # Positions that have a ground-truth box assigned.
        obj_mask = y_true[..., 4] == 1

        loss = 0
        n = torch.sum(obj_mask)
        if n != 0:
            # ---------------------------------------------------------------#
            #   Box loss (1 - GIoU) and class loss (BCE), both averaged over
            #   the positive positions only.
            # ---------------------------------------------------------------#
            giou = self.box_giou(pred_boxes, y_true[..., :4]).type_as(x)
            loss_loc = torch.mean((1 - giou)[obj_mask])
            smoothed_cls = self.smooth_labels(y_true[..., 5:][obj_mask], self.label_smoothing, self.num_classes)
            loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], smoothed_cls))
            loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio
            # -----------------------------------------------------------#
            #   Objectness target: the detached GIoU (floored at 0) at
            #   positive positions, 0 elsewhere, so a better-fitting box
            #   gets a higher confidence target.
            # -----------------------------------------------------------#
            tobj = torch.where(obj_mask, giou.detach().clamp(0), torch.zeros_like(y_true[..., 4]))
        else:
            tobj = torch.zeros_like(y_true[..., 4])
        # Objectness loss is averaged over every position, positive or not.
        loss_conf = torch.mean(self.BCELoss(conf, tobj))

        loss += loss_conf * self.balance[l] * self.obj_ratio
        return loss

        

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值