一、损失函数原理
二、代码注释
代码注释如下:
import math
from copy import deepcopy
from functools import partial

import numpy as np
import torch
import torch.nn as nn


class YOLOLoss(nn.Module):
    """YOLO-style detection loss evaluated on one feature layer per call.

    ``forward`` combines three terms for the selected feature layer:

    * a localisation term, ``1 - GIoU`` averaged over positive cells,
    * a classification term, BCE on the class scores of positive cells,
    * a confidence term, BCE between predicted objectness and a target that
      is the (clamped, detached) GIoU on positive cells and 0 elsewhere.
    """

    def __init__(self, anchors, num_classes, input_shape, cuda,
                 anchors_mask=None, label_smoothing=0):
        """Store the loss configuration.

        Args:
            anchors: iterable of ``(width, height)`` anchor pairs; ``forward``
                divides them by the layer stride.
            num_classes: number of object classes.
            input_shape: ``(height, width)`` of the network input; index 0 is
                paired with the feature-map height and index 1 with its width.
            cuda: when truthy, ``forward`` casts ``y_true`` to the type of the
                predictions.
            anchors_mask: per-layer lists of anchor indices. ``None`` selects
                ``[[6, 7, 8], [3, 4, 5], [0, 1, 2]]``; a fresh list is built
                per instance so instances never share a mutable default.
            label_smoothing: smoothing coefficient passed to ``smooth_labels``.
        """
        super().__init__()
        if anchors_mask is None:
            anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

        # Original author's note (valid for the usual 9-anchor configuration
        # with a 640x640 input -- confirm against the anchors actually passed):
        #   20x20 layer -> anchors [116,90], [156,198], [373,326]
        #   40x40 layer -> anchors [30,61],  [62,45],   [59,119]
        #   80x80 layer -> anchors [10,13],  [16,30],   [33,23]
        self.anchors = anchors
        self.num_classes = num_classes
        # Each anchor predicts 4 box values + 1 objectness + one score per class.
        self.bbox_attrs = 5 + num_classes
        self.input_shape = input_shape
        self.anchors_mask = anchors_mask
        self.label_smoothing = label_smoothing

        # Not read by any method visible in this class body.
        self.threshold = 4

        # Per-layer weight applied to the confidence term (indexed by ``l``).
        self.balance = [0.4, 1.0, 4]
        # Weights of the localisation / confidence / classification terms.
        # The confidence weight scales with input area relative to 640x640,
        # the classification weight with the class count relative to 80.
        self.box_ratio = 0.05
        self.obj_ratio = 1 * (input_shape[0] * input_shape[1]) / (640 ** 2)
        self.cls_ratio = 0.5 * (num_classes / 80)
        self.cuda = cuda

    def clip_by_tensor(self, t, t_min, t_max):
        """Clip ``t`` (cast to float) element-wise into ``[t_min, t_max]``."""
        t = t.float()
        # Mask arithmetic instead of torch.clamp so that the bounds may be
        # plain numbers or tensors, in any combination.
        result = (t >= t_min).float() * t + (t < t_min).float() * t_min
        result = (result <= t_max).float() * result + (result > t_max).float() * t_max
        return result

    def MSELoss(self, pred, target):
        """Element-wise squared error ``(pred - target) ** 2`` (no reduction)."""
        return torch.pow(pred - target, 2)

    def BCELoss(self, pred, target):
        """Element-wise binary cross-entropy (no reduction).

        Computes ``-t * log(p) - (1 - t) * log(1 - p)`` after clipping ``p``
        into ``[1e-7, 1 - 1e-7]`` so neither logarithm sees 0.
        """
        epsilon = 1e-7
        pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
        output = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
        return output

    def box_giou(self, b1, b2):
        """Generalised IoU between two sets of boxes.

        Args:
            b1: tensor whose last dimension holds ``(x, y, w, h)`` in its first
                four entries, where ``(x, y)`` is the box centre (corners are
                derived as ``xy -/+ wh / 2``). ``forward`` passes the decoded
                predictions here.
            b2: tensor laid out like ``b1`` and broadcast-compatible with it.
                ``forward`` passes the ground-truth boxes here.

        Returns:
            Tensor of GIoU values with the last dimension of the inputs
            removed. Degenerate boxes (zero union or zero enclosing area)
            produce a finite value instead of NaN.
        """
        eps = 1e-7

        # Corners of the first set of boxes.
        b1_xy = b1[..., :2]
        b1_wh = b1[..., 2:4]
        b1_wh_half = b1_wh / 2.
        b1_mins = b1_xy - b1_wh_half
        b1_maxes = b1_xy + b1_wh_half

        # Corners of the second set of boxes.
        b2_xy = b2[..., :2]
        b2_wh = b2[..., 2:4]
        b2_wh_half = b2_wh / 2.
        b2_mins = b2_xy - b2_wh_half
        b2_maxes = b2_xy + b2_wh_half

        # IoU = |A intersect B| / |A union B|; the intersection extent is
        # floored at 0 so disjoint boxes contribute no area.
        intersect_mins = torch.max(b1_mins, b2_mins)
        intersect_maxes = torch.min(b1_maxes, b2_maxes)
        intersect_wh = torch.max(intersect_maxes - intersect_mins,
                                 torch.zeros_like(intersect_maxes))
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        b1_area = b1_wh[..., 0] * b1_wh[..., 1]
        b2_area = b2_wh[..., 0] * b2_wh[..., 1]
        union_area = b1_area + b2_area - intersect_area
        # Clamp only the denominator: unchanged for any non-degenerate box,
        # but avoids 0 / 0 = NaN when both boxes have zero area.
        iou = intersect_area / union_area.clamp(min=eps)

        # Smallest axis-aligned box enclosing both boxes.
        enclose_mins = torch.min(b1_mins, b2_mins)
        enclose_maxes = torch.max(b1_maxes, b2_maxes)
        enclose_wh = torch.max(enclose_maxes - enclose_mins,
                               torch.zeros_like(intersect_maxes))
        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]

        # GIoU = IoU - (enclosing area not covered by the union) / enclosing area.
        giou = iou - (enclose_area - union_area) / enclose_area.clamp(min=eps)
        return giou

    def smooth_labels(self, y_true, label_smoothing, num_classes):
        """Apply label smoothing to the targets.

        Example with ``label_smoothing=0.1`` and ``num_classes=2``:
        ``[1, 0] -> [1, 0] * 0.9 + 0.05 = [0.95, 0.05]``.
        """
        return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes

    def forward(self, l, input, targets=None, y_true=None):
        """Compute the loss for feature layer ``l``.

        Args:
            l: index of the feature layer; selects ``self.anchors_mask[l]`` and
                ``self.balance[l]``.
            input: raw network output for that layer, viewed below as
                ``(bs, len(anchors_mask[l]), 5 + num_classes, in_h, in_w)``.
            targets: ground-truth boxes; only forwarded to
                ``self.get_pred_boxes``.
            y_true: encoded target tensor. The code reads ``[..., :4]`` as the
                box, ``[..., 4]`` as the positive-cell flag (``== 1``) and
                ``[..., 5:]`` as class targets. Required in practice even
                though it defaults to ``None``.

        Returns:
            The weighted sum of the localisation, classification and
            confidence terms for this layer.
        """
        bs = input.size(0)
        in_h = input.size(2)
        in_w = input.size(3)

        # Stride = input pixels covered by one feature-map cell
        # (e.g. 640 / 20 = 32, 640 / 40 = 16, 640 / 80 = 8).
        stride_h = self.input_shape[0] / in_h
        stride_w = self.input_shape[1] / in_w

        # Anchors expressed in feature-map cells rather than input pixels.
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

        # (bs, A * attrs, H, W) -> (bs, A, attrs, H, W) -> (bs, A, H, W, attrs)
        prediction = input.view(
            bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w
        ).permute(0, 1, 3, 4, 2).contiguous()

        # Centre offsets, width/height adjustments, objectness, class scores.
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = torch.sigmoid(prediction[..., 2])
        h = torch.sigmoid(prediction[..., 3])
        conf = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Original author's note: target assignment (get_target) was moved
        # into the dataloader because running it here slowed training.
        # NOTE(review): get_pred_boxes is not defined in the visible part of
        # this class; it presumably decodes the sigmoid outputs into boxes on
        # the feature-map grid -- confirm against its definition.
        pred_boxes = self.get_pred_boxes(l, x, y, h, w, targets, scaled_anchors, in_h, in_w)

        if self.cuda:
            y_true = y_true.type_as(x)

        # Cells that were assigned a ground-truth box.
        obj_mask = y_true[..., 4] == 1

        loss = 0
        if obj_mask.any():
            # GIoU between decoded predictions and ground-truth boxes.
            giou = self.box_giou(pred_boxes, y_true[..., :4]).type_as(x)

            # Localisation and classification terms use positive cells only.
            loss_loc = torch.mean((1 - giou)[obj_mask])
            smoothed_cls = self.smooth_labels(
                y_true[..., 5:][obj_mask], self.label_smoothing, self.num_classes
            )
            loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], smoothed_cls))
            loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio

            # Objectness target: the (non-negative, detached) GIoU on positive
            # cells, so a better-fitting prediction gets a higher target.
            tobj = torch.where(obj_mask, giou.detach().clamp(0),
                               torch.zeros_like(y_true[..., 4]))
        else:
            tobj = torch.zeros_like(y_true[..., 4])

        loss_conf = torch.mean(self.BCELoss(conf, tobj))
        loss += loss_conf * self.balance[l] * self.obj_ratio
        return loss