YoloV2网络损失函数代码粗解

最新推荐文章于 2024-07-12 19:58:07 发布

zh_JNU

最新推荐文章于 2024-07-12 19:58:07 发布

阅读量1.3k

点赞数 2

分类专栏： keras

本文链接：https://blog.csdn.net/zh_JNU/article/details/84769994

版权

keras 专栏收录该内容

19 篇文章 1 订阅

订阅专栏

本篇接上一篇文章，简单注释一下 YoloV2 损失函数的定义，这部分比较重要。有了网络输入输出的定义和损失函数，我们就可以自己构建网络，或者迁移其他网络，来识别并定位自己想要检测的物体。下面的函数摘自 frontend.py，是其中的核心代码之一。

def custom_loss(self, y_true, y_pred):
    """Build the YOLOv2 training loss for one batch.

    The loss is the sum of four terms -- box centre (xy), box size (wh),
    objectness confidence and class -- each normalised by the number of
    strictly positive entries of its mask.

    Args:
        y_true: Ground-truth tensor. Its last axis is read as
            ``[x, y, w, h, objectness, one-hot class ...]``. The leading
            axes are expected to be ``(batch, grid_h, grid_w, nb_box)``,
            which is the layout the cell grid built below broadcasts
            against.
        y_pred: Raw network output, indexed the same way as ``y_true``.

    Returns:
        Scalar loss tensor. While the internal ``seen`` counter is below
        ``self.warmup_batches + 1`` a constant 10 is added to the sum.

    Attributes read from ``self``: ``grid_w``, ``grid_h``, ``batch_size``,
    ``nb_box``, ``anchors``, ``true_boxes``, ``coord_scale``,
    ``no_object_scale``, ``object_scale``, ``class_wt``, ``class_scale``,
    ``warmup_batches`` and ``debug``.
    """
    # Leading four axes of y_true; used to allocate the confidence mask.
    mask_shape = tf.shape(y_true)[:4]

    # cell_x[0, i, j, 0, 0] == j: column index of every grid cell.
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]),
                                    (1, self.grid_h, self.grid_w, 1, 1)))
    # cell_y[0, i, j, 0, 0] == i: row index of every grid cell. It is laid
    # out as (1, grid_w, grid_h, 1, 1) first and then transposed, so that it
    # ends up with the same (1, grid_h, grid_w, 1, 1) shape as cell_x even
    # when the grid is not square (transposing cell_x itself only works for
    # grid_w == grid_h).
    cell_y = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_h), [self.grid_w]),
                                    (1, self.grid_w, self.grid_h, 1, 1)))
    cell_y = tf.transpose(cell_y, (0, 2, 1, 3, 4))

    # (batch_size, grid_h, grid_w, nb_box, 2): the (x, y) index of the cell
    # each predictor lives in; added to the predicted xy offsets below.
    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])

    conf_mask = tf.zeros(mask_shape)

    # `seen` counts how many times this loss has been evaluated (drives the
    # warm-up switch); `total_recall` accumulates recall for debug output.
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)

    # Anchor sizes, shaped to broadcast over batch and grid axes.
    anchors = np.reshape(self.anchors, [1, 1, 1, self.nb_box, 2])

    """
    Adjust prediction
    """
    ### adjust x and y
    # Sigmoid keeps the offset inside the cell; adding the cell index gives
    # the box centre in grid units. Last axis has size 2.
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h
    # Width/height are predicted as a log-scale factor of each anchor.
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * anchors

    ### adjust confidence
    # One value per predictor (last axis dropped by the integer index).
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities
    # Kept as raw logits, one vector of class scores per predictor; the
    # softmax is applied inside the cross-entropy op further down.
    pred_box_class = y_pred[..., 5:]

    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2]  # compared directly with pred_box_xy (grid units)

    ### adjust w and h
    true_box_wh = y_true[..., 2:4]  # number of cells across, horizontally and vertically

    ### adjust confidence
    # IOU between every predictor's box and the ground-truth box stored at
    # the same position of y_true.
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half

    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    # Clamp at 0 so non-overlapping boxes give zero intersection.
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    # Confidence target: the IOU where y_true marks an object, 0 elsewhere.
    true_box_conf = iou_scores * y_true[..., 4]

    ### adjust class probabilities
    # Index of the ground-truth class (argmax over the one-hot tail).
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    # Trailing axis of size 1 so it broadcasts over the (x, y) / (w, h) pair.
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    # IOU of every predicted box against ALL ground-truth boxes of the image
    # held in self.true_boxes.
    # NOTE(review): self.true_boxes is assumed to broadcast as
    # (batch, 1, 1, 1, max_boxes, 4) -- confirm against the model input.
    true_xy = self.true_boxes[..., 0:2]
    true_wh = self.true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins    = true_xy - true_wh_half
    true_maxes   = true_xy + true_wh_half

    # Insert an axis at position 4 so predictions broadcast against the
    # list of ground-truth boxes.
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins    = pred_xy - pred_wh_half
    pred_maxes   = pred_xy + pred_wh_half

    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    # Best IOU of each predictor with any ground-truth box.
    best_ious = tf.reduce_max(iou_scores, axis=4)

    # Predictors that hold no object AND overlap every ground-truth box by
    # less than 0.6 are penalised as "no object". Object-free predictors
    # with best IOU >= 0.6 get weight 0, i.e. they are ignored.
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale

    # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * self.object_scale

    ### class mask: simply the position of the ground truth boxes (the predictors)
    # Weighted per class through self.class_wt, looked up by class index.
    class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale

    """
    Warm-up training
    """
    # 1 where the predictor has no ground-truth box, 0 where it has one.
    no_boxes_mask = tf.to_float(coord_mask < self.coord_scale/2.)
    seen = tf.assign_add(seen, 1.)

    # During warm-up, predictors without an object are given a synthetic
    # target -- the centre of their own cell and the plain anchor size --
    # and every predictor gets a coordinate weight of 1.
    # NOTE(review): this relies on y_true xy/wh being 0 where there is no
    # object, since the synthetic target is added to them -- confirm.
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1),
                          lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
                                   true_box_wh + tf.ones_like(true_box_wh) * \
                                   anchors * \
                                   no_boxes_mask,
                                   tf.ones_like(coord_mask)],
                          lambda: [true_box_xy,
                                   true_box_wh,
                                   coord_mask])

    """
    Finalize the loss
    """
    # Number of strictly positive entries in each mask (not the mask sum);
    # used as the normaliser of the corresponding loss term.
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    # Masked squared error, averaged over the positive mask entries and
    # halved. The 1e-6 guards against an all-zero mask.
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.

    # Class term: sparse softmax cross-entropy on the raw class logits,
    # masked and averaged the same way (no halving).
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    # Total loss is the sum of the four terms; +10 while still warming up.
    loss = tf.cond(tf.less(seen, self.warmup_batches+1),
                  lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
                  lambda: loss_xy + loss_wh + loss_conf + loss_class)

    if self.debug:
        nb_true_box = tf.reduce_sum(y_true[..., 4])
        # A ground-truth box counts as recalled when its predictor overlaps
        # it with IOU > 0.5 and predicts a confidence > 0.3.
        nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

        current_recall = nb_pred_box/(nb_true_box + 1e-6)
        total_recall = tf.assign_add(total_recall, current_recall)

        # tf.Print is an identity op with a logging side effect, so the
        # prints are chained through `loss` to make sure they execute.
        loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
        loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
        loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
        loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
        loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
        loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
        loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

    return loss

zh_JNU

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
5
评论
YoloV2网络损失函数代码粗解

这篇接上面文章来简单注释下YoloV2损失函数的定义，这部分比较主要。有了定义网络的输入输出和损失函数，我们就可以自己构建自己的网络或迁移其他网络来实现自己想要的识别定位的物体了。该函数来自于frontend.py的部分核心代码。def custom_loss(self, y_true, y_pred):########传递的参数就是真实值和预测输出值 #...
复制链接

扫一扫