前言
最近在做 YOLO 检测模型的 8bit 落地工作,因需求手写实现了优图的 YOLOv2 loss,在此做个记录,方便日后查看。
本篇只介绍核心的 loss 部分,其余详细代码已放到 GitHub 上,详见 https://github.com/XhtZz/yolo-v2-loss
loss详解
def yolo_loss(target,
              output,
              coord_mask, object_detections, object_no_detections_gt_anch, gt_coord, gt_conf,
              object_scale=5.0,
              no_object_scale=1.0, coordinates_scale=3.0):
    """Compute a YOLOv2-style detection loss (coordinates + confidence).

    The raw network output is reshaped to
    ``[batch, anchors, channels-per-anchor, h*w]``, decoded into boxes
    expressed in feature-map cells, and compared with the pre-encoded
    ground-truth tensors supplied by the caller.

    Args:
        target: Ground-truth boxes, forwarded untouched to
            ``build_targets_masks`` (format is defined by that helper).
        output: Raw network output; its shape is read as (n, c, h, w).
        coord_mask: Per-cell weight for the coordinate losses. It is tiled
            x2 along axis 2 to cover an (x, y) or (w, h) pair, so it is
            presumably shaped [batch, anchors, 1, h*w] -- TODO confirm.
            Per the original author's note it holds
            ``2 - gt_w * gt_h / (w * h)``, which up-weights small boxes.
        object_detections: Mask multiplied into the positive confidence
            loss.
        object_no_detections_gt_anch: Mask multiplied into the helper's
            no-object mask; per the original notes it is 0 at each
            ground-truth's best-anchor cell so those cells are never
            treated as negatives.
        gt_coord: Encoded ground-truth coordinates; axis 2 holds
            (x, y, w, h) in the same encoding as the predictions.
        gt_conf: Ground-truth confidence targets.
        object_scale: Weight of the positive confidence loss.
        no_object_scale: Weight of the negative confidence loss.
        coordinates_scale: Base weight of the coordinate losses.

    Returns:
        Scalar tensor: (coordinate loss + confidence loss) / batch size.
    """
    # Five square anchors given in input pixels. Dividing by 16 (the
    # network's input-to-output downsampling factor) expresses them in
    # feature-map cells -- sides of 1, 3, 5, 7 and 10 -- because the IoU
    # is later evaluated on the output feature map.
    anchors = tf.constant(
        [[16, 16], [48, 48], [80, 80], [112, 112], [160, 160]], tf.float32
    ) / tf.constant(16.0)
    num_anchors = tf.shape(anchors)[0]

    out_shape = tf.shape(output)  # read as (n, c, h, w)
    batch_size = out_shape[0]
    grid_h = out_shape[2]
    grid_w = out_shape[3]
    num_cells = grid_w * grid_h

    # Reshape to [batch, anchor, x-y-w-h-conf, w*h].
    raw = tf.reshape(output, [batch_size, num_anchors, -1, num_cells])

    # x, y are squashed into (0, 1); w, h stay as raw values.
    xy_offset = tf.sigmoid(raw[:, :, :2])
    wh_raw = raw[:, :, 2:4]
    pre_coord = tf.concat([xy_offset, wh_raw], 2)
    # Confidence is squashed into (0, 1) as well.
    pre_conf = tf.sigmoid(raw[:, :, 4])

    # Integer cell coordinates, flattened row-major so that entry
    # ``y * grid_w + x`` carries x in ``lin_x`` and y in ``lin_y``.
    col_ids = tf.linspace(0.0, tf.cast(grid_w - 1, tf.float32), grid_w)
    row_ids = tf.linspace(0.0, tf.cast(grid_h - 1, tf.float32), grid_h)
    lin_x = tf.reshape(
        tf.tile(tf.reshape(col_ids, [1, grid_w]), [grid_h, 1]), [num_cells])
    lin_y = tf.reshape(
        tf.tile(tf.reshape(row_ids, [grid_h, 1]), [1, grid_w]), [num_cells])

    # Decode: centre = in-cell offset + cell index; size = exp(raw) * anchor.
    # This mirrors the encoding applied to the ground truth elsewhere.
    anchor_w = tf.reshape(anchors[:, 0], [num_anchors, 1])
    anchor_h = tf.reshape(anchors[:, 1], [num_anchors, 1])
    box_x = tf.reshape(pre_coord[:, :, 0] + lin_x, [-1, 1])
    box_y = tf.reshape(pre_coord[:, :, 1] + lin_y, [-1, 1])
    box_w = tf.reshape(tf.exp(pre_coord[:, :, 2]) * anchor_w, [-1, 1])
    box_h = tf.reshape(tf.exp(pre_coord[:, :, 3]) * anchor_h, [-1, 1])

    # One row per (batch, anchor, cell); the 4 columns are x, y, w, h.
    pred_boxes = tf.concat([box_x, box_y, box_w, box_h], 1)

    # Per the original notes, the helper computes the IoU between predicted
    # and ground-truth boxes and returns a mask of cells that certainly
    # contain no object (each cell stands for one box).
    object_no_detections_gt_pre = build_targets_masks(
        pred_boxes, target, batch_size, num_anchors, grid_h, grid_w,
        thresh = 0.6, reduction = 16.0)
    # Multiplying by the caller-supplied mask forces the no-object mask to
    # zero wherever ``object_no_detections_gt_anch`` is zero.
    object_no_detections = (
        object_no_detections_gt_pre * object_no_detections_gt_anch)

    # Every coordinate loss is weighted by the (tiled) coordinate mask.
    coord_weight = tf.tile(coord_mask, [1, 1, 2, 1])
    pre_center = pre_coord[:, :, :2]
    gt_center = gt_coord[:, :, :2]
    pre_wh = pre_coord[:, :, 2:]
    gt_wh = gt_coord[:, :, 2:]

    # Centre points use binary cross-entropy; width/height use the Huber
    # (smooth-L1) loss.
    center_bce = tf.keras.backend.binary_crossentropy(gt_center, pre_center)
    wh_huber = tf.losses.huber_loss(
        gt_wh, pre_wh, reduction=tf.losses.Reduction.NONE)
    center_scale = 2.0 * 1.0 * coordinates_scale
    wh_scale = 2.0 * 1.5 * coordinates_scale
    loss_coord_center = center_scale * tf.reduce_sum(coord_weight * center_bce)
    loss_coord_wh = wh_scale * tf.reduce_sum(coord_weight * wh_huber)
    loss_coord = loss_coord_center + loss_coord_wh

    # Positive and negative samples both use binary cross-entropy on the
    # confidence; only the mask and the scale differ.
    conf_bce = tf.keras.backend.binary_crossentropy(gt_conf, pre_conf)
    pos_scale = 1.0 * object_scale
    neg_scale = 1.0 * no_object_scale
    loss_conf_pos = pos_scale * tf.reduce_sum(object_detections * conf_bce)
    loss_conf_neg = neg_scale * tf.reduce_sum(object_no_detections * conf_bce)
    loss_conf = loss_conf_pos + loss_conf_neg

    # Average the summed loss over the batch.
    return (loss_coord + loss_conf) / tf.cast(batch_size, tf.float32)
loss设计逻辑图
loss 的整个设计我已用图画出来了,仔细理解一下应该不难;请按从左向右、从上到下的顺序,对照上面的代码阅读。