yolov4/yolov3预测代码解析
代码组成
预测部分由两步组成:
1.由特征图获得全部的预测框
2.使用NMS(非极大值抑制)对所有的预测框进行筛选
预测框解码
根据每个特征图及其对应的anchors解码预测框
该部分计算每个预测框的中心点坐标(xy)、宽高(wh)、是否包含目标的置信度(conf)以及各目标类别的概率(prob)四个值。
def decode_feature(feature, anchor, width, height):
    """Decode one raw YOLO feature map into box centers, sizes and scores.

    Args:
        feature: Raw output of one detection head. It is reshaped to
            [batch_size, grid_h, grid_w, 3, 5 + class_num], so three anchors
            per grid cell are assumed.
        anchor: Anchor sizes multiplied into exp(wh).
            NOTE(review): presumably shape [3, 2] in the same units as
            `width`/`height` -- confirm against the caller.
        width: Divisor applied to the decoded box width.
        height: Divisor applied to the decoded box height.

    Returns:
        Tuple (xy, wh, conf, prob):
            xy:   [batch_size, grid_h, grid_w, 3, 2] box centers divided by
                  the grid size.
            wh:   [batch_size, grid_h, grid_w, 3, 2] box sizes divided by
                  (width, height).
            conf: [batch_size, grid_h, grid_w, 3, 1] sigmoid objectness.
            prob: [batch_size, grid_h, grid_w, 3, class_num] sigmoid class
                  scores.
    """
    # tf.reshape only accepts integer shapes, so keep the dynamic shape as
    # integers and use float copies only for arithmetic.
    grid_shape = tf.shape(feature)
    batch_size, grid_h, grid_w = grid_shape[0], grid_shape[1], grid_shape[2]
    grid_h_f = tf.cast(grid_h, tf.float32)
    grid_w_f = tf.cast(grid_w, tf.float32)

    # [batch_size, grid_h, grid_w, 3, 5 + class_num]
    yi_pred = tf.reshape(feature, [batch_size, grid_h, grid_w, 3, -1])
    # Last axis is split into 2 (xy), 2 (wh), 1 (conf) and class_num (prob).
    xy, wh, conf, prob = tf.split(yi_pred, [2, 2, 1, -1], axis=-1)

    # Per-cell (x, y) index of the grid cell, added to the sigmoid offset
    # predicted inside that cell.
    offset_x = tf.range(grid_w_f, dtype=tf.float32)  # along width
    offset_y = tf.range(grid_h_f, dtype=tf.float32)  # along height
    offset_x, offset_y = tf.meshgrid(offset_x, offset_y)
    offset_x = tf.reshape(offset_x, (-1, 1))
    offset_y = tf.reshape(offset_y, (-1, 1))
    offset_xy = tf.concat([offset_x, offset_y], axis=-1)
    # [grid_h, grid_w, 1, 2]; broadcasts over the batch and anchor axes.
    offset_xy = tf.reshape(offset_xy, [grid_h, grid_w, 1, 2])

    # Cell index + in-cell offset, then normalised by the grid size.
    xy = tf.math.sigmoid(xy) + offset_xy
    xy = xy / [grid_w_f, grid_h_f]

    # Anchor-scaled size, then normalised by (width, height).
    wh = tf.math.exp(wh) * anchor
    wh = wh / [width, height]

    conf = tf.math.sigmoid(conf)
    prob = tf.math.sigmoid(prob)
    return xy, wh, conf, prob
Reshape和特征图合并
该部分将boxes、conf、prob分别reshape为[batch_size, 13x13x3, 4]、[batch_size, 13x13x3, 1]、[batch_size, 13x13x3, class_num](以13x13的特征图为例),然后将三个特征图的结果沿第1维拼接合并。
def reshape(xy, wh, conf, prob):
    """Convert center/size boxes to corner boxes and flatten the grid axes.

    Args:
        xy: Box centers; the first two channels of the last axis are x and y.
        wh: Box sizes; the first two channels of the last axis are w and h.
        conf: Objectness scores, reshaped to one value per box.
        prob: Per-class scores, reshaped to one row per box.

    Returns:
        Tuple (boxes, conf, prob), where axes 1-3 of the inputs are merged
        into a single axis of length N:
            boxes: [batch, N, 4] as (x_min, y_min, x_max, y_max).
            conf:  [batch, N, 1].
            prob:  [batch, N, class_num].
    """
    centers = xy[..., 0:2]
    half_sizes = wh[..., 0:2] / 2.0

    # (x_min, y_min) followed by (x_max, y_max) on the last axis.
    top_left = centers - half_sizes
    bottom_right = centers + half_sizes
    boxes = tf.concat([top_left, bottom_right], -1)

    dims = tf.shape(boxes)
    batch = dims[0]
    # Merge the two grid axes and the anchor axis into one box axis.
    num_boxes = dims[1] * dims[2] * dims[3]

    boxes = tf.reshape(boxes, (batch, num_boxes, dims[4]))
    conf = tf.reshape(conf, (batch, num_boxes, 1))
    prob = tf.reshape(prob, (batch, num_boxes, -1))
    return boxes, conf, prob
# Flatten the predictions of each of the three feature maps with reshape():
# boxes -> [batch_size, N_i, 4], conf -> [batch_size, N_i, 1],
# prob -> [batch_size, N_i, class_num], e.g. N_i = 13*13*3 for a 13x13 grid.
# NOTE(review): xy*/wh*/conf*/prob* are presumably the decode_feature outputs
# of the three detection heads, and this snippet sits inside a function whose
# header is not shown here -- confirm against the full source.
boxes_1, conf_1, prob_1 = reshape(xy1, wh1, conf1, prob1)
boxes_2, conf_2, prob_2 = reshape(xy2, wh2, conf2, prob2)
boxes_3, conf_3, prob_3 = reshape(xy3, wh3, conf3, prob3)
# Merge the three scales along the box axis (axis 1):
# [batch_size, N_1, 4] & [batch_size, N_2, 4] & [batch_size, N_3, 4]
# ==> [batch_size, N_1 + N_2 + N_3, 4]; conf and prob are merged the same way.
boxes = tf.concat([boxes_1, boxes_2, boxes_3], 1)
conf = tf.concat([conf_1, conf_2, conf_3], 1)
prob = tf.concat([prob_1, prob_2, prob_3], 1)
return boxes, conf, prob
NMS处理
针对每一类别,先用score阈值过滤掉低分预测框,再按score进行非极大值抑制(由iou阈值控制),每个类别最多保留max_boxes个框,最终得到每一类别的预测框。
def nms(boxes, scores, class_num, max_boxes=50, score_thresh=0.5, iou_threshold=0.5):
    """Run per-class score filtering and non-max suppression.

    Args:
        boxes: Box coordinates; flattened to [V, 4].
        scores: Per-class scores; flattened to [V, class_num].
        class_num: Number of classes, i.e. the number of NMS passes.
        max_boxes: Maximum number of boxes kept per class.
        score_thresh: Boxes scoring below this for a class are dropped
            before NMS for that class.
        iou_threshold: IoU threshold passed to non-max suppression.

    Returns:
        Tuple (boxes, score, label) concatenated over all classes:
            boxes: [K, 4] kept boxes.
            score: [K] score of each kept box for its class.
            label: [K] int32 class index of each kept box.

    NOTE(review): every leading axis is flattened into V, so boxes from
    different batch entries are suppressed together -- presumably this is
    only used with batch size 1; confirm against the caller.
    """
    max_boxes = tf.constant(max_boxes, dtype='int32')
    flat_boxes = tf.reshape(boxes, [-1, 4])
    flat_scores = tf.reshape(scores, [-1, class_num])
    # [V, class_num] boolean: which (box, class) pairs pass the threshold.
    passes_thresh = tf.greater_equal(flat_scores, tf.constant(score_thresh))

    kept_boxes, kept_scores, kept_labels = [], [], []
    for class_id in range(class_num):
        class_mask = passes_thresh[:, class_id]
        candidate_boxes = tf.boolean_mask(flat_boxes, class_mask)
        candidate_scores = tf.boolean_mask(flat_scores[:, class_id], class_mask)

        selected = tf.image.non_max_suppression(
            boxes=candidate_boxes,
            scores=candidate_scores,
            max_output_size=max_boxes,
            iou_threshold=iou_threshold,
            name='nms_indices',
        )

        class_scores = tf.gather(candidate_scores, selected)
        # One int32 label per surviving box, all equal to this class index.
        kept_labels.append(tf.ones_like(class_scores, 'int32') * class_id)
        kept_boxes.append(tf.gather(candidate_boxes, selected))
        kept_scores.append(class_scores)

    # Join the per-class results into single tensors.
    boxes = tf.concat(kept_boxes, axis=0)
    score = tf.concat(kept_scores, axis=0)
    label = tf.concat(kept_labels, axis=0)
    return boxes, score, label