实验结果
注:各项实验互不叠加,每项改动均单独在 baseline 上进行
ps | model | map | delta |
---|---|---|---|
baseline | 001/19 | 84.75 | 0 |
focal loss a=0.25 r=2 | 002/41 | 86.16 | +1.41 |
L2损失 | 003/19 | 85.11 | +0.36 |
GIOU | 004/17 | 84.85 | +0.1 |
soft nms | 001/19 | 84.09 | -0.66 |
focal loss
参考:https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/losses.py
发现 $\alpha$ 分配给正负样本的不一样(正样本为 $\alpha$,负样本为 $1-\alpha$),一直以为 $\alpha$ 是为了调节置信度损失和其他损失(比如回归损失)的比例的,果然看懂论文和看懂代码是两件不相干的事情
关键代码如下
# Focal-loss weighting of the confidence loss.
# alpha weights positive cells and (1 - alpha) weights background cells;
# gamma is the exponent applied to the modulating factor.
alpha = 0.25
gamma = 2.0

# Quantities shared by the positive and the negative term.
pred_conf_prob = tf.nn.sigmoid(conv_raw_conf)
conf_bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
is_pos = tf.equal(respond_bbox, 1)
is_neg = tf.equal(respond_bgd, 1)

# Focal coefficient for positive samples: alpha * (1 - p) ** gamma, zero elsewhere.
pos_alpha = tf.where(is_pos, tf.ones_like(respond_bbox) * alpha, tf.zeros_like(respond_bbox))
pos_modulator = tf.where(is_pos, 1 - pred_conf_prob, tf.zeros_like(respond_bbox))
pos_focal_weight = pos_alpha * pos_modulator ** gamma

# Focal coefficient for negative samples: (1 - alpha) * p ** gamma, zero elsewhere.
neg_alpha = tf.where(is_neg, 1 - tf.ones_like(respond_bgd) * alpha, tf.zeros_like(respond_bgd))
neg_modulator = tf.where(is_neg, pred_conf_prob, tf.zeros_like(respond_bgd))
neg_focal_weight = neg_alpha * neg_modulator ** gamma

# Both terms use the same cross-entropy against respond_bbox; the masks
# respond_bbox / respond_bgd select which cells each term contributes to.
conf_loss = (
    pos_focal_weight * respond_bbox * conf_bce
    + neg_focal_weight * respond_bgd * conf_bce
)
L2 损失
类似Adaboost中的思想,如果预测值和真实值相差过大(难分样本),则应占有更大的比重,反之(易分样本)则是小比重
# Confidence loss re-weighted by the squared prediction error.
# The weight (respond_bbox - pred_conf) ** 2 grows with the gap between the
# target and the predicted confidence, so badly predicted (hard) cells count
# more and well predicted (easy) cells count less.
# NOTE: tf.abs takes a single tensor (its second positional argument is the
# op name), so the difference has to be formed before calling it.
conf_loss = tf.pow(tf.abs(respond_bbox - pred_conf), 2) * (
    respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
    +
    respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
)
GIOU
def GIOU(boxes1, boxes2):
    """Return the Generalized IoU of ``boxes1`` and ``boxes2``.

    Both arguments are indexed on their last axis as
    ``(x_min, y_min, x_max, y_max)``: the first two entries are treated as the
    top-left corner and the last two as the bottom-right corner.

    The result is ``IoU - (enclose_area - union_area) / enclose_area``, where
    ``enclose_area`` is the area of the smallest box containing both inputs.
    """
    # Smallest enclosing box: top-left and bottom-right corners.
    outer_top_left = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    outer_bottom_right = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    outer_wh = tf.maximum(outer_bottom_right - outer_top_left, 0.0)
    enclose_area = outer_wh[..., 0] * outer_wh[..., 1]

    # Top-left and bottom-right corners of the overlap of boxes1 and boxes2.
    inner_top_left = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    inner_bottom_right = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    # When the boxes do not overlap, (bottom_right - top_left) is negative;
    # clamping at 0 makes the intersection area, and hence the IoU, zero.
    inner_wh = tf.maximum(inner_bottom_right - inner_top_left, 0.0)
    inter_area = inner_wh[..., 0] * inner_wh[..., 1]

    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
    union_area = area1 + area2 - inter_area

    iou = 1.0 * inter_area / union_area
    giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
    return giou
NMS / SOFT NMS
def nms(bboxes, score_threshold, iou_threshold, sigma=0.3, method='nms'):
    """Apply per-class NMS or Soft-NMS to a set of scored boxes.

    :param bboxes: array of shape (N, 6) stored as
        (xmin, ymin, xmax, ymax, score, class). Coordinates are relative to
        the original input image, score = conf * prob, and class is the index
        of the category the box belongs to.
    :param score_threshold: after each suppression round, only candidates
        whose re-weighted score is strictly greater than this value are kept.
    :param iou_threshold: used by ``method='nms'`` only; candidates whose IoU
        with the selected box exceeds it get their score set to zero.
    :param sigma: used by ``method='soft-nms'`` only; candidate scores are
        multiplied by ``exp(-iou ** 2 / sigma)``.
    :param method: ``'nms'`` or ``'soft-nms'``.
    :return: list of the selected boxes, each a 1-D array in the same
        (xmin, ymin, xmax, ymax, score, class) layout as the input rows.
    :raises ValueError: if ``method`` is not one of the supported names.
    """
    # Validate once, up front. An ``assert`` inside the loop is stripped under
    # ``python -O`` (silently disabling suppression) and never runs at all for
    # empty input.
    if method not in ('nms', 'soft-nms'):
        raise ValueError("method must be 'nms' or 'soft-nms', got %r" % (method,))

    best_bboxes = []
    # Process each class present in the image independently.
    for cls in set(bboxes[:, 5]):
        # Boolean-mask indexing copies, so the caller's array is never modified.
        cls_bboxes = bboxes[bboxes[:, 5] == cls]
        while len(cls_bboxes) > 0:
            # Select the highest-scoring remaining box of this class.
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            # Remove the selected box from the candidate set.
            cls_bboxes = np.concatenate([cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])
            if len(cls_bboxes) == 0:
                # Nothing left to suppress; skip the IoU computation.
                break
            # iou_calc1 is defined outside this block; it is expected to return
            # one IoU value per remaining candidate.
            iou = iou_calc1(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
            if method == 'nms':
                # Hard suppression: zero the score of heavily overlapping boxes.
                weight = np.ones((len(iou),), dtype=np.float32)
                weight[iou > iou_threshold] = 0.0
            else:
                # Soft-NMS: Gaussian decay of the score with increasing overlap.
                weight = np.exp(-(1.0 * iou ** 2 / sigma))
            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            cls_bboxes = cls_bboxes[cls_bboxes[:, 4] > score_threshold]
    return best_bboxes