Calculation of the RPN loss
coordinates input: prediction_bbox, anchor, ground_truth
coordinates sizes: [WH9,4], [WH9,4], [num_object,1]
coordinates original struct: (x1,y1,x2,y2)
coordinates reshape struct: (x,y,w,h)
probability input:[WH9,2],2:[fore,back]
label input:[WH9,1],1:(-1,0,1)
label_bias_p size:[WH9,1],(0,1,1)
label_bias_c size:[WH9,1],(0,0,1)
ground_truth_label input:[WH9,1],(0,1,…,num_object-1)
total input:prediction_bbox,anchor,ground_truth
total input:probability,label,ground_truth_label
loss function: log loss / smooth L1 loss
本次程序实现的是RPN LOSS,以上是程序的备注,IOU的计算是anchor与ground truth之间计算完成的,而非predict_bbox和ground truth之间计算的。在实际编程中,数据的输入不一定是直接满足需求的,在不满足的条件下需要进行自我调整以方便后续程序的设计和执行。在Python的编程过程中,有很多程序的功能已经存在可以使用包或者函数,不必自我编写,可以通过搜索功能或函数API来完成对函数的了解和编程,对于不够了解的函数应通过实践来实现。
本文处理的数据均为张量的形式,但由于TensorFlow不完全是数学计算的库,因此在有些条件下可以调用numpy实现功能的完成。本文用到的函数有:tf.zeros_like/tf.ones_like , tf.expand_dims , tf.abs , tf.reduce_sum , tf.reduce_mean , np.where。
import tensorflow as tf
import numpy as np
class rpn_loss(object):
    """RPN loss: binary log loss (fore/background) + smooth-L1 box regression.

    Per the file header: boxes are [W*H*9, 4] tensors in corner form
    (x1, y1, x2, y2); `probability` is [W*H*9, 2] with columns
    (foreground, background); `label` holds -1 (ignore), 0 (negative),
    1 (positive); `label_gt_order` maps each anchor to the index of its
    matched ground-truth box.  IoU matching happens upstream between the
    anchors and ground truth, not between predictions and ground truth.
    """

    def __init__(self, prediction_bbox, anchor, ground_truth, probability,
                 label, ground_truth_label, label_gt_order):
        self.prediction_bbox = prediction_bbox
        self.anchor = anchor
        self.ground_truth = ground_truth
        self.probability = probability
        self.label = label
        self.ground_truth_label = ground_truth_label
        self.label_gt_order = label_gt_order

    @staticmethod
    def _corners_to_center(bbox):
        """Convert [N, 4] corner boxes (x1,y1,x2,y2) to center form (x,y,w,h).

        FIX: the original wrote `tensor[:, i] = ...`, but TF tensors do not
        support item assignment (TypeError); the columns are built and
        stacked instead.
        """
        x1, y1, x2, y2 = bbox[:, 0], bbox[:, 1], bbox[:, 2], bbox[:, 3]
        return tf.stack([tf.abs(x2 + x1) / 2,
                         tf.abs(y2 + y1) / 2,
                         tf.abs(x2 - x1),
                         tf.abs(y2 - y1)], axis=1)

    def reconsitution_coords(self):
        """Reshape all boxes to (x,y,w,h) and precompute loss weights/masks."""
        self.re_prediction_bbox = self._corners_to_center(self.prediction_bbox)
        self.re_anchor = self._corners_to_center(self.anchor)
        # dimension(1) is number of gt
        self.re_ground_truth = self._corners_to_center(self.ground_truth)
        # (x, y) regression diffs are normalised by the anchor's (w, h);
        # the (w, h) diffs keep weight 1.
        ones = tf.ones_like(self.re_anchor[:, 2])
        self.anchor_weight = tf.stack([1.0 / self.re_anchor[:, 2],
                                       1.0 / self.re_anchor[:, 3],
                                       ones,
                                       ones], axis=1)
        # Matched ground-truth box per anchor.  FIX: fancy indexing
        # (`tensor[index_tensor]`) is not supported; use tf.gather.
        self.label_gt_order = tf.expand_dims(self.label_gt_order, axis=1)
        self.re_label_gt_order = tf.gather(self.re_ground_truth,
                                           self.label_gt_order[:, 0])
        # FIX: foreground probability comes from `probability` column 0
        # ([WH9,2] = [fore, back]); the original mistakenly read a bbox
        # coordinate from re_prediction_bbox.
        self.probability_fore = self.probability[:, 0]
        self.label_dim2 = tf.expand_dims(self.label, axis=1)
        # Label masks, per the header: label_bias_p (-1,0,1)->(0,1,1)
        # (classification counts non-ignored anchors) and label_bias_c
        # (-1,0,1)->(0,0,1) (regression counts positives only).
        # FIX: computed with graph ops — the original ran a tf.Session
        # during graph construction and round-tripped through numpy.
        self.label_weight_p = tf.to_float(tf.not_equal(self.label, -1))
        self.label_weight_c = tf.to_float(tf.equal(self.label, 1))
        self.label_weight_p = tf.expand_dims(self.label_weight_p, axis=1)
        self.label_weight_c = tf.expand_dims(self.label_weight_c, axis=1)
        self.label = tf.expand_dims(self.label, axis=1)

    def class_loss(self, p_pred, label, weight):
        """Weighted binary cross-entropy over foreground probability.

        FIX: the original omitted the leading minus sign, returning the
        log-likelihood — minimizing it would maximize the loss.
        sparse_softmax_cross_entropy_with_logits could replace this if the
        inputs were logits rather than probabilities.
        """
        l_loss = -(label * tf.log(p_pred) + (1 - label) * tf.log(1 - p_pred))
        l_loss_sum = tf.reduce_mean(tf.reduce_sum(l_loss * weight))
        return l_loss_sum

    def smooth_l1_loss(self, bbox_predicted, bbox_ground_truth, w_h_1_1,
                       weight, lmd=10, sigma=1.0, dim2mean=1):
        """Weighted smooth-L1 regression loss, scaled by `lmd`.

        Args:
            bbox_predicted: [N, 4] predicted boxes in (x, y, w, h).
            bbox_ground_truth: [N, 4] matched ground truth in (x, y, w, h).
            w_h_1_1: [N, 4] per-coordinate normalisation (1/w_a, 1/h_a, 1, 1).
            weight: [N, 1] regression mask (non-zero only for positives).
            lmd: balancing factor between classification and regression.
            sigma: transition point of the smooth-L1 (quadratic for
                |x| < 1/sigma^2).  FIX: the original accepted `sigma` but
                ignored it; behavior is unchanged at the default sigma=1.
            dim2mean: axis reduced by the per-box sum.
        """
        sigma2 = sigma * sigma
        bbox_diff = bbox_ground_truth - bbox_predicted
        t_diff = bbox_diff * weight * w_h_1_1
        t_diff_abs = tf.abs(t_diff)
        # 1 on the quadratic branch, 0 on the linear one; the branch
        # indicator itself must not be differentiated.
        in_quad = tf.stop_gradient(
            tf.to_float(tf.less(t_diff_abs, 1.0 / sigma2)))
        sl_loss_box = in_quad * 0.5 * sigma2 * tf.pow(t_diff_abs, 2) + \
            (1 - in_quad) * (t_diff_abs - 0.5 / sigma2)
        sum_loss_box = tf.reduce_sum(sl_loss_box, axis=dim2mean)
        return sum_loss_box * lmd

    def add_loss(self):
        """Build both loss terms; call `reconsitution_coords` first."""
        self.log_loss = self.class_loss(
            self.probability_fore, self.label, self.label_weight_p)
        self.reg_loss = self.smooth_l1_loss(
            self.re_prediction_bbox, self.re_label_gt_order,
            self.anchor_weight, self.label_weight_c)