def _build_detector(self):
    """Decode the raw network output into thresholded, NMS-filtered boxes.

    Creates the ``img_w`` / ``img_h`` placeholders, splits row 0 of
    ``self.predicts`` into class probabilities, box confidences and box
    coordinates, rescales the boxes by the fed image size, keeps the best
    class per box, discards boxes scoring below ``self.threshold`` and
    applies class-agnostic non-max suppression.

    Nothing is returned; the results are stored on ``self.scores``,
    ``self.boxes`` (as x, y, w, h) and ``self.box_classes``.
    """
    # Size of the original image, supplied through placeholders.
    self.width = tf.placeholder(tf.float32, name="img_w")
    self.height = tf.placeholder(tf.float32, name="img_h")

    grid, n_boxes, n_classes = self.S, self.B, self.C

    # The flat prediction vector is laid out as
    # [class probs | box confidences | box coordinates].
    probs_end = grid * grid * n_classes
    confs_end = probs_end + grid * grid * n_boxes

    # Per-cell class probabilities: [S, S, C].
    cell_class_probs = tf.reshape(self.predicts[0, :probs_end],
                                  [grid, grid, n_classes])
    # Per-box confidences: [S, S, B].
    box_confs = tf.reshape(self.predicts[0, probs_end:confs_end],
                           [grid, grid, n_boxes])
    # Per-box (x, y, w, h): [S, S, B, 4].
    raw_boxes = tf.reshape(self.predicts[0, confs_end:],
                           [grid, grid, n_boxes, 4])

    # x, y: add the stored offsets, divide by the grid size, then scale by
    # the image width / height.
    raw_x = raw_boxes[:, :, :, 0]
    x_shift = tf.constant(self.x_offset, dtype=tf.float32)
    abs_x = (raw_x + x_shift) / grid * self.width
    raw_y = raw_boxes[:, :, :, 1]
    y_shift = tf.constant(self.y_offset, dtype=tf.float32)
    abs_y = (raw_y + y_shift) / grid * self.height
    # w, h: the predicted values are square roots, so square them before
    # scaling by the image size.
    abs_w = tf.square(raw_boxes[:, :, :, 2]) * self.width
    abs_h = tf.square(raw_boxes[:, :, :, 3]) * self.height
    decoded = tf.stack([abs_x, abs_y, abs_w, abs_h], axis=3)

    # Class-specific confidence scores: [S, S, B, 1] * [S, S, 1, C]
    # broadcasts to [S, S, B, C].
    confs_4d = tf.expand_dims(box_confs, -1)
    probs_4d = tf.expand_dims(cell_class_probs, 2)
    class_scores = tf.reshape(confs_4d * probs_4d, [-1, n_classes])  # [S*S*B, C]
    flat_boxes = tf.reshape(decoded, [-1, 4])  # [S*S*B, 4]

    # Each box keeps only its highest-scoring class.
    best_class = tf.argmax(class_scores, axis=1)
    best_score = tf.reduce_max(class_scores, axis=1)

    # Drop every box whose best score is below the threshold.
    keep = best_score >= self.threshold
    kept_scores = tf.boolean_mask(best_score, keep)
    kept_boxes = tf.boolean_mask(flat_boxes, keep)
    kept_classes = tf.boolean_mask(best_class, keep)

    # Non-max suppression across all classes at once.
    # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
    # The boxes are converted from (x, y, w, h) to corners (x1, y1, x2, y2).
    # NOTE(review): the API documents [y1, x1, y2, x2]; the axes are swapped
    # consistently for every box here, which should leave the IoU unchanged.
    x_min = kept_boxes[:, 0] - 0.5 * kept_boxes[:, 2]
    y_min = kept_boxes[:, 1] - 0.5 * kept_boxes[:, 3]
    x_max = kept_boxes[:, 0] + 0.5 * kept_boxes[:, 2]
    y_max = kept_boxes[:, 1] + 0.5 * kept_boxes[:, 3]
    corners = tf.stack([x_min, y_min, x_max, y_max], axis=1)
    selected = tf.image.non_max_suppression(
        corners, kept_scores, self.max_output_size, self.iou_threshold)

    self.scores = tf.gather(kept_scores, selected)
    self.boxes = tf.gather(kept_boxes, selected)
    self.box_classes = tf.gather(kept_classes, selected)
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.