TensorFlow-SSD测试代码梳理,相信看过后对SSD的整个流程都一目了然了。
硬件:NVIDIA-GTX1080
软件:Windows7、python3.6.5、tensorflow-gpu-1.4.0
一、基础知识
1、anchor boxes (x, y, w, h)
1.1 x和y
例如38x38特征图输出,每个单元格对应原始图像约300/38 ≈ 8x8像素(代码中anchor_steps取8),则特征图[0]单元格中心的x和y在原始图像上的位置约为4和4(即(0+0.5)*8),以单元格为单位归一化后就是0.5和0.5;特征图[1]的x和y在原始图像上的位置约为12和4,归一化后就是1.5和0.5,以此类推……
1.2 w和h
1.2.1 根据经验公式得出6层输出的anchor boxes边界框尺寸(对应300x300图像):
step= (max_ratio*100 - min_ratio*100) / (n_feat_layers - 2)
ratio = [min_ratio:step:max_ratio]
box_size[0] = 300*ratio / 100 ...(表示1:1)
box_size[1] = 300*(ratio+step) / 100 ...(表示1':1',1'表示边框比1大,但是比例还是1:1)
1.2.2 根据宽高比例得出6层输出对应的anchor boxes边界框大小(scale = 1:1, 1':1', 1:2, 2:1, 1:3, 3:1)
w[0] = box_size[0], h[0] = box_size[0] 归一化后 w[0] /= img_size[0], h[0] /= img_size[1]
w[1] = sqrt(box_size[0] * box_size[1]), h[1] = sqrt(box_size[0] * box_size[1])(代码中第二个1:1框取两个尺寸的几何平均) 归一化后 w[1] /= img_size[0], h[1] /= img_size[1]
w[2:] = box_size[0] * sqrt(scale[2:]), h[2:] = box_size[0] / sqrt(scale[2:]) 归一化道理类似
1.3 边界框总个数
38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732
2、边界框相对anchor boxes回归
P for prediction, B for boundingbox, A for Anchor boxes, S for prior_scaling超参数
Px = (Bx - Ax)/(Aw * Sx) 则 Bx = Px * Aw * Sx + Ax,Py道理类似
Pw = ln(Bw / Aw) / Sw 则 Bw = exp(Pw * Sw) * Aw,Ph道理类似
3、与YOLO不同,SSD是直接将背景作为一个额外的类别(20类目标 + 1类背景,共21类;代码中背景对应类别下标0)进行分类学习,其分类的结果就是得分
二、代码展示(做了详细注释)
1.demo_ssd.py
"""
SSD demo
"""
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.image as mpimg
from ssd_300_vgg import SSD
from utils import preprocess_image, process_bboxes
from visualization import plt_bboxes
# Build the SSD graph once and fetch its input placeholder and output tensors.
ssd_net = SSD()
classes, scores, bboxes = ssd_net.detections()
images = ssd_net.images()
sess = tf.Session()
# Restore SSD model.
ckpt_filename = './ssd_checkpoints/ssd_vgg_300_weights.ckpt'
# Variables are initialised first and then overwritten by the checkpoint.
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(sess, ckpt_filename)
# NOTE(review): cv2.imread presumably returns None when the file is missing,
# which would make the cvtColor call below fail -- confirm and guard if needed.
img = cv2.imread('./demo/dog.jpg')
# Convert OpenCV's BGR channel order to RGB before preprocessing and plotting.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Mean-subtract, resize to 300x300 and add the batch dimension.
img_prepocessed = preprocess_image(img)
# Run the network on the single preprocessed image.
rclasses, rscores, rbboxes = sess.run([classes, scores, bboxes],
feed_dict={images: img_prepocessed})
# Clip, sort by score, and apply NMS to the raw detections.
rclasses, rscores, rbboxes = process_bboxes(rclasses, rscores, rbboxes)
# Draw the surviving boxes on the original (un-resized) RGB image.
plt_bboxes(img, rclasses, rscores, rbboxes)
2.ssd_300_vgg.py
"""
SSD net (vgg_based) 300x300
"""
from collections import namedtuple
import numpy as np
import tensorflow as tf
from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \
pad2d, ssd_multibox_layer
from ssd_anchors import ssd_anchors_all_layers
# SSD parameters
# Immutable bundle of every hyper-parameter the SSD network needs.
SSDParams = namedtuple(
    'SSDParameters',
    [
        'img_shape',            # the input image size: 300x300
        'num_classes',          # number of classes: 20+1
        'no_annotation_label',
        'feat_layers',          # names of the feature maps used for detection
        'feat_shapes',          # spatial size of each detection feature map
        'anchor_size_bounds',   # (Smin, Smax) used to derive the anchor sizes
        'anchor_sizes',         # anchor sizes
        'anchor_ratios',        # anchor aspect ratios
        'anchor_steps',         # cell size in input pixels, e.g. 38x38 -> 300/38 ~ 8
        'anchor_offset',        # the center point offset
        'normalizations',       # list of normalizations of layer for detection
        'prior_scaling',        # "variance" hyper-parameters used to decode boxes
    ],
)
class SSD(object):
    """SSD net 300 (VGG based) with box decoding and score filtering.

    Creating an instance builds the input placeholder, the network and the
    detection tensors returned by ``detections()``. The box-selection code
    only supports a batch size of 1.
    """

    def __init__(self, is_training=True):
        """Build the network and the detection outputs.

        Args:
            is_training: stored as ``self.is_training``; its only use in this
                class is the commented-out dropout call in ``_built_net``.
        """
        self.is_training = is_training
        self.threshold = 0.5  # class score threshold
        self.ssd_params = SSDParams(
            img_shape=(300, 300),
            num_classes=21,
            no_annotation_label=21,
            feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],
            feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
            anchor_size_bounds=[0.15, 0.90],  # diff from the original paper
            # num1->1:1, num2->1:1(+step)
            anchor_sizes=[(21., 45.),
                          (45., 99.),
                          (99., 153.),
                          (153., 207.),
                          (207., 261.),
                          (261., 315.)],
            anchor_ratios=[[2, .5],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5],
                           [2, .5]],
            anchor_steps=[8, 16, 32, 64, 100, 300],
            anchor_offset=0.5,
            normalizations=[20, -1, -1, -1, -1, -1],
            prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )
        predictions, logits, locations = self._built_net()
        # NOTE: feat_shapes could be refreshed from the graph with
        # self._update_feat_shapes_from_net(predictions); left disabled as in
        # the original code.
        classes, scores, bboxes = self._bboxes_select(predictions, locations)
        self._classes = classes
        self._scores = scores
        self._bboxes = bboxes

    def _built_net(self):
        """Construct the SSD net.

        Returns:
            (predictions, logits, locations): three lists with one entry per
            layer named in ``feat_layers``. ``logits`` are the raw class
            outputs, ``predictions`` their softmax, ``locations`` the box
            regression outputs.
        """
        self.end_points = {}  # record the detection layers output
        self._images = tf.placeholder(
            tf.float32,
            shape=[None, self.ssd_params.img_shape[0],
                   self.ssd_params.img_shape[1], 3])
        with tf.variable_scope("ssd_300_vgg"):
            # original vgg layers
            # block 1
            net = conv2d(self._images, 64, 3, scope="conv1_1")
            net = conv2d(net, 64, 3, scope="conv1_2")
            self.end_points["block1"] = net
            net = max_pool2d(net, 2, scope="pool1")
            # block 2
            net = conv2d(net, 128, 3, scope="conv2_1")
            net = conv2d(net, 128, 3, scope="conv2_2")
            self.end_points["block2"] = net
            net = max_pool2d(net, 2, scope="pool2")
            # block 3
            net = conv2d(net, 256, 3, scope="conv3_1")
            net = conv2d(net, 256, 3, scope="conv3_2")
            net = conv2d(net, 256, 3, scope="conv3_3")
            self.end_points["block3"] = net
            net = max_pool2d(net, 2, scope="pool3")
            # block 4
            net = conv2d(net, 512, 3, scope="conv4_1")
            net = conv2d(net, 512, 3, scope="conv4_2")
            net = conv2d(net, 512, 3, scope="conv4_3")
            self.end_points["block4"] = net
            net = max_pool2d(net, 2, scope="pool4")
            # block 5
            net = conv2d(net, 512, 3, scope="conv5_1")
            net = conv2d(net, 512, 3, scope="conv5_2")
            net = conv2d(net, 512, 3, scope="conv5_3")
            self.end_points["block5"] = net
            # Debug output: the tensor before and after pool5.
            print(net)
            net = max_pool2d(net, 3, stride=1, scope="pool5")
            print(net)
            # additional SSD layers
            # block 6: use dilate conv
            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")
            self.end_points["block6"] = net
            # net = dropout(net, is_training=self.is_training)
            # block 7
            net = conv2d(net, 1024, 1, scope="conv7")
            self.end_points["block7"] = net
            # block 8
            net = conv2d(net, 256, 1, scope="conv8_1x1")
            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",
                         padding="valid")
            self.end_points["block8"] = net
            # block 9
            net = conv2d(net, 128, 1, scope="conv9_1x1")
            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",
                         padding="valid")
            self.end_points["block9"] = net
            # block 10
            net = conv2d(net, 128, 1, scope="conv10_1x1")
            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")
            self.end_points["block10"] = net
            # block 11
            net = conv2d(net, 128, 1, scope="conv11_1x1")
            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")
            self.end_points["block11"] = net
            # class and location predictions
            predictions = []
            logits = []
            locations = []
            for i, layer in enumerate(self.ssd_params.feat_layers):
                cls, loc = ssd_multibox_layer(
                    self.end_points[layer], self.ssd_params.num_classes,
                    self.ssd_params.anchor_sizes[i],
                    self.ssd_params.anchor_ratios[i],
                    self.ssd_params.normalizations[i], scope=layer + "_box")
                predictions.append(tf.nn.softmax(cls))
                logits.append(cls)
                locations.append(loc)
            return predictions, logits, locations

    def _update_feat_shapes_from_net(self, predictions):
        """Replace ``ssd_params.feat_shapes`` with shapes read from the graph.

        Args:
            predictions: iterable of tensors; for each one, every static
                dimension after the batch axis is recorded.
        """
        new_feat_shapes = []
        for l in predictions:
            new_feat_shapes.append(l.get_shape().as_list()[1:])
        # namedtuple._replace returns a NEW tuple; the original code dropped
        # the result, so the update never took effect.
        self.ssd_params = self.ssd_params._replace(feat_shapes=new_feat_shapes)

    def anchors(self):
        """Get SSD anchors for every detection layer (numpy float32)."""
        return ssd_anchors_all_layers(self.ssd_params.img_shape,
                                      self.ssd_params.feat_shapes,
                                      self.ssd_params.anchor_sizes,
                                      self.ssd_params.anchor_ratios,
                                      self.ssd_params.anchor_steps,
                                      self.ssd_params.anchor_offset,
                                      np.float32)

    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):
        """Decode the feat location of one layer.

        Args:
            feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4]
            anchor_bboxes: list of Tensors (y, x, h, w) with shapes
                [size, size, 1], [size, size, 1], [n_anchors], [n_anchors]
            prior_scaling: list of 4 floats

        Returns:
            Tensor [batch_size, size, size, n_anchors, 4] holding
            (ymin, xmin, ymax, xmax).
        """
        yref, xref, href, wref = anchor_bboxes
        print(yref)  # debug output
        # Compute center, height and width (feat_locations: None,38,38,4,4)
        # P: prediction, B: bounding box, A: anchor box, S: prior_scaling
        # Px = (Bx - Ax)/(Aw * Sx) -> Bx = Px * Aw * Sx + Ax
        # Pw = ln(Bw / Aw) / Sw -> Bw = exp(Pw * Sw) * Aw
        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref
        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])
        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])
        # compute boxes coordinates (ymin, xmin, ymax, xmax)
        bboxes = tf.stack([cy - h / 2., cx - w / 2.,
                           cy + h / 2., cx + w / 2.], axis=-1)
        # shape [batch_size, size, size, n_anchors, 4]
        return bboxes

    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,
                             prior_scaling):
        """Select boxes from the feat layer, only for batch_size=1.

        Returns:
            (classes, scores, bboxes) for the boxes whose best non-background
            score exceeds ``self.threshold``.
        """
        # e.g. None,38,38,4,21 -> 38*38*4 boxes
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        n_bboxes = np.prod(feat_predictions.get_shape().as_list()[1:-1])
        # decode the location
        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)
        bboxes = tf.reshape(bboxes, [n_bboxes, 4])
        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])
        # remove the background predictions (column 0)
        sub_predictions = predictions[:, 1:]
        # choose the max score class; +1 restores the label after dropping column 0
        classes = tf.argmax(sub_predictions, axis=1) + 1  # class labels
        scores = tf.reduce_max(sub_predictions, axis=1)  # max_class scores
        # Boxes selection: use threshold
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        bboxes = tf.boolean_mask(bboxes, filter_mask)
        return classes, scores, bboxes

    def _bboxes_select(self, predictions, locations):
        """Select all bboxes predictions, only for batch_size=1."""
        anchor_bboxes_list = self.anchors()
        classes_list = []
        scores_list = []
        bboxes_list = []
        # select bboxes for each feat layer
        for layer_predictions, layer_locations, layer_anchors in zip(
                predictions, locations, anchor_bboxes_list):
            # e.g. anchor_bboxes_list[0]: y(38,38,1), x(38,38,1), h(4,), w(4,)
            anchor_bboxes = list(map(tf.convert_to_tensor, layer_anchors))
            classes, scores, bboxes = self._bboxes_select_layer(
                layer_predictions, layer_locations, anchor_bboxes,
                self.ssd_params.prior_scaling)
            classes_list.append(classes)
            scores_list.append(scores)
            bboxes_list.append(bboxes)
        # combine all feat layers
        classes = tf.concat(classes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)
        bboxes = tf.concat(bboxes_list, axis=0)
        return classes, scores, bboxes

    def images(self):
        """Return the input image placeholder."""
        return self._images

    def detections(self):
        """Return the (classes, scores, bboxes) output tensors."""
        return self._classes, self._scores, self._bboxes
'''
if __name__ == "__main__":
ssd = SSD()
sess = tf.Session()
saver_ = tf.train.Saver()
saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")
'''
3.ssd_layers.py
"""
Layers for SSD
"""
import tensorflow as tf
# Conv2d: for stride = 1
def conv2d(x, filters, kernel_size, stride=1, padding="same",
           dilation_rate=1, activation=tf.nn.relu, scope="conv2d"):
    """Square 2-D convolution built on ``tf.layers.conv2d``.

    The scalar ``kernel_size``, ``stride`` and ``dilation_rate`` are applied
    to both spatial dimensions; ``scope`` becomes the layer name.
    """
    return tf.layers.conv2d(
        x,
        filters,
        [kernel_size, kernel_size],
        strides=[stride, stride],
        dilation_rate=[dilation_rate, dilation_rate],
        padding=padding,
        name=scope,
        activation=activation,
    )
# max pool2d: default pool_size = stride
def max_pool2d(x, pool_size, stride=None, scope="max_pool2d"):
    """Square max pooling with "same" padding.

    When ``stride`` is None the stride equals ``pool_size``.
    """
    effective_stride = pool_size if stride is None else stride
    window = [pool_size, pool_size]
    step = [effective_stride, effective_stride]
    return tf.layers.max_pooling2d(x, window, step, name=scope, padding="same")
# pad2d: for conv2d with stride > 1
def pad2d(x, pad):
    """Pad axes 1 and 2 of a 4-D tensor by ``pad`` on each side.

    Axes 0 and 3 are left untouched.
    """
    untouched = [0, 0]
    both_sides = [pad, pad]
    return tf.pad(x, paddings=[untouched, both_sides, both_sides, untouched])
# dropout
def dropout(x, rate=0.5, is_training=True):
    """Apply ``tf.layers.dropout`` with the given rate and training flag."""
    dropped = tf.layers.dropout(x, rate=rate, training=is_training)
    return dropped
# l2norm (not batch norm): L2-normalize along axis 3, then scale by a per-channel gamma
def l2norm(x, scale, trainable=True, scope="L2Normalization"):
    """L2-normalize ``x`` along axis 3 and rescale it per channel.

    A variable "gamma" with one entry per channel (last static dimension of
    ``x``) is created under ``scope`` and initialised to ``scale``.
    """
    channels = x.get_shape().as_list()[-1]
    normalized = tf.nn.l2_normalize(x, [3], epsilon=1e-12)
    with tf.variable_scope(scope):
        gamma = tf.get_variable(
            "gamma",
            shape=[channels, ],
            dtype=tf.float32,
            initializer=tf.constant_initializer(scale),
            trainable=trainable,
        )
        return normalized * gamma
# multibox layer: get class and location predictions from a detection layer
def ssd_multibox_layer(x, num_classes, sizes, ratios, normalization=-1, scope="multibox"):
    """Attach the class and box-regression heads to one detection layer.

    Args:
        x: feature map tensor; its middle static dimensions (e.g. 38x38) are
            reused for the output shapes.
        num_classes: number of class scores per anchor.
        sizes, ratios: anchor sizes and ratios; only their lengths are used
            here (anchors per cell = len(sizes) + len(ratios)).
        normalization: when > 0, ``x`` is first passed through ``l2norm``
            with this value as the initial scale.
        scope: variable scope for the two conv heads.

    Returns:
        (cls_pred, loc_pred) reshaped to [-1, ..., n_anchors, num_classes]
        and [-1, ..., n_anchors, 4].
    """
    # [-1, 38, 38], [-1, 19, 19], ...
    leading_shape = [-1] + x.get_shape().as_list()[1:-1]
    with tf.variable_scope(scope):
        # optional l2 norm
        if normalization > 0:
            x = l2norm(x, normalization)
        print(x)  # debug output
        anchors_per_cell = len(sizes) + len(ratios)
        # box regression head
        raw_loc = conv2d(x, anchors_per_cell * 4, 3, activation=None, scope="conv_loc")
        loc_pred = tf.reshape(raw_loc, leading_shape + [anchors_per_cell, 4])
        # classification head
        raw_cls = conv2d(x, anchors_per_cell * num_classes, 3, activation=None,
                         scope="conv_cls")
        cls_pred = tf.reshape(raw_cls, leading_shape + [anchors_per_cell, num_classes])
        return cls_pred, loc_pred
4.ssd_anchors.py
"""
SSD anchors
"""
import math
import numpy as np
# 通过Smin和Smax计算anchor尺寸
def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(300, 300)):
    """Compute the reference sizes of the anchor boxes from relative bounds.

    The absolute values are measured in pixels, based on the network
    default size (300 pixels).
    This function follows the computation performed in the original
    implementation of SSD in Caffe.

    Args:
        size_bounds: (min, max) anchor scale as fractions of the image size.
        n_feat_layers: number of detection layers; the percent step is
            floor((max - min) / (n_feat_layers - 2)).
        img_shape: square input image shape.

    Return:
        list of list containing the absolute sizes at each scale. For each scale,
        the ratios only apply to the first value. The first entry is the
        half-size pair derived from the lower bound; one pair follows for
        every percent value in range(min, max + 1, step).
    """
    assert img_shape[0] == img_shape[1]
    img_size = img_shape[0]
    # Round instead of truncating: 0.29 * 100 evaluates to 28.999999999999996,
    # which int() would turn into 28 rather than the intended 29.
    min_ratio = int(round(size_bounds[0] * 100))
    max_ratio = int(round(size_bounds[1] * 100))
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes
def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Arguments:
        img_shape: image shape; centers, heights and widths are expressed
            relative to it.
        feat_shape: feature map shape, used to build the grid of cell indices.
        sizes: absolute reference sizes in pixels.
        ratios: aspect ratios applied to sizes[0].
        step: size of one feature cell in image pixels.
        offset: position of the center inside a cell.
        dtype: numpy dtype of the returned arrays.

    Return:
        y, x, h, w: center grids of shape [rows, cols, 1] and per-anchor
        height/width vectors of length len(sizes) + len(ratios).
    """
    img_h, img_w = img_shape[0], img_shape[1]

    # Cell centers follow the SSD-Caffe convention: (index + offset) * step
    # gives the position in pixels, which is then divided by the image size.
    rows, cols = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # A trailing axis is added so the grids broadcast against the anchors.
    y = ((rows.astype(dtype) + offset) * step / img_h)[..., np.newaxis]
    x = ((cols.astype(dtype) + offset) * step / img_w)[..., np.newaxis]

    n_total = len(sizes) + len(ratios)
    h = np.zeros((n_total, ), dtype=dtype)
    w = np.zeros((n_total, ), dtype=dtype)

    # First anchor: 1:1 box of sizes[0].
    base = sizes[0]
    h[0] = base / img_h
    w[0] = base / img_w
    cursor = 1

    # Second anchor: 1:1 box at the geometric mean of sizes[0] and sizes[1].
    if len(sizes) > 1:
        geo_mean = math.sqrt(base * sizes[1])
        h[1] = geo_mean / img_h
        w[1] = geo_mean / img_w
        cursor = 2

    # Remaining anchors use sizes[0] only; multiplying the width and dividing
    # the height by sqrt(r) keeps the area equal to sizes[0] ** 2.
    for slot, ratio in enumerate(ratios, start=cursor):
        h[slot] = base / math.sqrt(ratio) / img_h
        w[slot] = base * math.sqrt(ratio) / img_w
    return y, x, h, w
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers.

    Returns a list with one ``ssd_anchor_one_layer`` result per entry of
    ``layers_shape``; the i-th sizes/ratios/steps entry is paired with the
    i-th layer shape.
    """
    return [
        ssd_anchor_one_layer(img_shape, shape,
                             anchor_sizes[idx],
                             anchor_ratios[idx],
                             anchor_steps[idx],
                             offset=offset, dtype=dtype)
        for idx, shape in enumerate(layers_shape)
    ]
'''
# 通过Smin和Smax计算anchor尺寸
output = ssd_size_bounds_to_values([0.15, 0.90], 6)
print(output)
'''
5.utils.py
"""
Help functions for SSD
"""
import cv2
import numpy as np
############## preprocess image ##################
# whiten the image
def whiten_image(image, means=(123., 117., 104.)):
    """Subtracts the given means from each image channel.

    Args:
        image: numpy array of shape [height, width, channels].
        means: one mean per channel.

    Returns:
        The mean-subtracted image. Unsigned-integer and boolean inputs are
        promoted to float32 first; other dtypes are preserved.

    Raises:
        ValueError: if ``image`` is not 3-D or ``means`` does not have one
            entry per channel.
    """
    if image.ndim != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    num_channels = image.shape[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')
    # Subtracting in an unsigned dtype (e.g. uint8 straight from an image
    # loader) wraps around instead of going negative, so promote first.
    if image.dtype.kind in "ub":
        image = image.astype(np.float32)
    mean = np.array(means, dtype=image.dtype)
    image = image - mean
    return image
def resize_image(image, size=(300, 300)):
    """Resize ``image`` with ``cv2.resize``.

    ``size`` is forwarded unchanged as the second argument of ``cv2.resize``.
    """
    resized = cv2.resize(image, size)
    return resized
def preprocess_image(image):
    """Prepare one image for inference.

    Steps: copy to float32, subtract the channel means, resize to the
    default network size, and add a leading batch axis.
    """
    as_float = np.copy(image).astype(np.float32)
    resized = resize_image(whiten_image(as_float))
    # The network expects a batch, so add a leading axis of length 1.
    return np.expand_dims(resized, axis=0)
############## process bboxes ##################
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes with respect to reference bbox.

    Both arguments use (ymin, xmin, ymax, xmax) order. The input array is not
    modified; a clipped copy is returned.
    """
    ref = np.transpose(bbox_ref)
    # Work on a transposed copy so each coordinate is one row.
    coords = np.transpose(np.copy(bboxes))
    for k in (0, 1):  # ymin, xmin may not go below the reference
        coords[k] = np.maximum(coords[k], ref[k])
    for k in (2, 3):  # ymax, xmax may not exceed the reference
        coords[k] = np.minimum(coords[k], ref[k])
    return np.transpose(coords)
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort detections by decreasing score and keep only the first ``top_k``.

    Returns the reordered (classes, scores, bboxes) triple.
    """
    # argsort of the negated scores yields a descending order.
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]
def bboxes_iou(bboxes1, bboxes2):
    """Compute the IoU between bboxes1 and bboxes2.

    Boxes are (ymin, xmin, ymax, xmax). The inputs may be multi-dimensional
    as long as they broadcast against each other after transposition.
    """
    first = np.transpose(bboxes1)
    second = np.transpose(bboxes2)

    # Areas of the individual boxes.
    area_first = (first[2] - first[0]) * (first[3] - first[1])
    area_second = (second[2] - second[0]) * (second[3] - second[1])

    # Overlap rectangle; negative extents mean no overlap and clamp to 0.
    overlap_h = np.maximum(
        np.minimum(first[2], second[2]) - np.maximum(first[0], second[0]), 0.)
    overlap_w = np.maximum(
        np.minimum(first[3], second[3]) - np.maximum(first[1], second[1]), 0.)
    intersection = overlap_h * overlap_w

    return intersection / (area_first + area_second - intersection)
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5):
    """Apply non-maximum suppression to bounding boxes.

    Boxes are processed in the order given, so callers should pass them
    sorted by decreasing score. A later box is dropped only when it overlaps
    an earlier kept box of the SAME class by at least ``nms_threshold`` IoU;
    boxes of different classes never suppress each other.

    Returns:
        The (classes, scores, bboxes) entries that survive.
    """
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep_bboxes[i]:
            # Compute overlap with the boxes that follow.
            overlap = bboxes_iou(bboxes[i], bboxes[(i + 1):])
            # Keep a following box if it overlaps little or is another class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):], keep_overlap)
    return classes[keep_bboxes], scores[keep_bboxes], bboxes[keep_bboxes]
def bboxes_resize(bbox_ref, bboxes):
    """Express boxes relative to a reference box.

    After the transform the reference box itself maps to [0, 0, 1, 1].
    Boxes are (ymin, xmin, ymax, xmax); a modified copy is returned.
    """
    out = np.copy(bboxes)
    ref_ymin, ref_xmin = bbox_ref[0], bbox_ref[1]
    ref_h = bbox_ref[2] - bbox_ref[0]
    ref_w = bbox_ref[3] - bbox_ref[1]
    # Shift the origin to the reference's top-left corner, then scale by the
    # reference's height (y columns) and width (x columns).
    for y_col in (0, 2):
        out[:, y_col] -= ref_ymin
        out[:, y_col] /= ref_h
    for x_col in (1, 3):
        out[:, x_col] -= ref_xmin
        out[:, x_col] /= ref_w
    return out
def process_bboxes(rclasses, rscores, rbboxes, rbbox_img = (0.0, 0.0, 1.0, 1.0),
                   top_k=400, nms_threshold=0.5):
    """Post-process raw detections: clip, sort, NMS, then resize.

    Returns the filtered (classes, scores, bboxes) triple.
    """
    # Make sure no box extends beyond the reference image box.
    clipped = bboxes_clip(rbbox_img, rbboxes)
    # Order by score and keep the best top_k.
    detections = bboxes_sort(rclasses, rscores, clipped, top_k)
    # Non-maximum suppression.
    kept_classes, kept_scores, kept_bboxes = bboxes_nms(*detections, nms_threshold)
    # With the default rbbox_img of (0, 0, 1, 1) this step changes nothing,
    # since there is no shift or scale to undo.
    kept_bboxes = bboxes_resize(rbbox_img, kept_bboxes)
    return kept_classes, kept_scores, kept_bboxes
6.visualization.py
import cv2
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm
# class names
# Class names; plt_bboxes looks a label up with CLASSES[cls_id - 1].
CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5, show_class_name=True):
    """Visualize bounding boxes. Largely inspired by SSD-MXNET!

    Draws ``img`` and, for every detection with a non-negative class id, a
    rectangle plus a "<label> | <score>" caption. ``bboxes`` rows are
    (ymin, xmin, ymax, xmax) and are multiplied by the image height/width
    before drawing. Each class id gets one random colour per call.
    """
    plt.figure(figsize=figsize)
    plt.imshow(img)
    img_h, img_w = img.shape[0], img.shape[1]
    palette = {}
    axes = plt.gca()
    for idx in range(classes.shape[0]):
        cls_id = int(classes[idx])
        if cls_id < 0:
            continue
        score = scores[idx]
        if cls_id not in palette:
            palette[cls_id] = (random.random(), random.random(), random.random())
        colour = palette[cls_id]
        # Scale the box corners to pixel coordinates.
        top = int(bboxes[idx, 0] * img_h)
        left = int(bboxes[idx, 1] * img_w)
        bottom = int(bboxes[idx, 2] * img_h)
        right = int(bboxes[idx, 3] * img_w)
        axes.add_patch(plt.Rectangle((left, top), right - left,
                                     bottom - top, fill=False,
                                     edgecolor=colour,
                                     linewidth=linewidth))
        # Class ids are offset by one relative to the CLASSES list.
        if show_class_name:
            label = CLASSES[cls_id-1]
        else:
            label = str(cls_id)
        axes.text(left, top - 2,
                  '{:s} | {:.3f}'.format(label, score),
                  bbox=dict(facecolor=colour, alpha=0.5),
                  fontsize=12, color='white')
    plt.show()
三、模型下载
https://pan.baidu.com/s/1snhuTsT
四、结果展示
五、参考
https://zhuanlan.zhihu.com/p/33544892
任何问题请加唯一QQ2258205918(名称samylee)!
或唯一VX:samylee_csdn