1 补充说明(修正前面代码存在问题):
# 先验框筛选 def choose_anchor_boxes(self, predictions, anchor_box, n_box): # predictions列表里面的元素表示:类别预测的置信度, shape = [-1, 10, 10, box_num, num_classes] anchor_box = tf.reshape(anchor_box, [n_box, 4]) # 5d张量改为2d张量 ?=? n_box是所有的锚点框的总数量=批数x10x10x6x4 predictions = tf.reshape(predictions, [n_box, 21])[:, 1:] # 第一个0是背景的置信度, 我们不需要,从1往后取 classes = tf.argmax(predictions, axis=1) + 1 # 得到preditions的概率最大类别的索引值, +1: 是因为代码里是0开始 scores = tf.reduce_max(predictions, axis=1) # 得到最大类别的得分, 当大于阈值就保留下来 classes = tf.boolean_mask(classes, scores > self.threshold) # 前面放筛选目标, 后面放筛选条件 scores = tf.boolean_mask(scores, scores > self.threshold) anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold) return classes, scores, anchor_box # ?=?需要学习的指令:tf.reshape() tf.reduce_max() tf.boolean_mask()
上述代码存在问题:
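因为scores会被覆盖: 第二行把scores重新赋值为筛选后的结果, 第三行的 scores > self.threshold 用的已经是筛选后的scores, 得到的掩码长度与anchor_box不再一致, 无法正确筛选anchor_box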
classes = tf.boolean_mask(classes, scores > self.threshold) # 前面放筛选目标, 后面放筛选条件
scores = tf.boolean_mask(scores, scores > self.threshold)
anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
=>因而使用下面算法代替,进而不影响scores值: ?=?不太清楚此代码的具体实现
filter_mask = scores > self.threshold
classes = tf.boolean_mask(classes, filter_mask) # 前面放筛选目标, 后面放筛选条件
scores = tf.boolean_mask(scores, filter_mask)
anchor_box = tf.boolean_mask(anchor_box, filter_mask)
=>或者改名为scores_nk
classes = tf.boolean_mask(classes, scores > self.threshold) # 前面放筛选目标, 后面放筛选条件
scores_nk = tf.boolean_mask(scores, scores > self.threshold)
anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
更新之后结果:
# /=== ===> 3: prior (anchor) box filtering <=== ===\
def choose_anchor_boxes(self, predictions, anchor_box, n_box):
    """Keep only the boxes whose best foreground score beats the threshold.

    Args:
        predictions: class confidences for one feature layer; reshaped here
            to [n_box, 21].
        anchor_box: decoded boxes for the same layer; reshaped to [n_box, 4].
        n_box: number of rows both tensors are flattened to.

    Returns:
        (classes, scores, anchor_box), each filtered with the same mask.
    """
    flat_boxes = tf.reshape(anchor_box, [n_box, 4])
    flat_scores = tf.reshape(predictions, [n_box, 21])
    # Column 0 is the background confidence; only the foreground columns matter.
    foreground = flat_scores[:, 1:]
    # +1 maps the column index of the sliced tensor back to the class id.
    best_class = tf.argmax(foreground, axis=1) + 1
    best_score = tf.reduce_max(foreground, axis=1)
    # Build the mask once so every tensor is filtered against the same scores.
    keep = best_score > self.threshold
    return (
        tf.boolean_mask(best_class, keep),
        tf.boolean_mask(best_score, keep),
        tf.boolean_mask(flat_boxes, keep),
    )
# APIs to study: tf.reshape() tf.reduce_max() tf.boolean_mask()
# \=== ===> 3: prior (anchor) box filtering <=== ===/
# \=== === === ===> prior box generation * decoding * filtering <=== === === ===/
2 先验框排序
# /=== ===> 1: sort the prior boxes <=== ===\
def bboxes_sort(self, classes, scores, bboxes, top_k=400):
    """Order detections by descending score and keep the best ``top_k``.

    Args:
        classes: 1-D array of class ids.
        scores: 1-D array of scores, aligned with ``classes``.
        bboxes: array of boxes whose first axis is aligned with ``scores``.
        top_k: maximum number of detections to return.

    Returns:
        (classes, scores, bboxes) reordered by score and truncated to top_k.
    """
    # Negating the scores turns argsort's ascending order into descending.
    order = np.argsort(-scores)[:top_k]
    return classes[order], scores[order], bboxes[order]
# \=== ===> 1: sort the prior boxes <=== ===/
np.argsort()
3 IOU
# /=== ===> 2: compute IoU <=== ===\
def bboxes_iou(self, bboxes1, bboxes2):
    """Return the intersection-over-union of ``bboxes1`` with ``bboxes2``.

    Boxes are laid out as (ymin, xmin, ymax, xmax). Either argument may be a
    single box of shape (4,) or a stack of shape (N, 4); the two are
    broadcast against each other after transposition.

    Args:
        bboxes1: one box, or an (N, 4) array of boxes.
        bboxes2: one box, or an (N, 4) array of boxes.

    Returns:
        The IoU value(s), with the broadcast shape of the inputs. Boxes with
        zero total area produce a division by zero (nan), as NumPy does.
    """
    # After the transpose, index 0..3 selects ymin, xmin, ymax, xmax.
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # The intersection's top-left corner is the max of the two top-left
    # corners; its bottom-right corner is the min of the two bottom-right ones.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])
    # Disjoint boxes give a negative extent; clamp it to 0.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w  # intersection area
    # Area = (ymax - ymin) * (xmax - xmin).
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    iou = int_vol / (vol1 + vol2 - int_vol)  # IoU = intersection / union
    return iou
# \=== ===> 2: compute IoU <=== ===/
np.transpose()
4非极大值抑制
# Source: https://mp.csdn.net/postedit/98534699
# /=== ===> 3: non-maximum suppression (NMS) <=== ===\
def bboxes_nms(self, classes, scores, bboxes, nms_threshold=0.5):
    """Suppress lower-ranked boxes that overlap a kept box of the same class.

    The inputs are walked in the order given, so they are expected to be
    sorted by descending score beforehand -- TODO confirm against the caller.

    Args:
        classes: 1-D array of class ids.
        scores: 1-D array of scores aligned with ``classes``.
        bboxes: (N, 4) array of boxes aligned with ``scores``.
        nms_threshold: IoU at or above which a same-class box is suppressed.

    Returns:
        (classes, scores, bboxes) restricted to the surviving detections.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias was removed
    # in NumPy 1.24.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep_bboxes[i]:
            # IoU of box i against every later box (broadcast over the stack).
            overlap = self.bboxes_iou(bboxes[i], bboxes[(i + 1):])
            # A later box survives if it overlaps little OR is another class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            # Boxes suppressed earlier must stay suppressed.
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                                   keep_overlap)
    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
# \=== ===> 3: non-maximum suppression (NMS) <=== ===/
# /=== ===> 3:非极大值抑制nms <=== ===\
=> def bboxes_nms(self, classes, scores, bboxes, nms_threshold=0.5):
=> keep_bboxes = np.ones(scores.shape, dtype=np.bool)
>>> scores = np.array([[1, 2], [3, 4]])
>>> keep_bboxes = np.ones(scores.shape, dtype=np.bool)
>>> keep_bboxes
array([[ True, True],
[ True, True]], dtype=bool)
=> for i in range(scores.size - 1):
>>> scores.size  # 4
=> if keep_bboxes[i]:
=> overlap = self.bboxes_iou(bboxes[i], bboxes[(i + 1):]) # 每个bboxes[i]都与其他的计算得到iou
这里会自动广播(broadcast) => bboxes[(i + 1):]是多个框
overlap是一个<class 'numpy.ndarray'>
示例如下:# ===> 解释上述代码 <=== import numpy as np def bboxes_iou(a, b): iou = a + b return iou if __name__ == '__main__': c = np.array([1, 2, 3, 4]) for i in range(3): overlap = bboxes_iou(c[i], c[(i+1):]) print(overlap) >>>[3 4 5] >>>[5 6] >>>[7]
=> keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i]) # 逻辑或
交并比<阈值的 or 两者类别不同的, 在keep_overlap里对应位置为True, 这些框会被保留; 只有与当前框同类且交并比>=阈值的框对应False, 会被抑制掉
# ===> 解释上述代码 <===
import numpy as np


def bboxes_iou(a, b):
    iou = a + b
    return iou


if __name__ == '__main__':
    a = np.array([1, 2, 3, 1])
    classes = np.array([1, 1, 2, 1])
    for i in range(3):
        overlap = bboxes_iou(a[i], a[(i+1):])
        # print('overlap:', overlap)
        # print('classes[i:] != classes[i]:', classes[i:] != classes[i])
        keepoverlap = np.logical_or(overlap < 3, classes[(i+1):] != classes[i])
        # print('keepoverlap:', keepoverlap)
        # print('===> 换行 <===')

# ===> 输出 <===
overlap: [3 4 2]
classes[i:] != classes[i]: [False True False]
keepoverlap: [False True True]
# ===> 换行 <===
overlap: [5 3]
classes[i:] != classes[i]: [ True False]
keepoverlap: [ True False]
# ===> 换行 <===
overlap: [4]
classes[i:] != classes[i]: [ True]
keepoverlap: [ True]
# ===> 换行 <===
=> keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap) # 逻辑与
# keep_bboxes[(i+1):]是当前bbox后面的一些框, keep_overlap是数组里面有true false, 与操作留下想要的c = np.ones([2, ], dtype=np.bool) print(c) a = np.array([True, False]) b = np.array([1, 2]) c = np.logical_and(a, b) print(c) # [True True] # [True False]
=> idxes = np.where(keep_bboxes)
# https://www.cnblogs.com/massquantity/p/8908859.html
a = np.array([True, False]) b = np.array([1, 2]) print(np.where(a)) # (array([0]),)
=> return classes[idxes], scores[idxes], bboxes[idxes]
# \=== ===> 3:非极大值抑制nms <=== ===/=>此部分指令:
np.ones(scores.shape, dtype=np.bool)np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
idxes = np.where(keep_bboxes)
需要学的python命令:
1 np.argsort()
2 np.transpose()
3 np.ones(scores.shape, dtype=np.bool)
>>> a = np.array([[1,2],[3,4]]) >>> a array([[1, 2], [3, 4]]) >>> b = np.ones(a.shape, dtype=np.bool) >>> b array([[ True, True], [ True, True]], dtype=bool) >>> a.size 4
4 np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
5 keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)
>>> np.logical_and(True, False) False >>> np.logical_and([True, False], [False, False]) array([False, False], dtype=bool)
>>> x = np.arange(5) >>> x array([0, 1, 2, 3, 4]) >>> np.logical_and(x>1, x<4) array([False, False, True, True, False], dtype=bool)
6 idxes = np.where(keep_bboxes)
当前最终版:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Creation Date: 2019/7/10
import tensorflow as tf
import numpy as np
import cv2

# Comment-marker legend used in this file
#   1  /=== === === ===> xxxx <=== === === ===\   level-1 heading, start
#      |=== === === ===> xxxx                     note on a level-1 heading
#      \=== === === ===> xxxx <=== === === ===/   level-1 heading, end
#   2  /=== === ===> xxxx <=== === ===\           level-2 heading, start
#      |=== === ===> xxxx                         note on a level-2 heading
#      \=== === ===> xxxx <=== === ===/           level-2 heading, end
#   3  /=== ===> xxxx <=== ===\                   level-3 heading, start
#      |=== ===> xxxx                             note on a level-3 heading
#      \=== ===> xxxx <=== ===/                   level-3 heading, end
#   4  /===> xxxx <===\                           level-4 heading
#      ===> xxxx <===                             short form of a level-4 heading
#      |===> xxxx                                 note on a level-4 heading
#      \===> xxxx <===/                           level-4 heading, end
#   5  ==> or =>                                  key point / special case
#   6  ?=?                                        open question


class ssd(object):
    """SSD-300 detector on a VGG-16 style backbone (TensorFlow 1.x graph API).

    The class builds the network (``set_net``), generates the prior boxes of
    one feature layer (``ssd_anchor_layer``), decodes predicted offsets into
    corner boxes (``ssd_decode``) and filters boxes by score
    (``choose_anchor_boxes``).
    """

    def __init__(self):
        # ===> constructor parameters <===
        self.num_boxes = []  # running count of anchor boxes
        # Spatial size of each detection feature map. The attribute name keeps
        # its original spelling so existing references still work.
        self.feaeture_map_size = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        # 20 object classes; with the background that makes 21.
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow",
                        "diningtable", "dog", "horse", "motorbike", "person",
                        "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        # Names of the feature layers used for detection.
        self.feature_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        self.img_size = (300, 300)  # input image size
        self.num_classes = 21  # background counts as a class (index 0)
        # Anchor boxes generated per feature-map cell: 4 for block4/10/11,
        # 6 for the others.
        self.boxes_len = [4, 6, 6, 6, 4, 4]
        # block4:  38 x 38 x 4 = 5776
        # block7:  19 x 19 x 6 = 2166
        # block8:  10 x 10 x 6 =  600
        # block9:   5 x  5 x 6 =  150
        # block10:  3 x  3 x 4 =   36
        # block11:  1 x  1 x 4 =    4
        # total                = 8732
        # block4 sits early in the network and has a large norm, so only it is
        # L2-normalised.
        self.isL2norm = [True, False, False, False, False, False]
        self.anchor_size = [(21., 45.), (45., 99.), (99., 153.),
                            (153., 207.), (207., 261.), (261., 315.)]
        self.anchor_ratios = [[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                              [2, .5, 3, 1. / 3], [2, .5], [2, .5]]
        self.anchor_steps = [8, 16, 32, 64, 100, 300]
        # Scaling of the predicted offsets: 0.1 for the centre (x, y),
        # 0.2 for the size (w, h).
        self.prior_scaling = [0.1, 0.1, 0.2, 0.2]
        # Prior boxes per feature map (cells x boxes per cell); 8732 in total.
        self.n_boxes = [5776, 2166, 600, 150, 36, 4]
        # The paper uses 0.5; 0.2 is used here to detect more objects.
        self.threshold = 0.2

    # /=== === === ===> SSD network architecture <=== === === ===\
    # ===> L2 normalisation <===
    def l2norm(self, x, scale=20.0, trainable=True, scope='L2Normalization'):
        """L2-normalise ``x`` over its channel axis and apply a learned scale.

        Args:
            x: 4-D feature map with channels on axis 3.
            scale: initial value of the per-channel ``gamma``. The default of
                20 follows the value reported in the SSD paper; it makes the
                argument optional for callers that do not pass one.
            trainable: whether ``gamma`` is trainable.
            scope: variable scope holding ``gamma``.

        Returns:
            The normalised feature map multiplied by ``gamma``.
        """
        n_channels = x.get_shape().as_list()[-1]  # last dimension = channels
        # Normalise each pixel across its channels only.
        l2_norm = tf.nn.l2_normalize(x, dim=[3], epsilon=1e-12)
        with tf.variable_scope(scope):
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.constant_initializer(scale),
                                    trainable=trainable)
        return l2_norm * gamma

    # /=== ===> CNN building blocks: conv2d, max_pool2d, pad2d, dropout <=== ===\
    def conv2d(self, x, filter, k_size,
               stride=[1, 1], padding='same', dilation=[1, 1],
               activation=tf.nn.relu, scope='conv2d'):
        """Thin wrapper around ``tf.layers.conv2d``.

        Args:
            x: input tensor.
            filter: number of convolution kernels (output channels).
            k_size: kernel size, e.g. ``[3, 3]``.
            stride: convolution strides.
            padding: ``'same'`` or ``'valid'``.
            dilation: dilation rate; ``[1, 1]`` is an ordinary convolution.
            activation: activation function, or None.
            scope: layer name.
        """
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size,
                                strides=stride, dilation_rate=dilation, padding=padding,
                                name=scope, activation=activation)

    def max_pool2d(self, x, pool_size, stride, scope='max_pool2d', padding='same'):
        """Thin wrapper around ``tf.layers.max_pooling2d``.

        ``padding`` defaults to ``'same'``: with ``'valid'`` a 300x300 input
        gives 37x37 at block4 and 16x16 at block7 instead of the 38x38 and
        19x19 maps the rest of this class assumes.
        """
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride,
                                       padding=padding, name=scope)

    def pad2d(self, x, pad):
        """Zero-pad height and width by ``pad`` on each side.

        Used before the stride-2 'valid' convolutions of blocks 8 and 9, which
        pad explicitly instead of relying on the layer's own padding.
        """
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def dropout(self, x, d_rate=0.5):
        """Apply ``tf.layers.dropout`` with rate ``d_rate``."""
        return tf.layers.dropout(inputs=x, rate=d_rate)

    def ssd_prediction(self, x, num_classes, box_num, isL2norm, scope='multibox'):
        """Attach the localisation and classification heads to one feature map.

        Args:
            x: feature map, e.g. shape (?, 10, 10, 512) for block8.
            num_classes: number of classes including background.
            box_num: anchor boxes per feature-map cell.
            isL2norm: whether to L2-normalise ``x`` first.
            scope: variable scope of the two heads.

        Returns:
            (location_pred, class_pred) with shapes
            [-1, H, W, box_num, 4] and [-1, H, W, box_num, num_classes].
        """
        # Drop the batch and channel dimensions and put -1 for the batch,
        # e.g. (?, 10, 10, 512) -> [-1, 10, 10].
        reshape = [-1] + x.get_shape().as_list()[1:-1]
        with tf.variable_scope(scope):
            if isL2norm:
                x = self.l2norm(x, scale=20.0)
            # ==> location head: a regression, so no activation and no softmax.
            # Output channels = boxes per cell x 4 box values; 3x3 kernel.
            location_pred = self.conv2d(x, filter=box_num * 4, k_size=[3, 3],
                                        activation=None, scope='conv_loc')
            location_pred = tf.reshape(location_pred, reshape + [box_num, 4])
            # ==> class head: softmax is applied later by the caller.
            # Output channels = boxes per cell x num_classes; 3x3 kernel.
            class_pred = self.conv2d(x, filter=box_num * num_classes, k_size=[3, 3],
                                     activation=None, scope='conv_cls')
            class_pred = tf.reshape(class_pred, reshape + [box_num, num_classes])
            print(location_pred, class_pred)
            return location_pred, class_pred
    # \=== ===> CNN building blocks <=== ===/

    # /=== ===> network definition <=== ===\
    def set_net(self):
        """Build the SSD-300 graph.

        Returns:
            (locations, predictions, x): two lists with one tensor per
            detection layer, shaped [-1, H, W, box_num, 4] and
            [-1, H, W, box_num, num_classes] (softmax applied), plus the
            input placeholder of shape [None, 300, 300, 3].
        """
        check_points = {}  # feature maps by layer name, iterated below
        predictions = []
        locations = []
        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # ===> first five VGG blocks <===
            # b1
            net = self.conv2d(x, filter=64, k_size=[3, 3], scope='conv1_1')
            net = self.conv2d(net, 64, [3, 3], scope='conv1_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool1')
            # b2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool2')
            # b3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool3')
            # b4 => detection layer 1
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_3')
            check_points['block4'] = net
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool4')
            # b5: this is where the network departs from plain VGG
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_3')
            # => 3x3 pooling with stride 1
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scope='pool5')

            # ===> convolutions replacing the VGG fully connected layers <===
            # b6 conv6: 3x3x1024, dilation 6
            net = self.conv2d(net, filter=1024, k_size=[3, 3], dilation=[6, 6], scope='conv6')
            # b7 conv7: 1x1x1024 => detection layer 2
            net = self.conv2d(net, filter=1024, k_size=[1, 1], scope='conv7')
            check_points['block7'] = net
            # b8 conv8_1: 1x1x256; conv8_2: 3x3x512, stride 2, valid => detection layer 3
            net = self.conv2d(net, 256, [1, 1], scope='conv8_1x1')
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2],
                              scope='conv8_3x3', padding='valid')
            check_points['block8'] = net
            # b9 conv9_1: 1x1x128; conv9_2: 3x3x256, stride 2, valid => detection layer 4
            net = self.conv2d(net, 128, [1, 1], scope='conv9_1x1')
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2],
                              scope='conv9_3x3', padding='valid')
            check_points['block9'] = net
            # b10 conv10_1: 1x1x128; conv10_2: 3x3x256, stride 1, valid => detection layer 5
            net = self.conv2d(net, 128, [1, 1], scope='conv10_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv10_3x3', padding='valid')
            check_points['block10'] = net
            # b11 conv11_1: 1x1x128; conv11_2: 3x3x256, stride 1, valid => detection layer 6
            net = self.conv2d(net, 128, [1, 1], scope='conv11_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv11_3x3', padding='valid')
            check_points['block11'] = net

            # i is the layer index, j its name such as 'block4'.
            for i, j in enumerate(self.feature_layers):
                loc, cls = self.ssd_prediction(
                    x=check_points[j],
                    num_classes=self.num_classes,
                    box_num=self.boxes_len[i],
                    isL2norm=self.isL2norm[i],
                    scope=j + '_box'
                )
                predictions.append(tf.nn.softmax(cls))  # class scores need softmax
                locations.append(loc)  # box offsets do not
            print(check_points)  # inspect the network, e.g. block8: (?, 10, 10, 512)
            print(locations, predictions)
            return locations, predictions, x
    # \=== ===> network definition <=== ===/
    # \=== === === ===> SSD network architecture <=== === === ===/

    # /=== === === ===> prior box generation * decoding * filtering <=== === === ===\
    # /=== ===> 1: prior box generation <=== ===\
    def ssd_anchor_layer(self, img_size, feature_map_size,
                         anchor_size, anchor_ratio, anchor_step, box_num, offset=0.5):
        """Generate the prior boxes of one feature layer.

        Box order per cell: small square, large square, then one rectangle per
        entry of ``anchor_ratio``.

        Args:
            img_size: (height, width) of the input image.
            feature_map_size: (rows, cols) of the feature map, e.g. (10, 10).
            anchor_size: (min_size, max_size) of this layer, in pixels.
            anchor_ratio: aspect ratios of the extra rectangles.
            anchor_step: stride of the feature map relative to the image.
            box_num: boxes per cell; must be at least 2 + len(anchor_ratio).
            offset: position of the box centre inside a cell (0.5 = middle).

        Returns:
            (y, x, h, w): centre coordinates with shape (rows, cols, 1) and
            box heights/widths with shape (box_num,), all relative to the
            image size.
        """
        # Row/column index of every cell, e.g. for (10, 10): x[r, c] == c and
        # y[r, c] == r.
        y, x = np.mgrid[0: feature_map_size[0], 0:feature_map_size[1]]
        y = (y.astype(np.float32) + offset) * anchor_step / img_size[0]
        x = (x.astype(np.float32) + offset) * anchor_step / img_size[1]
        # Add a trailing axis so the centres broadcast against the per-box
        # (box_num,) sizes and the (batch, rows, cols, box_num) predictions.
        y = np.expand_dims(y, axis=-1)
        x = np.expand_dims(x, axis=-1)

        h = np.zeros((box_num,), np.float32)
        w = np.zeros((box_num,), np.float32)
        # Small square.
        h[0] = anchor_size[0] / img_size[0]
        w[0] = anchor_size[0] / img_size[1]
        # Large square: geometric mean of the min and max sizes.
        h[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[0]
        w[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[1]
        # Rectangles: same area as the small square, aspect ratio w/h = j.
        for i, j in enumerate(anchor_ratio):
            h[i + 2] = anchor_size[0] / img_size[0] / (j ** 0.5)
            w[i + 2] = anchor_size[0] / img_size[1] * (j ** 0.5)
        return y, x, h, w
        # Example for block8 (sizes (99, 153), image 300x300):
        #   h[0] = 99/300              w[0] = 99/300              small square
        #   h[1] = sqrt(99*153)/300    w[1] = sqrt(99*153)/300    large square
        #   h[2] = 99/300/sqrt(2)      w[2] = 99/300*sqrt(2)      wide 2:1   (ratio 2)
        #   h[3] = 99/300/sqrt(0.5)    w[3] = 99/300*sqrt(0.5)    tall 1:2   (ratio .5)
        #   h[4] = 99/300/sqrt(3)      w[4] = 99/300*sqrt(3)      wide 3:1   (ratio 3)
        #   h[5] = 99/300/sqrt(1/3)    w[5] = 99/300*sqrt(1/3)    tall 1:3   (ratio 1/3)
        #   h = [0.33000001, 0.41024384, 0.23334524, 0.46669048, 0.19052559, 0.57157677]
        #   w = [0.33000001, 0.41024384, 0.46669048, 0.23334524, 0.57157677, 0.19052559]
    # \=== ===> 1: prior box generation <=== ===/

    # /=== ===> 2: decoding <=== ===\
    def ssd_decode(self, location, box, prior_scaling):
        """Turn predicted offsets into corner boxes.

        Args:
            location: one element of the ``locations`` list returned by
                ``set_net``, shape [-1, H, W, box_num, 4]; the last axis is
                read as (dx, dy, dw, dh).
            box: the (y, x, h, w) tuple returned by ``ssd_anchor_layer`` for
                the same layer.
            prior_scaling: the four scaling factors for dx, dy, dw, dh.

        Returns:
            Tensor of shape [-1, H, W, box_num, 4] holding
            (ymin, xmin, ymax, xmax).
        """
        y_a, x_a, h_a, w_a = box
        # Centre: offset scaled by the prior size, added to the prior centre.
        cx = location[:, :, :, :, 0] * w_a * prior_scaling[0] + x_a
        cy = location[:, :, :, :, 1] * h_a * prior_scaling[1] + y_a
        # Size: the prior size scaled by the exponential of the offset.
        w = w_a * tf.exp(location[:, :, :, :, 2] * prior_scaling[2])
        h = h_a * tf.exp(location[:, :, :, :, 3] * prior_scaling[3])
        print(cx, cy, w, h)
        # Top-left corner (cy - h/2, cx - w/2), bottom-right corner
        # (cy + h/2, cx + w/2), stacked on a new last axis.
        bboxes = tf.stack([cy - h/2.0, cx - w/2.0, cy + h/2.0, cx + w/2.0], axis=-1)
        print(bboxes)
        return bboxes
    # \=== ===> 2: decoding <=== ===/

    # /=== ===> 3: prior box filtering <=== ===\
    def choose_anchor_boxes(self, predictions, anchor_box, n_box):
        """Keep only the boxes whose best foreground score beats the threshold.

        Args:
            predictions: class confidences of one layer,
                shape [-1, H, W, box_num, num_classes].
            anchor_box: decoded boxes of the same layer, last axis of size 4.
            n_box: number of rows both tensors are flattened to; the reshape
                only succeeds when that matches their total number of boxes.

        Returns:
            (classes, scores, anchor_box), each filtered with the same mask.
        """
        anchor_box = tf.reshape(anchor_box, [n_box, 4])  # 5-D -> 2-D
        prediction = tf.reshape(predictions, [n_box, self.num_classes])
        # Column 0 is the background confidence; keep the foreground columns.
        prediction = prediction[:, 1:]
        # Best class per box; +1 maps the sliced column index back to the id.
        classes = tf.argmax(prediction, axis=1) + 1
        scores = tf.reduce_max(prediction, axis=1)
        # Build the mask once, before `scores` is overwritten, so all three
        # tensors are filtered against the same unfiltered scores.
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        anchor_box = tf.boolean_mask(anchor_box, filter_mask)
        return classes, scores, anchor_box
        # APIs to study: tf.reshape() tf.reduce_max() tf.boolean_mask()
    # \=== ===> 3: prior box filtering <=== ===/
    # \=== === === ===> prior box generation * decoding * filtering <=== === === ===/


if __name__ == '__main__':
    sd = ssd()
    locations, predictions, x = sd.set_net()
    # block8 is used as the example layer: 10x10 cells, 6 boxes per cell.
    box = sd.ssd_anchor_layer(sd.img_size, (10, 10), (99., 153.), [2., .5, 3., 1. / 3], 32, 6)
    boex = sd.ssd_decode(locations[2], box, sd.prior_scaling)
    print(boex)
    # Expected: a tensor of shape (?, 10, 10, 6, 4), float32.
    #   10, 10: the third detection layer (locations[2]) is 10x10
    #   6:      anchor boxes per cell
    #   4:      ymin, xmin, ymax, xmax
    # locations[0] is 38x38, [1] is 19x19, [2] is 10x10, [3] is 5x5,
    # [4] is 3x3 and [5] is 1x1.
    cls, sco, a_box = sd.choose_anchor_boxes(predictions[2], boex, sd.n_boxes[2])
    print('----------------------------')
    print(cls, sco, a_box)