A-level course --- From Scratch (7) --- SSD 2-4 training part, wrap-up supplement (IOU, NMS)

https://www.bilibili.com/video/av43996494/?p=6

 

1 Supplementary note (fixing a problem in the earlier code):

# Prior box filtering
    def choose_anchor_boxes(self, predictions, anchor_box, n_box):
        # Each element of predictions holds the class confidences, shape = [-1, 10, 10, box_num, num_classes]
        anchor_box = tf.reshape(anchor_box, [n_box, 4])  # 5-d tensor -> 2-d tensor; n_box is the total number of anchor boxes for this layer (10x10x6 = 600 for block8)
        predictions = tf.reshape(predictions, [n_box, 21])[:, 1:]  # column 0 is the background confidence, which we don't need, so take columns from 1 onward
        classes = tf.argmax(predictions, axis=1) + 1  # index of the highest-probability class; +1 because the background column was dropped
        scores = tf.reduce_max(predictions, axis=1)  # score of that best class; boxes above the threshold are kept

        classes = tf.boolean_mask(classes, scores > self.threshold)   # first argument: what to filter, second: the filter condition
        scores = tf.boolean_mask(scores, scores > self.threshold)
        anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)

        return classes, scores, anchor_box
        # ?=? ops to study: tf.reshape() tf.reduce_max() tf.boolean_mask()

The code above has a problem:
scores gets overwritten: the second line below replaces scores with its already-masked (shorter) version, so the mask built from it on the third line no longer lines up with anchor_box
        classes = tf.boolean_mask(classes, scores > self.threshold)   # first argument: what to filter, second: the filter condition
        scores = tf.boolean_mask(scores, scores > self.threshold)
        anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
=> So use the version below instead, which builds the mask once and leaves scores untouched:  ?=? still not entirely clear on the details of this code
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)   # first argument: what to filter, second: the filter condition
        scores = tf.boolean_mask(scores, filter_mask)
        anchor_box = tf.boolean_mask(anchor_box, filter_mask)
=> Or rename the masked scores to scores_nk, so the original scores is still available for the last mask:
        classes = tf.boolean_mask(classes, scores > self.threshold)   # first argument: what to filter, second: the filter condition
        scores_nk = tf.boolean_mask(scores, scores > self.threshold)
        anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
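As a quick illustration, a minimal NumPy sketch (made-up scores, not the TF graph code) of why overwriting scores first breaks the anchor_box mask:

# ===> illustrating the problem above <===
import numpy as np

scores = np.array([0.1, 0.6, 0.3, 0.9])
anchor_box = np.arange(16).reshape(4, 4)    # 4 dummy boxes
threshold = 0.2

# correct order: build the mask once, then apply it everywhere
filter_mask = scores > threshold            # [False  True  True  True]
print(anchor_box[filter_mask])              # keeps boxes 1, 2, 3

# buggy order: scores is overwritten first, so the mask built afterwards
# comes from the already-filtered scores and no longer matches anchor_box
scores = scores[scores > threshold]         # [0.6 0.3 0.9] -- only 3 values left
print((scores > threshold).shape)           # (3,) -- wrong length for masking 4 boxes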

Result after the update:

# /=== ===> 3: prior box filtering <=== ===\
    def choose_anchor_boxes(self, predictions, anchor_box, n_box):
        # Each element of predictions holds the class confidences, shape = [-1, 10, 10, box_num, num_classes]
        anchor_box = tf.reshape(anchor_box, [n_box, 4])  # 5-d tensor -> 2-d tensor; n_box is the total number of anchor boxes for this layer (10x10x6 = 600 for block8)
        prediction = tf.reshape(predictions, [n_box, 21])
        prediction = prediction[:, 1:]  # column 0 is the background confidence, which we don't need, so take columns from 1 onward
        classes = tf.argmax(prediction, axis=1) + 1  # index of the highest-probability class per row (axis=1); +1 because the background column was dropped
        scores = tf.reduce_max(prediction, axis=1)  # score of that best class; boxes above the threshold are kept (see below)

        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)   # first argument: what to filter, second: the filter condition
        scores = tf.boolean_mask(scores, filter_mask)
        anchor_box = tf.boolean_mask(anchor_box, filter_mask)

        return classes, scores, anchor_box
        # ops to study: tf.reshape() tf.reduce_max() tf.boolean_mask()
    # \=== ===> 3: prior box filtering <=== ===/
# \=== === === ===> prior box generation * decoding * filtering - end <=== === === ===/
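For the ops noted above, a small standalone TF 1.x sketch (dummy scores and boxes, threshold 0.2 as in self.threshold) of what tf.boolean_mask does here:

# ===> tf.boolean_mask example <===
import tensorflow as tf

scores = tf.constant([0.1, 0.6, 0.3, 0.9])
boxes = tf.constant([[0., 0., 1., 1.], [1., 1., 2., 2.], [2., 2., 3., 3.], [3., 3., 4., 4.]])
mask = scores > 0.2
with tf.Session() as sess:
    print(sess.run(tf.boolean_mask(scores, mask)))  # [0.6 0.3 0.9]
    print(sess.run(tf.boolean_mask(boxes, mask)))   # the 3 rows whose score passed the threshold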

2 Sorting the prior boxes

 

# /=== ===> 1: prior box sorting <=== ===\
    def bboxes_sort(self, classes, scores, bboxes, top_k=400):
        idxes = np.argsort(-scores)  # sort scores from high to low, then reorder the classes, scores and boxes with that order
        classes = classes[idxes][:top_k]  # keep only the 400 highest-scoring entries
        scores = scores[idxes][:top_k]
        bboxes = bboxes[idxes][:top_k]
        return classes, scores, bboxes
# \=== ===> 1: prior box sorting <=== ===/

np.argsort()
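A quick sketch of np.argsort with made-up scores; negating the array gives indices for a high-to-low order:

# ===> np.argsort example <===
import numpy as np
scores = np.array([0.3, 0.9, 0.1, 0.6])
idxes = np.argsort(-scores)   # indices that sort scores from high to low
print(idxes)                  # [1 3 0 2]
print(scores[idxes])          # [0.9 0.6 0.3 0.1]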

3 IOU

# /=== ===> 2: computing IOU <=== ===\
    def bboxes_iou(self, bboxes1, bboxes2):
        bboxes1 = np.transpose(bboxes1)
        bboxes2 = np.transpose(bboxes2)

        # Intersection of the two boxes: its top-left point is the max of the two boxes' top-left points,
        # its bottom-right point is the min of the two boxes' bottom-right points
        # each row of bboxes1 holds: y1 x1 y2 x2
        int_ymin = np.maximum(bboxes1[0], bboxes2[0])  # y of the intersection's top-left = max of the two boxes' top-left y
        int_xmin = np.maximum(bboxes1[1], bboxes2[1])  # x of the intersection's top-left = max of the two boxes' top-left x
        int_ymax = np.minimum(bboxes1[2], bboxes2[2])  # y of the intersection's bottom-right = min of the two boxes' bottom-right y
        int_xmax = np.minimum(bboxes1[3], bboxes2[3])  # x of the intersection's bottom-right = min of the two boxes' bottom-right x

        # Width/height of the intersection: if the boxes don't overlap, w and h come out negative, so clamp them at 0
        int_h = np.maximum(int_ymax - int_ymin, 0.)
        int_w = np.maximum(int_xmax - int_xmin, 0.)

        # Compute the IOU
        int_vol = int_h * int_w  # intersection area
        vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])  # area of bboxes1
        vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])  # area of bboxes2
        iou = int_vol / (vol1 + vol2 - int_vol)  # IOU = intersection / union
        return iou
# \=== ===> 2: computing IOU <=== ===/

np.transpose()
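A hand-checkable sketch with made-up boxes (stored as y1, x1, y2, x2): np.transpose turns the (N, 4) box array into 4 coordinate rows so the per-coordinate max/min comparisons broadcast, and the IOU values can be verified by hand:

# ===> bboxes_iou example <===
import numpy as np
a = np.array([0., 0., 2., 2.])                        # one 2x2 box
b = np.array([[1., 1., 3., 3.], [5., 5., 6., 6.]])    # an overlapping box and a disjoint box
print(np.transpose(b))   # shape (4, 2): row 0 = y1 values, row 1 = x1, row 2 = y2, row 3 = x2
# feeding a and b to bboxes_iou above: the intersection with the first box is 1x1 = 1,
# the union is 4 + 4 - 1 = 7, so iou ≈ [0.143, 0.] (the second box does not overlap at all)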

4 Non-maximum suppression (NMS)
https://mp.csdn.net/postedit/98534699

# /=== ===> 3: non-maximum suppression (nms) <=== ===\
    def bboxes_nms(self, classes, scores, bboxes, nms_threshold=0.5):
        keep_bboxes = np.ones(scores.shape, dtype=np.bool)
        for i in range(scores.size - 1):
            if keep_bboxes[i]:
                overlap = self.bboxes_iou(bboxes[i], bboxes[(i + 1):])  # iou of bboxes[i] against every box after it
                keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])  # logical or
                keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)  # logical and
        idxes = np.where(keep_bboxes)
        return classes[idxes], scores[idxes], bboxes[idxes]
# \=== ===> 3: non-maximum suppression (nms) <=== ===/

# /=== ===> 3: non-maximum suppression (nms) <=== ===\
=>    def bboxes_nms(self, classes, scores, bboxes, nms_threshold=0.5):
=>        keep_bboxes = np.ones(scores.shape, dtype=np.bool)

>>> a = np.array([[1, 2], [3, 4]])
>>> keep_bboxes = np.ones(a.shape, dtype=np.bool)
>>> keep_bboxes
array([[ True,  True],
       [ True,  True]], dtype=bool)

=>        for i in range(scores.size - 1):
>>> a.size
4
=>            if keep_bboxes[i]:
=>                overlap = self.bboxes_iou(bboxes[i], bboxes[(i + 1):])
  # iou of bboxes[i] against every box after it
NumPy broadcasting happens here automatically  =>  bboxes[(i + 1):] is several boxes at once
overlap is a <class 'numpy.ndarray'>
Example:

# ===> illustrating the code above <===
import numpy as np
def bboxes_iou(a, b):
    iou = a + b
    return iou
if __name__ == '__main__':
    c = np.array([1, 2, 3, 4])
    for i in range(3):
        overlap = bboxes_iou(c[i], c[(i+1):])
        print(overlap)
>>>[3 4 5]
>>>[5 6]
>>>[7]

=>                keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])  # logical or
A box is marked True in keep_overlap when its IOU with box i is below the threshold or its class differs from box i's; those are the boxes that survive. Boxes with IOU >= threshold and the same class get False and are suppressed.

# ===> illustrating the code above <===
import numpy as np
def bboxes_iou(a, b):
    iou = a + b
    return iou
if __name__ == '__main__':
    a = np.array([1, 2, 3, 1])
    classes = np.array([1, 1, 2, 1])
    for i in range(3):
        overlap = bboxes_iou(a[i], a[(i+1):])
        print('overlap:', overlap)
        print('classes[(i+1):] != classes[i]:', classes[(i+1):] != classes[i])
        keepoverlap = np.logical_or(overlap < 3, classes[(i+1):] != classes[i])
        print('keepoverlap:', keepoverlap)
        print('===> next i <===')
# ===> output <===
overlap: [3 4 2]
classes[(i+1):] != classes[i]: [False  True False]
keepoverlap: [False  True  True]
===> next i <===
overlap: [5 3]
classes[(i+1):] != classes[i]: [ True False]
keepoverlap: [ True False]
===> next i <===
overlap: [4]
classes[(i+1):] != classes[i]: [ True]
keepoverlap: [ True]
===> next i <===


=>                keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)  # logical and
      # keep_bboxes[(i+1):] covers the boxes after the current one; keep_overlap is an array of True/False, and the logical and keeps only the boxes we still want

c = np.ones([2, ], dtype=np.bool)
print(c)
a = np.array([True, False])
b = np.array([1, 2])
c = np.logical_and(a, b)
print(c)
    # [True  True]
    # [True False]

=>        idxes = np.where(keep_bboxes)
#
https://www.cnblogs.com/massquantity/p/8908859.html
 

a = np.array([True, False])
b = np.array([1, 2])
print(np.where(a))
    # (array([0]),)


=>        return classes[idxes], scores[idxes], bboxes[idxes]
# \=== ===> 3: non-maximum suppression (nms) <=== ===/
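Putting the pieces together, a standalone sketch (plain functions and made-up boxes/scores rather than the class methods) showing NMS suppressing the lower-scoring overlapping box of the same class while keeping the box of a different class:

# ===> end-to-end nms example <===
import numpy as np

def bboxes_iou(bboxes1, bboxes2):
    bboxes1, bboxes2 = np.transpose(bboxes1), np.transpose(bboxes2)
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])
    int_vol = np.maximum(int_ymax - int_ymin, 0.) * np.maximum(int_xmax - int_xmin, 0.)
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    return int_vol / (vol1 + vol2 - int_vol)

if __name__ == '__main__':
    classes = np.array([1, 1, 2])
    scores = np.array([0.9, 0.8, 0.7])        # already sorted high -> low
    bboxes = np.array([[0., 0., 2., 2.],      # box 0
                       [0., 0., 2., 1.9],     # box 1: same class, iou > 0.5 with box 0 -> suppressed
                       [5., 5., 6., 6.]])     # box 2: different class, kept
    keep = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep[i]:
            overlap = bboxes_iou(bboxes[i], bboxes[(i + 1):])
            keep_overlap = np.logical_or(overlap < 0.5, classes[(i + 1):] != classes[i])
            keep[(i + 1):] = np.logical_and(keep[(i + 1):], keep_overlap)
    print(np.where(keep))   # (array([0, 2]),) -- box 1 was suppressed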

=> Ops used in this part:
np.ones(scores.shape, dtype=np.bool)

np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])

idxes = np.where(keep_bboxes)

Python / NumPy commands to study:

1 np.argsort()

2 np.transpose()

3 np.ones(scores.shape, dtype=np.bool)

>>> a = np.array([[1,2],[3,4]])
>>> a
array([[1, 2],
       [3, 4]])
>>> b = np.ones(a.shape, dtype=np.bool)
>>> b
array([[ True,  True],
       [ True,  True]], dtype=bool)
>>> a.size
4

4 np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
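In the same spirit as the np.logical_and example under item 5, a quick np.logical_or sketch (made-up overlap values and class ids):

>>> np.logical_or(True, False)
True
>>> np.logical_or([True, False], [False, False])
array([ True, False], dtype=bool)
>>> overlap = np.array([0.2, 0.7, 0.6])
>>> np.logical_or(overlap < 0.5, np.array([1, 2, 1]) != 1)
array([ True,  True, False], dtype=bool)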

5 keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)

>>> np.logical_and(True, False)
False
>>> np.logical_and([True, False], [False, False])
array([False, False], dtype=bool)
>>> x = np.arange(5)
>>> x
array([0, 1, 2, 3, 4])
>>> np.logical_and(x>1, x<4)
array([False, False,  True,  True, False], dtype=bool)

6 idxes = np.where(keep_bboxes)

 

 

Current final version:

#!usr/bin/python
# -*- coding: utf-8 -*-
# Creation Date: 2019/7/10
import tensorflow as tf
import numpy as np
import cv2
''' Notation used in the comments
# 1 /=== === === ===>xxxx<=== === === ===\  level-1 heading - start
   |=== === === ===>xxxx                    extra notes under a level-1 heading
    \=== === === ===>xxxx<=== === === ===/  level-1 heading - end
    
# 2 /=== === ===>xxxx<=== === ===\  level-2 heading - start
    |=== === ===>xxxx                 extra notes under a level-2 heading
    \=== === ===>xxxx<=== === ===/  level-2 heading - end

# 3 /=== ===>xxxx<=== ===\  level-3 heading - start
    |=== ===>xxxx             extra notes under a level-3 heading
    \=== ===>xxxx<=== ===/   level-3 heading - end

# 4 /===>xxxx<===\  level-4 heading                 ===>xxxx<===     simplified form of a level-4 heading
   |===> xxx        extra notes under a level-4 heading
    \===>xxxx<===/    level-4 heading - end

# 5 ==> or =>  highlight / special case

# 6 ?=?  open question / something I'm unsure about

'''


class ssd(object):

    def __init__(self):
        # ===> to fill in: constructor parameters <===
        self.num_boxes = []  # used to count the anchor boxes
        self.feature_map_size = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]  # sizes of the feature maps
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow",
                        "diningtable", "dog", "horse", "motorbike", "person",
                        "pottedplant", "sheep", "sofa", "train", "tvmonitor"]  # 20 classes here; with background that makes 21
        self.feature_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']  # names of the feature layers used for detection
        self.img_size = (300, 300)  # input image size
        self.num_classes = 21  # number of classes; background counts as one class, the first class being 'bg'
        self.boxes_len = [4, 6, 6, 6, 4, 4]  # anchor boxes per location for the 6 feature maps; layers 4, 10 and 11 use 4 per location, the others 6
        # block4:  a 38x38 feature map generates 38x38x4 anchor boxes = 5776
        # block7:  a 19x19 feature map generates 19x19x6 anchor boxes = 2166
        # block8:  a 10x10 feature map generates 10x10x6 anchor boxes = 600
        # block9:  a 5x5 feature map generates     5x5x6 anchor boxes = 150
        # block10: a 3x3 feature map generates     3x3x4 anchor boxes = 36
        # block11: a 1x1 feature map generates     1x1x4 anchor boxes = 4
        # 8732 anchor boxes in total (5776 + 2166 + 600 + 150 + 36 + 4)
        self.isL2norm = [True, False, False, False, False, False]  # block4 sits early in the network; its feature norms are large, so it needs L2 normalization
        self.anchor_size = [(21., 45.), (45., 99.), (99., 153.), (153., 207.), (207., 261.), (261., 315.)]
        self.anchor_ratios = [[2, .5], [2, .5, 3, 1./3], [2, .5, 3, 1./3], [2, .5, 3, 1./3], [2, .5], [2, .5]]
        self.anchor_steps = [8, 16, 32, 64, 100, 300]

        self.prior_scaling = [0.1, 0.1, 0.2, 0.2]  # prior box scaling factors: 0.1 for the x,y offsets, 0.2 for the w,h offsets
        self.n_boxes = [5776, 2166, 600, 150, 36, 4]
        # prior boxes per feature map = feature map height x width x boxes per location:
        # layer 4:  38x38x4 = 5776
        # layer 7:  19x19x6 = 2166
        # layer 8:  10x10x6 = 600
        # layer 9:  5x5x6 = 150
        # layer 10: 3x3x4 = 36
        # layer 11: 1x1x4 = 4
        self.threshold = 0.2  # the paper uses 0.5; set to 0.2 here so that more objects are detected


# /==== === === ===>       SSD network architecture       <=== === === ====\
    # ==== ===> L2 normalization <=== ====
    def l2norm(self, x, scale, trainable=True, scope='L2Normalization'):
        n_channels = x.get_shape().as_list()[-1]  # number of channels: get the shape, turn it into a list, take the last element
        l2_norm = tf.nn.l2_normalize(x, dim=[3], epsilon=1e-12)  # normalize each pixel along the channel dimension only
        with tf.variable_scope(scope):
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.constant_initializer(scale),  # ?=? why doesn't the course author have this step
                                    trainable=trainable)
            return l2_norm * gamma

    # /=== ===> below: building blocks needed for the CNN <=== ====\
    # |=== ===conv2d, max_pool2d, pad2d, dropout
    # |=== ===tf.layers.conv2d(inputs=xxx, filters=xxx, kernel_size=xxx,
    #                          stride=xxx, padding=xxx, dilation_rate=xxx,
    #                          name=xxx, activation=xxx)
    # |=== ===tf.layers.max_pooling2d(inputs=xxx, pool_size=xxx,
    #                                 strides=xxx, padding=xxx,
    #                                 name=xxx)
    # |=== ===tf.pad(x, paddings=xxx)
    # |=== ===tf.layers.dropout(inputs=xxx, rate=xxx)
    # |=== ===define a convolution op: 1 input, 2 number of kernels, 3 kernel size | 4 stride, 5 padding, 6 dilation | 7 activation, 8 name
    def conv2d(self, x, filter, k_size,  # input x, number of kernels filter, kernel size k_size
               stride=[1, 1], padding='same', dilation=[1, 1],  # stride, padding, dilation rate (1 means an ordinary, non-dilated convolution)
               activation=tf.nn.relu, scope='conv2d'):  # relu activation, name scope
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size,
                                strides=stride, dilation_rate=dilation, padding=padding,
                                name=scope, activation=activation)

    def max_pool2d(self, x, pool_size,
                   stride,
                   scope='max_pool2d'):  # I assume padding is 'valid'
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding='valid', name=scope)

    # used to pad the stride-2 convolutions in blocks 8 and 9. From block 6 onward we pad manually rather than relying on the built-in padding.
    def pad2d(self, x,
              pad):
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])
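        # |=== e.g. (illustrative shapes): pad2d with pad=1 turns a (?, 10, 10, C) tensor into (?, 12, 12, C);
        # |=== a 3x3 convolution with stride 2 and 'valid' padding then gives (?, 5, 5, filters), which is why block9 comes out 5x5.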

    def dropout(self, x, d_rate=0.5):
        return tf.layers.dropout(inputs=x, rate=d_rate)

    def ssd_prediction(self, x, num_classes, box_num, isL2norm, scope='multibox'):
        reshape = [-1] + x.get_shape().as_list()[1:-1]  # drop the first and last dims, keep the spatial dims as a list
        # in Python: a = [1,2,3,4], b = a[1:-1], c = [-1] + b
        # print(b) = [2, 3],   print(c) = [-1, 2, 3]
        # taking block8 as the example: shape = (?, 10, 10, 512); we need the 2nd and 3rd numbers
        # the leading -1 stands for the batch size, which is unknown, so in tf it is usually written as -1
        # reshape = [-1, 10, 10]
        with tf.variable_scope(scope):  # start the convolutions
            if isL2norm:
                x = self.l2norm(x)  # first check whether normalization is needed
            # ==> location prediction: box centre and size, a regression problem: no softmax needed
            location_pred = self.conv2d(x, filter=box_num * 4, k_size=[3, 3],
                                        activation=None, scope='conv_loc')
            '''filter: number of kernels = boxes per location x the 4 values per box (x, y, w, h); 3x3 kernel; no activation here (the conv2d default would add one)'''
            location_pred = tf.reshape(location_pred, reshape + [box_num, 4])  # box_num anchor boxes per location
            # reshape + [box_num, 4] = [-1, 10, 10, box_num, 4]

            # ==> class prediction: a classification problem: softmax needed (applied later)
            class_pred = self.conv2d(x, filter=box_num * num_classes, k_size=[3, 3],
                                     activation=None, scope='conv_cls')
            '''filter: number of kernels = boxes per location x the 21 classes per box; 3x3 kernel; no activation here (the conv2d default would add one)'''
            class_pred = tf.reshape(class_pred, reshape + [box_num, num_classes])
            # reshape + [box_num, num_classes] = [-1, 10, 10, box_num, num_classes]
            print(location_pred, class_pred)
            return location_pred, class_pred
    # \=== ===> above: building blocks needed for the CNN <=== ====/

    # /=== ===> below: the actual network architecture - start <=== ===\
    def set_net(self):
        check_points = {}  # dictionary holding the feature layers, used for the loop below
        predictions = []
        locations = []

        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # ===> first 5 VGG blocks <===
            # b1
            net = self.conv2d(x, filter=64, k_size=[3, 3], scope='conv1_1')  # 64 3x3 kernels, default stride 1, standard convolution
            net = self.conv2d(net, 64, [3, 3], scope='conv1_2')  # 64 3x3 kernels, default stride 1
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool1')  # 2x2 pooling, stride 2; pooling layers are usually 2
            # b2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool2')
            # b3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool3')
            # b4 => 1st detection layer
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_3')
            check_points['block4'] = net
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool4')

            # b5: the key part; from here on it differs from VGG
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_3')
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scope='pool5')  # => 3x3 pooling kernel, stride changed to 1x1

            # ===> convolution layers that replace the VGG fully-connected layers <===
            # b6 conv6: 3x3x1024, dilation 6
            net = self.conv2d(net, filter=1024, k_size=[3, 3], dilation=[6, 6], scope='conv6')
            # => 1024 kernels, dilation=[6, 6]

            # b7 conv7: 1x1x1024 => 2nd detection layer
            net = self.conv2d(net, filter=1024, k_size=[1, 1], scope='conv7')
            # => 1024 kernels, kernel size [1, 1]
            check_points['block7'] = net

            # b8 conv8_1: 1x1x256; conv8_2: 3x3x512-s2-valid => 3rd detection layer
            net = self.conv2d(net, 256, [1, 1], scope='conv8_1x1')  # => 256 kernels, 1x1
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2], scope='conv8_3x3', padding='valid')
            # => 512 kernels, 3x3 kernel, stride 2, 'valid'
            check_points['block8'] = net

            # b9 conv9_1: 1x1x128; conv9_2: 3x3x256-s2-valid => 4th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv9_1x1')  # => 128 kernels, 1x1
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2], scope='conv9_3x3', padding='valid')
            # => 256 kernels, 3x3 kernel, stride 2x2, valid
            check_points['block9'] = net

            # b10 conv10_1: 1x1x128; conv10_2: 3x3x256-s1-valid => 5th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv10_1x1')  # => 128 kernels, 1x1
            net = self.conv2d(net, 256, [3, 3], scope='conv10_3x3', padding='valid')
            # => 256 kernels, valid
            check_points['block10'] = net

            # b11 conv11_1: 1x1x128; conv11_2: 3x3x256-s1-valid => 6th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv11_1x1')  # => 128 kernels, 1x1
            net = self.conv2d(net, 256, [3, 3], scope='conv11_3x3', padding='valid')
            # => 256 kernels, valid
            check_points['block11'] = net

            for i, j in enumerate(self.feature_layers):  # enumerate the feature layers: i is the index, j the name, e.g. 'block4'
                loc, cls = self.ssd_prediction(
                    x=check_points[j],
                    num_classes=self.num_classes,
                    box_num=self.boxes_len[i],
                    isL2norm=self.isL2norm[i],
                    scope=j + '_box'
                )
                predictions.append(tf.nn.softmax(cls))  # the class branch needs softmax
                locations.append(loc)  # the location branch does not
            print(check_points)  # inspect the network structure, e.g. block8: (?, 10, 10, 512)
            print(locations, predictions)
            return locations, predictions, x
            # each element of locations is a 5-d tensor whose last dimension is 4, holding the predicted offsets 0:x 1:y 2:w 3:h (as used in ssd_decode)
            # locations is a list; its elements have shapes like [-1, 10, 10, box_num, 4]

    # \=== ===> above: the actual network architecture <=== ===/
# \=== === === ===>      SSD network architecture part - end       <=== === === ===/

# /=== === === ===> prior box generation * decoding * filtering - start <=== === === ===\
    # /=== ===> 1: prior box generation - start <=== ===\
    # |=== taking block8 (10x10) as the example, generate the prior boxes
    # |=== h0,           h1,           h2,            h3,            h4,            h5:
    # |=== small square  large square  2:1 rectangle  1:2 rectangle  3:1 rectangle  1:3 rectangle
    def ssd_anchor_layer(self, img_size, feature_map_size,
                         anchor_size, anchor_ratio, anchor_step,
                         box_num, offset=0.5):
        # get the coordinates of every feature-map cell
        y, x = np.mgrid[0: feature_map_size[0], 0:feature_map_size[1]]  # for block8 this is 0:10, 0:10
        # >>> y, x= np.mgrid[0:10, 0:10]
        # >>> print(x)
        # [[0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]
        #  [0 1 2 3 4 5 6 7 8 9]]
        # >>> print(y)
        # [[0 0 0 0 0 0 0 0 0 0]
        #  [1 1 1 1 1 1 1 1 1 1]
        #  [2 2 2 2 2 2 2 2 2 2]
        #  [3 3 3 3 3 3 3 3 3 3]
        #  [4 4 4 4 4 4 4 4 4 4]
        #  [5 5 5 5 5 5 5 5 5 5]
        #  [6 6 6 6 6 6 6 6 6 6]
        #  [7 7 7 7 7 7 7 7 7 7]
        #  [8 8 8 8 8 8 8 8 8 8]
        #  [9 9 9 9 9 9 9 9 9 9]]

        y = (y.astype(np.float32) + offset) * anchor_step / img_size[0]
        x = (x.astype(np.float32) + offset) * anchor_step / img_size[1]

        # compute h, w for the two aspect-ratio-1 boxes
        h = np.zeros((box_num,), np.float32)
        w = np.zeros((box_num,), np.float32)
        # h >>> array([ 0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)
        # w >>> array([ 0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

        h[0] = anchor_size[0] / img_size[0]  # small square
        w[0] = anchor_size[0] / img_size[0]
        h[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[0]  # large square
        w[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[0]

        for i, j in enumerate(anchor_ratio):
            h[i + 2] = anchor_size[0] / img_size[0] / (j ** 0.5)
            w[i + 2] = anchor_size[0] / img_size[0] * (j ** 0.5)
        return y, x, h, w
        # h[0]=99/300            w[0]=99/300:             small square h, w
        # h[1]=sqrt(99*153)/300  w[1]=sqrt(99*153)/300:   large square h, w
        # h[2]=99/300/sqrt(2)    w[2]=99/300*sqrt(2):     wide  - 2:1 rectangle, from anchor_ratio[0] = 2
        # h[3]=99/300/sqrt(0.5)  w[3]=99/300*sqrt(0.5):   tall  | 1:2 rectangle, from anchor_ratio[1] = .5
        # h[4]=99/300/sqrt(3)    w[4]=99/300*sqrt(3):     wide  - 3:1 rectangle, from anchor_ratio[2] = 3
        # h[5]=99/300/sqrt(1/3)  w[5]=99/300*sqrt(1/3):   tall  | 1:3 rectangle, from anchor_ratio[3] = 1./3
        #             h[0] square   h[1] square   h[2] 2:1 rect  h[3] 1:2 rect  h[4] 3:1 rect  h[5] 1:3 rect
        # h = array([ 0.33000001,  0.41024384,  0.23334524,   0.46669048,    0.19052559,    0.57157677], dtype=float32)
        # w = array([ 0.33000001,  0.41024384,  0.46669048,   0.23334524,    0.57157677,    0.19052559], dtype=float32)
    # \=== ===> 1: prior box generation - end   <=== ===/

    # /=== ===>   2: decoding - start  <=== ===\
    def ssd_decode(self, location, box, prior_scaling):
        y_a, x_a, h_a, w_a = box
        cx = location[:, :, :, :, 0] * w_a * prior_scaling[0] + x_a  # ?=? this part should involve w and h
        cy = location[:, :, :, :, 1] * h_a * prior_scaling[1] + y_a
        # locations is the return value of set_net
        # each element of locations is a 5-d tensor whose last dimension is 4, holding the predicted offsets 0:x 1:y 2:w 3:h
        # locations is a list; its elements have shapes like [-1, 10, 10, box_num, 4]
        # here location is locations[2], i.e. the block8 layer
        w = w_a * tf.exp(location[:, :, :, :, 2] * prior_scaling[2])
        h = h_a * tf.exp(location[:, :, :, :, 3] * prior_scaling[3])  # actual box height
        print(cx, cy, w, h)

        bboxes = tf.stack([cy - h/2.0, cx - w/2.0, cy + h/2.0, cx + w/2.0], axis=-1)
        # the four coordinate tensors are stacked together along a new last axis
        # top-left point: y = cy - h/2, x = cx - w/2; bottom-right point: y = cy + h/2, x = cx + w/2
        print(bboxes)
        return bboxes
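        # |=== A worked example with made-up numbers (not from a real run): for an anchor with
        # |=== x_a = 0.35, y_a = 0.35, w_a = h_a = 0.33 and predicted offsets [0.5, 0.5, 0.1, 0.1]:
        # |=== cx = 0.5 * 0.33 * 0.1 + 0.35 = 0.3665 and w = 0.33 * exp(0.1 * 0.2) ≈ 0.3367,
        # |=== so the decoded box's left edge is cx - w/2 ≈ 0.198 (all in units of the image size).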

    # \=== ===>   2: decoding - end    <=== ===/

    # /=== ===> 3: prior box filtering <=== ===\
    def choose_anchor_boxes(self, predictions, anchor_box, n_box):
        # Each element of predictions holds the class confidences, shape = [-1, 10, 10, box_num, num_classes]
        anchor_box = tf.reshape(anchor_box, [n_box, 4])  # 5-d tensor -> 2-d tensor; n_box is the total number of anchor boxes for this layer (10x10x6 = 600 for block8)
        prediction = tf.reshape(predictions, [n_box, 21])
        prediction = prediction[:, 1:]  # column 0 is the background confidence, which we don't need, so take columns from 1 onward
        classes = tf.argmax(prediction, axis=1) + 1  # index of the highest-probability class per row (axis=1); +1 because the background column was dropped
        scores = tf.reduce_max(prediction, axis=1)  # score of that best class; boxes above the threshold are kept (below)

        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)   # first argument: what to filter, second: the filter condition
        scores = tf.boolean_mask(scores, filter_mask)
        anchor_box = tf.boolean_mask(anchor_box, filter_mask)

        return classes, scores, anchor_box
        # ops to study: tf.reshape() tf.reduce_max() tf.boolean_mask()
    # \=== ===> 3: prior box filtering <=== ===/
# \=== === === ===> prior box generation * decoding * filtering - end <=== === === ===/


if __name__ == '__main__':
    sd = ssd()
    locations, predictions, x = sd.set_net()
    box = sd.ssd_anchor_layer(sd.img_size, (10, 10), (99., 153.), [2., .5, 3., 1/3], 32, 6)
    boex = sd.ssd_decode(locations[2], box, sd.prior_scaling)
    print(boex)  # shape = (?, 10, 10, 6, 4)
    # taking block8 as the example, the output here is Tensor("stack:0", shape=(?, 10, 10, 6, 4), dtype=float32)
    # 10, 10 because the third detection layer is 10x10 (we passed locations[2])
    # 6 is the number of anchor boxes per cell for this layer
    # 4 is the top-left & bottom-right coordinates: ymin, xmin, ymax, xmax
    # locations[0] is 38x38, locations[1] is 19x19, locations[2] is 10x10, locations[3] is 5x5, [4] is 3x3, [5] is 1x1

    cls, sco, a_box = sd.choose_anchor_boxes(predictions[2], boex, sd.n_boxes[2])
    print('----------------------------')
    print(cls, sco, a_box)

 

 
