手撸SSD

下面展示一些内联代码片。

// A code block
var foo = 'bar';
// A highlighted block
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt


class ssd(object):
    """SSD-300 detector graph (`ssd_300_vgg`) built with TensorFlow 1.x layers.

    The class bundles three things:

    * the network definition (`set_net`) producing per-layer box offsets and
      class probabilities from six feature maps,
    * default ("prior") box generation for one feature map
      (`ssd_anchor_layer`), and
    * decoding of predicted offsets back into corner-format boxes
      (`ssd_decode`).
    """

    def __init__(self):
        self.num_boxes = []
        # Spatial size (rows, cols) of each of the six prediction feature maps.
        self.feature_map_size = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow", "diningtable",
                        "dog", "horse", "motorbike", "person", "pottedplant",
                        "sheep", "sofa", "train", "tvmonitor"]
        # Keys of the intermediate activations that feed the prediction heads.
        self.feature_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        self.img_size = (300, 300)
        # Number of default boxes per feature-map cell, one entry per layer.
        self.boxes_len = [4, 6, 6, 6, 4, 4]
        # 20 names in `classes` plus one extra slot (presumably background).
        self.num_classes = 21
        # Whether a layer's features are L2-normalised before prediction.
        self.isL2norm = [True, False, False, False, False, False]
        # Per-layer (size, next size) pairs, in input-image pixels.
        self.anchor_sizes = [[21., 45.], [45., 99.], [99., 153.], [153., 207.], [207., 261.], [261., 315.]]
        # Per-layer extra aspect ratios (the two ratio-1 boxes are implicit).
        self.anchor_ratios = [[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                              [2, .5, 3, 1. / 3], [2, .5], [2, .5]]
        # Per-layer cell step, in input-image pixels.
        self.anchor_steps = [8, 16, 32, 64, 100, 300]
        # Scaling factors applied to the (cx, cy, w, h) offsets when decoding.
        self.prior_scaling = [0.1, 0.1, 0.2, 0.2]
        # Default boxes per layer; 8732 in total.
        self.n_boxes = [5776, 2166, 600, 150, 36, 4]
        self.threshold = 0.2

    def l2norm(self, x, trainable=True, scope='L2Normalization'):
        """L2-normalise `x` over its channel axis and rescale per channel.

        Args:
            x: 4-D tensor whose last (index 3) axis is the channel axis.
            trainable: whether the per-channel scale `gamma` is trainable.
            scope: variable scope that holds `gamma`.

        Returns:
            The normalised tensor multiplied by `gamma`.
        """
        n_channels = x.get_shape().as_list()[-1]
        # Normalise every pixel independently across its channels.
        l2_norm = tf.nn.l2_normalize(x, dim=[3], epsilon=1e-12)
        with tf.variable_scope(scope):
            # Start from an identity scale; without an explicit initializer
            # the variable would get the default random initializer.
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.ones_initializer(),
                                    trainable=trainable)
        return l2_norm * gamma

    def conv2d(self, x, filter, k_size, stride=(1, 1), padding='same', dilatio=(1, 1), activation=tf.nn.relu,
               scrope='conv2d'):
        """Thin wrapper around `tf.layers.conv2d`.

        Args:
            x: input tensor.
            filter: number of output channels.
            k_size: kernel size.
            stride: convolution strides.
            padding: `'same'` or `'valid'`.
            dilatio: dilation rate. TensorFlow does not allow a dilation rate
                other than 1 together with a stride other than 1.
            activation: activation function, or None for a linear layer.
            scrope: layer name.

        Returns:
            The output tensor of the convolution.
        """
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size, strides=stride, padding=padding,
                                dilation_rate=dilatio,  # previously accepted but silently ignored
                                name=scrope, activation=activation)

    def max_pool2d(self, x, pool_size, stride, scrope='max_pool2d'):
        """Max-pool `x` with `'same'` padding via `tf.layers.max_pooling2d`."""
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, name=scrope, padding='same')

    def pad2d(self, x, pad):
        """Zero-pad axes 1 and 2 of a 4-D tensor by `pad` on each side."""
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def dropout(self, x, d_rate=0.5, training=False):
        """Apply dropout with drop probability `d_rate`.

        Args:
            x: input tensor.
            d_rate: fraction of units to drop.
            training: bool or boolean tensor; dropout is only active when
                true. Defaults to False, which matches the previous
                behaviour (the op passes `x` through unchanged).

        Returns:
            The (possibly) dropped-out tensor.
        """
        return tf.layers.dropout(inputs=x, rate=d_rate, training=training)

    def ssd_predict(self, x, num_classes, box_num, isL2norm, scope='multibox'):
        """Build the localisation and classification heads for one feature map.

        Args:
            x: feature map tensor with static spatial dimensions.
            num_classes: number of class scores per box.
            box_num: number of default boxes per feature-map cell.
            isL2norm: if true, L2-normalise `x` before the heads.
            scope: variable scope for this head.

        Returns:
            Tuple `(location_pred, class_pred)` reshaped to
            `[-1, H, W, box_num, 4]` and `[-1, H, W, box_num, num_classes]`.
        """
        # Leading batch dimension plus the static spatial dimensions of x.
        reshape = [-1] + x.get_shape().as_list()[1: -1]
        with tf.variable_scope(scope):
            if isL2norm:
                x = self.l2norm(x)
                print(x)
            # Box regression head: 4 offsets per default box.
            location_pred = self.conv2d(x, filter=box_num * 4, k_size=[3, 3], activation=None, scrope='conv_loc')
            location_pred = tf.reshape(location_pred, reshape + [box_num, 4])
            # Classification head: raw scores, softmax is applied by the caller.
            class_pred = self.conv2d(x, filter=box_num * num_classes, k_size=[3, 3], activation=None, scrope='conv_cls')
            class_pred = tf.reshape(class_pred, reshape + [box_num, num_classes])
            print(location_pred, class_pred)
            return location_pred, class_pred

    def set_net(self):
        """Build the full SSD-300 graph.

        Returns:
            Tuple `(locations, predictions, x)`: two lists with one tensor per
            entry of `self.feature_layers` (box offsets and softmax class
            probabilities), and the `[None, 300, 300, 3]` float32 input
            placeholder.
        """
        check_points = {}
        predictions = []
        locations = []
        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # Block 1
            net = self.conv2d(x, filter=64, k_size=[3, 3], scrope='conv1_1')
            net = self.conv2d(net, 64, [3, 3], scrope='conv1_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scrope='pool1')
            # Block 2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scrope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scrope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scrope='pool2')
            # Block 3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scrope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scrope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scrope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scrope='pool3')
            # Block 4 (first prediction source)
            net = self.conv2d(net, filter=512, k_size=[3, 3], scrope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scrope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scrope='conv4_3')
            check_points['block4'] = net

            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scrope='pool4')
            # Block 5
            net = self.conv2d(net, filter=512, k_size=[3, 3], scrope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scrope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scrope='conv5_3')
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scrope='pool5')
            # Block 6: dilated convolution
            net = self.conv2d(net, 1024, [3, 3], dilatio=[6, 6], scrope='conv6')
            # Block 7
            # NOTE(review): the reference SSD uses a 1x1 kernel for conv7;
            # confirm that 3x3 is intended here.
            net = self.conv2d(net, 1024, [3, 3], scrope='conv7')
            check_points['block7'] = net
            # Block 8: explicit pad + 'valid' stride-2 conv halves the map
            net = self.conv2d(net, 256, [1, 1], scrope='conv8_1x1')
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2], scrope='conv8_3x3', padding='valid')
            check_points['block8'] = net
            # Block 9
            net = self.conv2d(net, 128, [1, 1], scrope='conv9_1x1')
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2], scrope='conv9_3x3', padding='valid')
            check_points['block9'] = net
            # Block 10: unpadded 3x3 conv shrinks the map by 2
            net = self.conv2d(net, 128, [1, 1], scrope='conv10_1x1')
            net = self.conv2d(net, 256, [3, 3], scrope='conv10_3x3', padding='valid')
            check_points['block10'] = net
            # Block 11
            net = self.conv2d(net, 128, [1, 1], scrope='conv11_1x1')
            net = self.conv2d(net, 256, [3, 3], scrope='conv11_3x3', padding='valid')
            check_points['block11'] = net

            # One prediction head per source layer.
            for layer, n_box, normalize in zip(self.feature_layers, self.boxes_len, self.isL2norm):
                loc, cls = self.ssd_predict(check_points[layer],
                                            num_classes=self.num_classes,
                                            box_num=n_box,
                                            isL2norm=normalize,
                                            scope=layer + '_box'
                                            )
                predictions.append(tf.nn.softmax(cls))
                locations.append(loc)
            return locations, predictions, x

    def ssd_anchor_layer(self, img_size, feature_map_size, anchor_size, anchor_ratio, anchor_step, box_num, offset=0.5):
        """Generate the default (prior) boxes of one feature map.

        All returned values are normalised by the image size.

        Args:
            img_size: `(height, width)` of the input image.
            feature_map_size: `(rows, cols)` of the feature map.
            anchor_size: `(size, next_size)` pair for this layer.
            anchor_ratio: extra aspect ratios (width / height).
            anchor_step: step between neighbouring cells, in image pixels.
            box_num: number of default boxes per cell; must be at least
                `len(anchor_ratio) + 2`.
            offset: position of the box centre inside a cell, in cell units.

        Returns:
            Tuple `(x, y, h, w)`: centre coordinates as float32 arrays of
            shape `(rows, cols, 1, 1)`, and box heights/widths as float32
            arrays of shape `(box_num,)`.
        """
        # Row/column index of every feature-map cell.
        rows, cols = np.mgrid[0:feature_map_size[0], 0:feature_map_size[1]]

        # Cell centres mapped to normalised image coordinates.
        y = (rows.astype(np.float32) + offset) * anchor_step / img_size[0]
        x = (cols.astype(np.float32) + offset) * anchor_step / img_size[1]
        y = np.expand_dims(y, axis=-1)
        x = np.expand_dims(x, axis=-1)

        h = np.zeros((box_num,), np.float32)
        w = np.zeros((box_num,), np.float32)

        # Two ratio-1 boxes: the layer's own size, and the geometric mean of
        # this size and the next one.
        h[0] = anchor_size[0] / img_size[0]
        w[0] = anchor_size[0] / img_size[1]  # width is normalised by image width
        geo_mean = (anchor_size[0] * anchor_size[1]) ** 0.5
        h[1] = geo_mean / img_size[0]
        w[1] = geo_mean / img_size[1]

        # Remaining boxes: same size, stretched by each aspect ratio.
        for i, ratio in enumerate(anchor_ratio):
            root = ratio ** 0.5
            h[i + 2] = anchor_size[0] / img_size[0] / root
            w[i + 2] = anchor_size[0] / img_size[1] * root

        return np.expand_dims(x, axis=-1), np.expand_dims(y, axis=-1), h, w

    def ssd_decode(self, location, box, prior_scaling):
        """Decode predicted offsets into corner-format boxes.

        Args:
            location: offsets tensor of shape `(N, H, W, box_num, 4)` holding
                `(cx, cy, w, h)` offsets.
            box: `(x, y, h, w)` default boxes as returned by
                `ssd_anchor_layer`.
            prior_scaling: four scaling factors for the `(cx, cy, w, h)`
                offsets.

        Returns:
            Tensor of shape `(N, H, W, box_num, 4)` with
            `(ymin, xmin, ymax, xmax)` in the last axis.
        """
        x, y, h, w = box
        # The centres arrive with two trailing singleton axes; collapse them
        # to (H, W, 1) so they line up with the (N, H, W, box_num) offsets
        # instead of being broadcast against the batch axis.
        grid_shape = tuple(np.shape(x)[:2]) + (1,)
        x = np.reshape(x, grid_shape)
        y = np.reshape(y, grid_shape)

        # Centre offsets are expressed in units of the default box size.
        cx = location[:, :, :, :, 0] * w * prior_scaling[0] + x
        cy = location[:, :, :, :, 1] * h * prior_scaling[1] + y
        # Size offsets are log-scale factors of the default box size.
        cw = w * tf.exp(location[:, :, :, :, 2] * prior_scaling[2])
        ch = h * tf.exp(location[:, :, :, :, 3] * prior_scaling[3])

        bboxes = tf.stack([cy - ch / 2.0, cx - cw / 2.0, cy + ch / 2.0, cx + cw / 2.0], axis=-1)
        print(bboxes)
        return bboxes


if __name__ == '__main__':
    # Smoke test: build the graph, generate the default boxes of the
    # 10x10 feature map and decode that layer's raw offsets against them.
    sd = ssd()
    locations, predictions, x = sd.set_net()
    box = sd.ssd_anchor_layer(
        img_size=sd.img_size,
        feature_map_size=(10, 10),
        anchor_size=(99., 153.),
        anchor_ratio=[2., .5, 3., 1 / 3],
        anchor_step=32,
        box_num=6,
    )
    # locations[2] is the offsets tensor of the third prediction layer.
    decoded_boxes = sd.ssd_decode(locations[2], box, sd.prior_scaling)
    print(decoded_boxes)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值