https://www.bilibili.com/video/av43996494/?p=2
0 上一节没写完的代码
#!usr/bin/python
# -*- coding: utf-8 -*-
# Creation Date: 2019/7/10
import tensorflow as tf
import numpy as np
import cv2
class ssd(object):
    """Skeleton of the SSD-300 network (VGG-16 backbone), TF1.x layers API."""

    def __init__(self):
        pass  # anchor / feature-map configuration is added in a later revision

    # ===> L2 normalization <===
    def l2norm(self, x, scale, trainable=True, scope='L2Normalization'):
        """L2-normalize x across channels, then rescale with a learnable gamma.

        x: 4-D feature map (N, H, W, C) — assumed NHWC, TODO confirm with caller.
        scale: initial value for every gamma component.
        """
        n_channels = x.get_shape().as_list()[-1]  # channel count C (last axis)
        # Normalize each spatial position over the channel axis only.
        # `axis` replaces the deprecated `dim` keyword of tf.nn.l2_normalize.
        l2_norm = tf.nn.l2_normalize(x, axis=3, epsilon=1e-12)
        with tf.variable_scope(scope):
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.constant_initializer(scale),
                                    trainable=trainable)
        return l2_norm * gamma

    # ===> building blocks: conv2d, max_pool2d, pad2d, dropout <===
    def conv2d(self, x, filter, k_size, stride=(1, 1), padding='same',
               dilation=(1, 1), activation=tf.nn.relu, scope='conv2d'):
        """Convolution wrapper.

        filter: number of kernels (name kept for call-site compatibility even
        though it shadows the builtin); k_size: kernel height/width;
        dilation (1, 1) means an ordinary (non-atrous) convolution.

        FIX: tf.layers.conv2d takes `inputs=`, not `input=` — the original
        keyword raised a TypeError. Mutable list defaults replaced by tuples.
        """
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size,
                                strides=stride, padding=padding, dilation_rate=dilation,
                                name=scope, activation=activation)

    def max_pool2d(self, x, pool_size, stride, scope='max_pool2d'):
        # 'valid' padding, matching the SSD reference implementation.
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride,
                                       padding='valid', name=scope)

    def pad2d(self, x, pad):
        """Explicit spatial zero-padding used before the stride-2 convolutions
        of blocks 8/9; the built-in 'same' padding would not reproduce the
        reference feature-map shapes."""
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def dropout(self, x, d_rate=0.5):
        return tf.layers.dropout(inputs=x, rate=d_rate)

    # ===> network architecture <===
    def set_net(self):
        """Build the SSD-300 graph and collect the six detection feature maps."""
        check_points = {}  # detection feature layers, keyed by block name
        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # ===> first 5 VGG blocks <===
            # b1: two 3x3x64 convs (stride 1 by default), then 2x2/s2 max-pool
            net = self.conv2d(x, filter=64, k_size=[3, 3], scope='conv1_1')
            net = self.conv2d(net, 64, [3, 3], scope='conv1_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2])
            # b2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool2')
            # b3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool3')
            # b4 => 1st detection layer
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_3')
            check_points['block4'] = net
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool4')
            # b5: departs from plain VGG here — pool5 is 3x3 with stride 1
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_3')
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scope='pool5')
            # ===> convolutions replacing VGG's fully-connected layers <===
            # b6 conv6: 3x3x1024, dilation 6 (atrous convolution)
            net = self.conv2d(net, filter=1024, k_size=[3, 3], dilation=[6, 6], scope='conv6')
            # b7 conv7: 1x1x1024 => 2nd detection layer
            # FIX: the SSD paper uses a 1x1 kernel here; the original [3, 3]
            # (flagged "?=?" by the author) did not match the reference net.
            net = self.conv2d(net, filter=1024, k_size=[1, 1], scope='conv7')
            check_points['block7'] = net
            # b8 conv8_1: 1x1x256; conv8_2: 3x3x512-s2-valid => 3rd detection layer
            net = self.conv2d(net, 256, [1, 1], scope='conv8_1x1')
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2], scope='conv8_3x3', padding='valid')
            check_points['block8'] = net
            # b9 conv9_1: 1x1x128; conv9_2: 3x3x256-s2-valid => 4th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv9_1x1')
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2], scope='conv9_3x3', padding='valid')
            check_points['block9'] = net
            # b10 conv10_1: 1x1x128; conv10_2: 3x3x256-s1-valid => 5th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv10_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv10_3x3', padding='valid')
            check_points['block10'] = net
            # b11 conv11_1: 1x1x128; conv11_2: 3x3x256-s1-valid => 6th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv11_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv11_3x3', padding='valid')
            check_points['block11'] = net
        print(check_points)
if __name__ == '__main__':
    # Build the network graph once as a smoke test.
    detector = ssd()
    detector.set_net()
1 完善网络
需要用到别人训练好的权重文件,已经下载好了
1 添加构造函数
一共 8732 个锚点框(38×38×4=5776,19×19×6=2166,10×10×6=600,5×5×6=150,3×3×4=36,1×1×4=4;之前算成 8722 是因为把 38×38×4 误算成了 5766)
block4 因为激活值太大,需要先做 L2norm
2 最后加了两个卷积,来实现分类和锚点框定位
完善后的代码
#!usr/bin/python
# -*- coding: utf-8 -*-
# Creation Date: 2019/7/10
import tensorflow as tf
import numpy as np
import cv2
class ssd(object):
    """SSD-300 detector (VGG-16 backbone), TF1.x layers API.

    Builds the backbone plus extra feature layers and, on each of the six
    detection feature maps, a location head (4 offsets per anchor box) and a
    classification head (num_classes scores per anchor box).
    """

    def __init__(self):
        # ===> anchor / detection-head configuration <===
        self.num_boxes = []  # filled later: anchor-box counts per layer
        # FIX: primary name spelled correctly; old misspelled attribute kept
        # as an alias so existing callers keep working.
        self.feature_map_size = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        self.feaeture_map_size = self.feature_map_size
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow",
                        "diningtable", "dog", "horse", "motorbike", "person",
                        "pottedplant", "sheep", "sofa", "train", "tvmonitor"]  # 20 VOC classes (+1 background)
        self.feature_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']  # detection layers
        self.img_size = (300, 300)  # network input size
        self.num_classes = 21  # 20 VOC classes + background ('bg')
        self.boxes_len = [4, 6, 6, 6, 4, 4]  # anchors per feature-map cell, per layer
        # Anchors per layer (FIX: 38*38*4 is 5776, not 5766 — total is 8732):
        # block4 : 38*38*4 = 5776
        # block7 : 19*19*6 = 2166
        # block8 : 10*10*6 =  600
        # block9 :  5* 5*6 =  150
        # block10:  3* 3*4 =   36
        # block11:  1* 1*4 =    4
        # total            = 8732
        self.isL2norm = [True, False, False, False, False, False]  # only block4 (large activations) is L2-normalized

    # ===> L2 normalization <===
    def l2norm(self, x, scale, trainable=True, scope='L2Normalization'):
        """L2-normalize x across channels, then rescale with a learnable gamma.

        x: 4-D feature map (N, H, W, C); scale: initial value for every gamma.
        """
        n_channels = x.get_shape().as_list()[-1]  # channel count C (last axis)
        # Normalize each spatial position over the channel axis only.
        # `axis` replaces the deprecated `dim` keyword of tf.nn.l2_normalize.
        l2_norm = tf.nn.l2_normalize(x, axis=3, epsilon=1e-12)
        with tf.variable_scope(scope):
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.constant_initializer(scale),
                                    trainable=trainable)
        return l2_norm * gamma

    # ===> building blocks: conv2d, max_pool2d, pad2d, dropout <===
    def conv2d(self, x, filter, k_size, stride=(1, 1), padding='same',
               dilation=(1, 1), activation=tf.nn.relu, scope='conv2d'):
        """Convolution wrapper.

        filter: number of kernels (name kept for call-site compatibility even
        though it shadows the builtin); k_size: kernel height/width;
        dilation (1, 1) means an ordinary (non-atrous) convolution.

        FIX: tf.layers.conv2d takes `inputs=`, not `input=` — the original
        keyword raised a TypeError. Mutable list defaults replaced by tuples.
        """
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size,
                                strides=stride, padding=padding, dilation_rate=dilation,
                                name=scope, activation=activation)

    def max_pool2d(self, x, pool_size, stride, scope='max_pool2d'):
        # 'valid' padding, matching the SSD reference implementation.
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride,
                                       padding='valid', name=scope)

    def pad2d(self, x, pad):
        """Explicit spatial zero-padding used before the stride-2 convolutions
        of blocks 8/9; the built-in 'same' padding would not reproduce the
        reference feature-map shapes."""
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def dropout(self, x, d_rate=0.5):
        return tf.layers.dropout(inputs=x, rate=d_rate)

    def ssd_prediction(self, x, num_classes, box_len, isL2norm, scope='multibox'):
        """Detection head for one feature map.

        x: feature map (N, H, W, C); num_classes: number of classes incl.
        background; box_len: anchors per cell; isL2norm: apply L2 norm first.
        Returns (location_pred, class_pred) with shapes
        (N, H, W, box_len, 4) and (N, H, W, box_len, num_classes) —
        class_pred holds raw logits; the caller applies softmax.
        """
        # Keep H and W, drop batch (unknown -> -1) and channels.
        # FIX: was x.getshape() (AttributeError — this crashed the script).
        reshape = [-1] + x.get_shape().as_list()[1:-1]
        with tf.variable_scope(scope):
            if isL2norm:
                # FIX: l2norm requires a scale; 20 is the SSD paper's init value.
                x = self.l2norm(x, scale=20)
            # ==> location head: regression of x, y, w, h — no activation.
            # FIX: kernel is [3, 3]; the original [3 * 3] (= [9]) is wrong.
            location_pred = self.conv2d(x, filter=box_len * 4, k_size=[3, 3],
                                        activation=None, scope='conv_loc')
            location_pred = tf.reshape(location_pred, reshape + [box_len, 4])
            # ==> classification head: logits per anchor per class.
            # FIX: kernel is [3, 3]; the original 3 * 3 (= 9, i.e. 9x9) is wrong.
            class_pred = self.conv2d(x, filter=box_len * num_classes, k_size=[3, 3],
                                     activation=None, scope='conv_cls')
            # FIX: was tf.shape(...) — tf.reshape is what is meant here.
            class_pred = tf.reshape(class_pred, reshape + [box_len, num_classes])
            print(location_pred, class_pred)
            return location_pred, class_pred

    # ===> network architecture <===
    def set_net(self):
        """Build the SSD-300 graph and its per-layer detection heads."""
        check_points = {}  # detection feature layers, keyed by block name
        predictions = []   # per-layer class probabilities (after softmax)
        locations = []     # per-layer raw box regressions
        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # ===> first 5 VGG blocks <===
            # b1
            net = self.conv2d(x, filter=64, k_size=[3, 3], scope='conv1_1')
            net = self.conv2d(net, 64, [3, 3], scope='conv1_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2])
            # b2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool2')
            # b3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool3')
            # b4 => 1st detection layer
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_3')
            check_points['block4'] = net
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool4')
            # b5: departs from plain VGG — pool5 is 3x3 with stride 1
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_3')
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scope='pool5')
            # ===> convolutions replacing VGG's fully-connected layers <===
            # b6 conv6: 3x3x1024, dilation 6 (atrous convolution)
            net = self.conv2d(net, filter=1024, k_size=[3, 3], dilation=[6, 6], scope='conv6')
            # b7 conv7: 1x1x1024 => 2nd detection layer
            # FIX: the SSD paper uses a 1x1 kernel here (original had [3, 3]).
            net = self.conv2d(net, filter=1024, k_size=[1, 1], scope='conv7')
            check_points['block7'] = net
            # b8 conv8_1: 1x1x256; conv8_2: 3x3x512-s2-valid => 3rd detection layer
            net = self.conv2d(net, 256, [1, 1], scope='conv8_1x1')
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2], scope='conv8_3x3', padding='valid')
            check_points['block8'] = net
            # b9 conv9_1: 1x1x128; conv9_2: 3x3x256-s2-valid => 4th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv9_1x1')
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2], scope='conv9_3x3', padding='valid')
            check_points['block9'] = net
            # b10 conv10_1: 1x1x128; conv10_2: 3x3x256-s1-valid => 5th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv10_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv10_3x3', padding='valid')
            check_points['block10'] = net
            # b11 conv11_1: 1x1x128; conv11_2: 3x3x256-s1-valid => 6th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv11_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv11_3x3', padding='valid')
            check_points['block11'] = net
            # ===> attach a detection head to each feature layer <===
            for i, layer_name in enumerate(self.feature_layers):
                loc, cls = self.ssd_prediction(x=check_points[layer_name],
                                               # FIX: pass the count (21), not
                                               # self.classes (the name list).
                                               num_classes=self.num_classes,
                                               box_len=self.boxes_len[i],
                                               isL2norm=self.isL2norm[i],
                                               scope=layer_name + '_box')
                predictions.append(tf.nn.softmax(cls))  # classification needs softmax
                locations.append(loc)                   # regression output stays raw
        print(check_points)  # inspect shapes, e.g. block8: (?, 10, 10, 512)
        print(locations, predictions)
if __name__ == '__main__':
    # Build the network graph once as a smoke test.
    detector = ssd()
    detector.set_net()
问题是我的还是运行不了,报错如下(原因见上面代码中的 FIX 标注,例如 get_shape 写成了 getshape)。
打印结果分析:
左边的输出结果(位置预测):
(?, 38, 38, 4, 4) —— 38x38 的特征图, 每个位置 4 个框, 每框 4 个预测值 xywh
(?, 19, 19, 6, 4) —— 19x19 的特征图, 每个位置 6 个框, 每框 4 个预测值 xywh
右边的输出结果(类别预测):
(?, 38, 38, 4, 21) —— 38x38 的特征图, 4 个框, 21 个类别(类别预测个数)
(?, 19, 19, 6, 21) —— 19x19 的特征图, 6 个框, 21 个类别(类别预测个数)