MobileNet v2在MobileNet的基础之上添加了类似ResNet的残差结构,并在残差块内运用深度可分离卷积:将残差块内的两个3x3卷积改为两个1x1的点卷积和一个3x3的深度可分离卷积。该网络结构首先使用1x1的点卷积对输入特征的通道数目进行扩张,之后利用3x3的深度可分离卷积提取每一个通道的特征,最后再利用1x1的点卷积对特征进行通道压缩。其最终效果是:相比MobileNet v1,网络模型的参数量减少,同时准确性也有所提高。
论文链接:https://arxiv.org/abs/1801.04381
论文名:MobileNetV2: Inverted Residuals and Linear Bottlenecks
该模型中加入了ResNet式的残差连接:当步长为1时使用残差连接,当步长为2时不使用残差连接。其基本模块结构如下图:
当步长为1时,特征图的大小不发生改变,可以直接将输入与输出相加,前提是大小和通道数均不发生变化。当步长为2时,特征图的大小发生改变,因此不可把输入直接与输出相加(可尝试把输出处理后与输入相加)。其总体的网络结构参数如下图:
代码如下:
import tensorflow as tf
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Accumulator for the five CIFAR-10 training batches; both entries are lists.
train_data = {b'data': [], b'labels': []}

# Each batch file unpickles to a dict with bytes keys. According to the
# original author's notes, data[b'data'] is a 10000 x 3072 numpy array (the
# first 1024 values are the red channel, the next 1024 green, the last 1024
# blue) and data[b'labels'] is a list of 10000 class ids.

# Raw string: backslashes in the Windows path must never be read as escape
# sequences (a plain "...\test_batch" literal turns "\t" into a TAB).
CIFAR_DIR = r"H:\data\cifar-10-batches-py"

# NOTE: pickle.load can execute arbitrary code; only load trusted batch files.
# Load the five training batches and concatenate them.
for i in range(5):
    with open(CIFAR_DIR + "/data_batch_" + str(i + 1), mode='rb') as file:
        data = pickle.load(file, encoding='bytes')
    train_data[b'data'] += list(data[b'data'])
    train_data[b'labels'] += data[b'labels']

# Load the single test batch.
with open(CIFAR_DIR + "/test_batch", mode='rb') as file:
    test_data = pickle.load(file, encoding='bytes')
# Global constants and the default session.
NUM_LABLES = 10 # number of output classes
FC_SIZE = 512 # hidden fully-connected layer width; NOTE(review): not referenced in the visible code
BATCH_SIZE = 100 # number of samples per training batch
lamda = 0.004 # L2 regularisation coefficient; defined but the L2 terms are commented out, so it is unused
sess = tf.InteractiveSession()
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def dwconv(input, filter_size, in_filters, mutil_filters, strides):
    """Apply a depthwise 2-D convolution using a newly created kernel.

    Args:
        input: tensor to convolve.
        filter_size: height and width of the (square) kernel.
        in_filters: third dimension of the kernel shape (input channels).
        mutil_filters: last dimension of the kernel shape (channel multiplier).
        strides: stride list forwarded unchanged to the TensorFlow op.

    Returns:
        The result of ``tf.nn.depthwise_conv2d`` with "SAME" padding.
    """
    kernel_shape = [filter_size, filter_size, in_filters, mutil_filters]
    depthwise_kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1))
    return tf.nn.depthwise_conv2d(
        input,
        depthwise_kernel,
        strides,
        padding="SAME",
        rate=None,
        name=None,
        data_format=None,
    )
def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and "SAME" padding."""
    # Both ksize and strides use the [1, height, width, 1] layout.
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def _conv(x, filter_size, in_fliters, out_filters, strides):
    """Apply a biased 2-D convolution with a newly created kernel.

    Args:
        x: input tensor.
        filter_size: height and width of the (square) kernel.
        in_fliters: number of input channels (third kernel dimension).
        out_filters: number of output channels (last kernel dimension).
        strides: stride list forwarded unchanged to ``tf.nn.conv2d``.

    Returns:
        ``conv2d(x, kernel) + bias`` computed with "SAME" padding.
    """
    with tf.variable_scope("conv"):
        # The bias is created before the kernel, as in the original code, so
        # the order in which variables are added to the graph is unchanged.
        b = bias_variable([out_filters])
        # A new kernel is always created (tf.Variable, not tf.get_variable),
        # initialised from a truncated normal with stddev 0.1.
        kernel = tf.Variable(
            tf.truncated_normal(
                [filter_size, filter_size, in_fliters, out_filters],
                stddev=0.1,
            )
        )
        return tf.nn.conv2d(x, kernel, strides, padding='SAME') + b
def stride_arr(stride):
    """Expand a scalar stride into the four-element list ``[1, s, s, 1]``.

    This is the stride format expected by ``tf.nn.conv2d``.
    """
    return [1] + [stride] * 2 + [1]
def BN(input, c):
    """Normalise ``input`` with statistics computed from the current batch.

    Moments are taken over axes 0, 1 and 2. A scale initialised to ones and
    an offset initialised to zeros, each of length ``c``, are created as new
    variables on every call.
    """
    batch_mean, batch_variance = tf.nn.moments(input, [0, 1, 2])
    gamma = tf.Variable(tf.ones([c]))
    beta = tf.Variable(tf.zeros([c]))
    return tf.nn.batch_normalization(
        input,
        batch_mean,
        batch_variance,
        offset=beta,
        scale=gamma,
        variance_epsilon=0.001,
    )
def bottle(input, filter, kernel, t, strides, is_training, name, r=False):
    """Build one bottleneck unit: 1x1 expand, depthwise conv, 1x1 project.

    Args:
        input: input tensor; its channel count is read from dimension 3.
        filter: number of output channels of the final 1x1 convolution.
        kernel: size of the depthwise kernel.
        t: expansion factor applied to the input channel count.
        strides: scalar stride of the depthwise convolution.
        is_training: forwarded to ``bn_layer``.
        name: prefix used to derive the batch-norm variable names.
        r: when truthy, ``input`` is added to the result (residual shortcut).

    Returns:
        The output tensor of the unit.
    """
    in_channels = input.get_shape()[3].value
    expanded = t * in_channels

    # 1x1 expansion.
    # NOTE(review): no normalisation/activation is applied between this
    # convolution and the depthwise one; confirm this is intended.
    out = _conv(input, 1, in_channels, expanded, stride_arr(1))

    # Depthwise convolution, batch norm, ReLU6.
    out = dwconv(out, kernel, expanded, 1, stride_arr(strides))
    out = bn_layer(out, name + "0", name + "1", is_training)
    out = tf.nn.relu6(out)

    # 1x1 projection followed by batch norm and no activation.
    out = _conv(out, 1, expanded, filter, stride_arr(1))
    out = bn_layer(out, name + "3", name + "4", is_training)

    return out + input if r else out
def bn_layer(x, name1, name2, is_training, name='BatchNorm', moving_decay=0.9, eps=1e-5):
    """Batch normalisation with exponential-moving-average statistics.

    Args:
        x: input tensor of rank 2 or rank 4.
        name1: variable name used for the scale (gamma) parameter.
        name2: variable name used for the offset (beta) parameter.
        is_training: boolean tensor/value; compared against True to choose
            between batch statistics and the moving averages.
        name: variable scope in which gamma and beta are created.
        moving_decay: decay of the exponential moving average.
        eps: variance epsilon passed to ``tf.nn.batch_normalization``.

    Returns:
        The normalised tensor ``gamma * normalised(x) + beta``.
    """
    # Only rank-4 and rank-2 inputs are accepted.
    shape = x.shape
    assert len(shape) in [2, 4]
    param_shape = shape[-1]

    with tf.variable_scope(name):
        # The two learnable parameters of batch norm.
        gamma = tf.get_variable(name1, param_shape, initializer=tf.constant_initializer(1))
        beta = tf.get_variable(name2, param_shape, initializer=tf.constant_initializer(0))

        # Moments are computed over every axis except the last one.
        axes = list(range(len(shape) - 1))
        batch_mean, batch_var = tf.nn.moments(x, axes, name='moments')

        # Track the batch statistics with an exponential moving average.
        ema = tf.train.ExponentialMovingAverage(moving_decay)

        def mean_var_with_update():
            # Update the moving averages, then return the batch statistics.
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # Training: use batch statistics and update the averages.
        # Otherwise: use the stored moving averages.
        mean, var = tf.cond(
            tf.equal(is_training, True),
            mean_var_with_update,
            lambda: (ema.average(batch_mean), ema.average(batch_var)),
        )

        return tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
def resudual_conv(input, kernel, filter, strides, t, n, is_training, name):
    """Stack ``n`` bottleneck units into one stage.

    The first unit uses the given ``strides`` and has no shortcut. The
    remaining ``n - 1`` units use stride 1 and add their input to their
    output.

    Args:
        input: input tensor of the stage.
        kernel: depthwise kernel size.
        filter: number of output channels of every unit.
        strides: scalar stride of the first unit.
        t: expansion factor.
        n: number of units in the stage.
        is_training: forwarded to ``bottle``.
        name: prefix from which each unit's name is derived.

    Returns:
        The output tensor of the last unit.
    """
    out = bottle(input, filter, kernel, t, strides, is_training, name + "00")
    for suffix in map(str, range(1, n)):
        out = bottle(out, filter, kernel, t, 1, is_training, name + suffix, r=True)
    return out
# Placeholders: flattened images (batch x 3072) and one-hot labels.
x = tf.placeholder(tf.float32, [None, 3072])
y_ = tf.placeholder(tf.float32, [None, NUM_LABLES])

# Reshape each row to 3 x 32 x 32, then move the channel axis last so the
# tensor is laid out as 32 x 32 x 3 for the convolutions.
x_image = tf.transpose(tf.reshape(x, [-1, 3, 32, 32]), [0, 2, 3, 1])

is_training = tf.placeholder(tf.bool)
learnrate = tf.placeholder(tf.float32)

# Stem: 3x3 convolution, 3 -> 32 channels, stride 2.
conv1 = _conv(x_image, 3, 3, 32, stride_arr(2))

# One row per stage: (output channels, stride, expansion factor, repeats).
# Every stage uses a 3x3 depthwise kernel and is named "res<index>".
_STAGES = (
    (16, 1, 1, 1),
    (24, 2, 6, 2),
    (32, 2, 6, 3),
    (64, 2, 6, 4),
    (96, 1, 6, 3),
    (160, 2, 6, 3),
    (320, 1, 6, 1),
)
residualx = conv1
for _stage_no, (_channels, _stride, _expand, _repeats) in enumerate(_STAGES, start=1):
    residualx = resudual_conv(
        residualx, 3, _channels, _stride, _expand, _repeats,
        is_training, "res" + str(_stage_no),
    )
# Flatten the backbone output to 320 features per example. The 32x32 input
# passes through five stride-2 stages, leaving a 1 x 1 x 320 feature map.
h_pool2_flat = tf.reshape(residualx, [-1, 1 * 1 * 320])

# First fully-connected layer: 320 -> 1280 with ReLU.
W_fc1 = weight_variable([1 * 1 * 320, 1280])
b_fc1 = bias_variable([1280])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout on the hidden layer; keep_prob is fed at run time.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Second fully-connected layer: 1280 -> NUM_LABLES logits.
W_fc2 = weight_variable([1280, NUM_LABLES])
b_fc2 = bias_variable([NUM_LABLES])
log = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Reuse the logits instead of building a second, identical matmul.
prediction = tf.nn.softmax(log)

# Hand-written cross entropy; the small constant avoids log(0).
# NOTE(review): reduce_mean averages over classes as well as examples, so
# this value is 1/NUM_LABLES of the per-example cross entropy. An alternative
# is tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=log).
# NOTE(review): no L2 regularisation is added to the loss (lamda is unused).
cross_entropy = tf.reduce_mean(-tf.multiply(y_, tf.log(prediction + 0.000001)))

# Adam with a fixed learning rate of 1e-3.
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)

# Accuracy: fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialise all variables in the current default session.
# tf.global_variables_initializer replaces the deprecated
# tf.initialize_all_variables and matches the initialiser used for training.
sess.run(tf.global_variables_initializer())

# Scale the 0-255 pixel values to 0-1 and convert the list to a numpy array.
x_train = np.array(train_data[b'data']) / 255
print(x_train.size)

# One-hot encode the training labels.
y_train = np.array(pd.get_dummies(train_data[b'labels']))

# Same preprocessing for the test set.
x_test = test_data[b'data'] / 255
y_test = np.array(pd.get_dummies(test_data[b'labels']))

saver = tf.train.Saver()

# Histories used for plotting: acc/loss are recorded on test data,
# accye/losse on the training batch.
acc = []
loss = []
losse = []
accye = []
with tf.Session() as sess:
    initop = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(initop)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # NOTE(review): this value is fed to the `learnrate` placeholder, but the
    # optimizer is built with a fixed 1e-3, so it has no effect on training.
    rate = 0.0001

    # Training.
    for i in range(50000):
        # Walk through the 50000 training samples in batches of BATCH_SIZE.
        start = i * BATCH_SIZE % 50000
        _, accy, loss_ = sess.run(
            [train_step, accuracy, cross_entropy],
            feed_dict={
                is_training: True,
                x: x_train[start: start + BATCH_SIZE],
                y_: y_train[start: start + BATCH_SIZE],
                learnrate: rate,
                keep_prob: 0.7,
            },
        )

        # Report the training batch every 100 steps; record it every 500.
        if i % 100 == 0:
            if i % 500 == 0 and i != 0:
                accye.append(accy)
                losse.append(loss_)
            print("step %d,train , %g,loss %g" % (i, accy, loss_))

        # Every 500 steps evaluate on the first 100 test samples.
        if i % 500 == 0 and i != 0:
            # is_training must be False here: otherwise batch norm would
            # normalise with test-batch statistics and fold the test data
            # into its moving averages.
            loss_value, eval_accuracy = sess.run(
                [cross_entropy, accuracy],
                feed_dict={
                    is_training: False,
                    x: x_test[0: 100],
                    y_: y_test[0: 100],
                    keep_prob: 1.0,
                },
            )
            acc.append(eval_accuracy)
            loss.append(loss_value)
            print("step %d, test accuracy, %g loss %g" % (i, eval_accuracy, loss_value))

    # Final evaluation on the first 500 test samples.
    test_accuracy = accuracy.eval(
        feed_dict={is_training: False, x: x_test[0: 500], y_: y_test[0: 500], keep_prob: 1.0}
    )
    print("test accuracy %g" % test_accuracy)

    coord.request_stop()
    coord.join(threads)

    # Save the model while the session is still open.
    saver.save(sess, 'net/my_net.ckpt')
# Plot the recorded histories: accuracy on the left axis, loss on a second
# y-axis sharing the same x-axis. Red is the test series, blue the training
# series.
fig = plt.figure()
ax1 = fig.add_subplot(111)
for _series, _colour in ((acc, 'r'), (accye, 'b')):
    ax1.plot(_series, _colour)
ax1.set_ylabel('accuracy')

# twinx() creates the second y-axis on the same subplot.
ax2 = ax1.twinx()
for _series, _colour in ((loss, 'r'), (losse, 'b')):
    ax2.plot(_series, _colour)
ax2.set_ylabel('loss')

plt.show()