The TensorFlow slim module
TensorFlow provides the slim module, which makes it easy to write a VGG network concisely. Recommended posts:
- tensorflow中slim模块api介绍, which documents the slim API and includes a simple VGG demo;
- 【Tensorflow slim】 slim.arg_scope的用法, worth reading specifically for how slim.arg_scope works (a minimal sketch of it follows this list);
- Tensorflow slim库使用小记;
- tensorflow中的slim函数集合.
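As a taste of what slim.arg_scope does (a minimal sketch, assuming nothing beyond the standard slim API): it sets shared default arguments for the listed ops, so per-layer boilerplate disappears:

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# Every conv2d/fully_connected built inside the scope inherits these defaults;
# any layer can still override them with an explicit argument.
with slim.arg_scope([slim.conv2d, slim.fully_connected],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(0.0005)):
    net = slim.conv2d(x, 64, [3, 3], scope='conv1')
    net = slim.conv2d(net, 64, [3, 3], scope='conv2')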
Loss plateaus during training
A pitfall when implementing VGG yourself: training may start, yet the loss sits at some fixed value and never decreases. When that happens, look at the loss function. The usual choice is the cross-entropy loss, defined as

L = -\log \hat{y}_p ,

where \hat{y}_p is the predicted probability of the true class p.
For binary classification this becomes
L = -\big( y \ln \hat{y} + (1 - y) \ln (1 - \hat{y}) \big) ,
Note that there is no subscript p here; instead y_1 = y, \hat{y}_1 = \hat{y}, and \hat{y}_0 = 1 - \hat{y}_1. When the network outputs the two-class probability vector [0.5, 0.5], the loss is -\ln 0.5 \approx 0.69.
For four-class classification the loss may likewise plateau around 1.386. In general, a network stuck at the uniform prediction over K classes yields a cross-entropy of -\ln(1/K) = \ln K, hence \ln 2 \approx 0.693 and \ln 4 \approx 1.386.
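As a quick numerical check (a minimal numpy sketch, not from the original post), the plateau values are exactly \ln K for a uniform prediction over K classes:

import numpy as np

# Cross-entropy of a uniform prediction: the true class gets probability 1/K,
# so the loss is -log(1/K) = log(K).
for k in [2, 4]:
    loss = -np.log(1.0 / k)
    print(k, round(loss, 3))  # prints 2 0.693 and 4 1.386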
In my experience, the most effective fix is to switch the weight initialization from Gaussian to Xavier; otherwise training may simply fail to converge. The initialization scheme matters a great deal. (I have not tried batch normalization.)
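For reference, the switch is a one-argument change in slim (a sketch; the full network below applies the same idea to every layer via slim.arg_scope):

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# Gaussian initialization, which may stall training:
#   weights_initializer=tf.truncated_normal_initializer(stddev=0.01)
# Xavier initialization scales the variance to the layer's fan-in and fan-out:
net = slim.conv2d(x, 64, [3, 3],
                  weights_initializer=slim.xavier_initializer())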
Use a linear activation in the final layer
ReLU can cause neurons to die permanently (once a unit's input is always negative, its gradient is zero and it never recovers), so it is recommended to use a linear (identity) activation in the final layer.
Code
The network structure: slim.arg_scope sets the ReLU activation, Xavier initialization, and L2 regularization for every conv and fully connected layer.
import tensorflow as tf
import tensorflow.contrib.slim as slim


def generate_vgg16(x_input, category_size, is_training):
    # Shared defaults for every conv2d/fully_connected layer in the scope.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=slim.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.repeat(x_input, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        net = slim.flatten(net, scope='flat5')
        net = slim.fully_connected(net, 4096, scope='fc6')
        # slim.dropout accepts a boolean tensor, so no tf.cond wrapper is needed;
        # with is_training=False it is a no-op, matching the original branches.
        net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout7')
        # Final layer: linear (identity) activation, per the note above.
        net = slim.fully_connected(net, category_size, activation_fn=None, scope='fc8')
        return net
if __name__ == '__main__':
    import config as cf

    receptive_field_side = cf.rpn_receptive_field_side  # side length of the network's receptive field
    channel_num = cf.rpn_channel_num                    # RGB images, so 3 channels
    category_size = cf.rpn_category_size                # number of classes
    batch_size = 30
    is_training = tf.placeholder(tf.bool, [])
    x_input_shape = [batch_size, receptive_field_side, receptive_field_side, channel_num]
    x_input = tf.placeholder(tf.float32, shape=x_input_shape)
    net_output = generate_vgg16(x_input, category_size, is_training)
    print(net_output)
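To sanity-check the wiring, one can feed random data and confirm the logits' shape (a sketch reusing the placeholders defined above):

import numpy as np

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy = np.random.rand(*x_input_shape).astype(np.float32)
    logits = sess.run(net_output, feed_dict={x_input: dummy, is_training: False})
    print(logits.shape)  # expected: (batch_size, category_size)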
Training code
import os

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from PIL import Image

import config as cf   # the author's configuration module
import util           # the author's data helpers
import vgg16          # the network definition above


def train(num_train, batch_size=8, learning_rate=0.001):
    receptive_field_side = cf.rpn_receptive_field_side  # side length of the network's receptive field
    channel_num = cf.rpn_channel_num                    # RGB images, so 3 channels
    category_size = cf.category_size                    # number of classes
    is_training = tf.placeholder(tf.bool, [])
    x_input_shape = [batch_size, receptive_field_side, receptive_field_side, channel_num]
    x_input = tf.placeholder(tf.float32, shape=x_input_shape)
    net_output = vgg16.generate_vgg16(x_input, category_size, is_training)
    y_output = tf.placeholder(tf.int32, shape=[batch_size, category_size])
    loss_cls = slim.losses.softmax_cross_entropy(net_output, y_output)
    loss_regular = tf.add_n(slim.losses.get_regularization_losses())
    loss_total = loss_cls + loss_regular
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step,
                                               decay_steps=500, decay_rate=0.9)
    # opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    opt = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    # Note: the decay above is not actually in effect here, because global_step
    # is never passed to create_train_op; see the API docs.
    train_op = slim.learning.create_train_op(loss_total, opt)
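    # To make the decay effective, one could pass global_step explicitly
    # (create_train_op accepts a global_step argument; this line is a sketch,
    # not part of the original code):
    # train_op = slim.learning.create_train_op(loss_total, opt, global_step=global_step)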
    saver = tf.train.Saver()  # created once, so it is always defined at save time
    with tf.Session() as sess:
        if not os.path.exists('./save/vgg/'):
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, tf.train.latest_checkpoint('./save/vgg/'))
        for i in range(num_train):
            batch_size_training_data = util.sample_batch_size_training_data(batch_size=batch_size)
            x_data = []
            y_data = []
            for datum in batch_size_training_data:
                image_path = datum[0]
                bbox_region = datum[1]
                category = datum[-1]
                image = Image.open(image_path)
                # True enables the data-augmentation strategy during training.
                x_data.append(np.array(util.crop_image(image, bbox_region, True)))
                y_data.append(util.cls2onehot(cf.category2id_dict[category], cf.category_size))
            x_data = np.array(x_data)
            y_data = np.array(y_data)
            if (i + 1) % 10 == 0:
                # Loss before the update, evaluated in inference mode.
                loss_total_1, loss_cls_1 = sess.run([loss_total, loss_cls],
                                                    feed_dict={x_input: x_data, y_output: y_data,
                                                               is_training: False})
            sess.run(train_op, feed_dict={x_input: x_data, y_output: y_data, is_training: True})
            if (i + 1) % 10 == 0:
                # Loss after the update, on the same batch, for a before/after comparison.
                loss_total_2, loss_cls_2 = sess.run([loss_total, loss_cls],
                                                    feed_dict={x_input: x_data, y_output: y_data,
                                                               is_training: False})
                print('{:.2%}'.format((i + 1) / num_train), end=', ')
                print('loss_total: %.7f' % loss_total_1, '\t=> %.7f' % loss_total_2,
                      '\tloss_cls: %.7f' % loss_cls_1, '\t=> %.7f' % loss_cls_2)
                if not os.path.exists('./save/vgg/'):
                    os.makedirs('./save/vgg/')  # also creates ./save/ if needed
                saver.save(sess, './save/vgg/model')
        # Final checkpoint, in case num_train is not a multiple of 10.
        if not os.path.exists('./save/vgg/'):
            os.makedirs('./save/vgg/')
        saver.save(sess, './save/vgg/model')
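Once a checkpoint exists, inference follows the same pattern (a hypothetical sketch; predict and its argument handling are illustrative, not from the original code, and it assumes the imports of the training script):

def predict(x_data):
    # x_data: numpy array shaped [batch, side, side, channels].
    is_training = tf.placeholder(tf.bool, [])
    x_input = tf.placeholder(tf.float32, shape=x_data.shape)
    net_output = vgg16.generate_vgg16(x_input, cf.category_size, is_training)
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint('./save/vgg/'))
        logits = sess.run(net_output, feed_dict={x_input: x_data, is_training: False})
    return np.argmax(logits, axis=1)  # predicted class ids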
Transfer learning
It is recommended to first read the post Tensorflow(五)用VGG实现迁移学习(代码实现).
Here we load the parameters of the first three convolutional blocks of a pretrained VGG (note the include and exclude keyword arguments of slim.get_variables_to_restore):
with tf.Session() as sess:
    # The branch condition checks for our own checkpoint directory; the original
    # tested './save/vgg_pretrained/', which would have restored from a path it
    # had just found to be missing.
    if not os.path.exists('./save/vgg/'):
        # No checkpoint of our own yet: initialize everything, then overwrite
        # conv1-conv3 with the pretrained weights (transfer learning).
        sess.run(tf.global_variables_initializer())
        params = slim.get_variables_to_restore(include=['conv1', 'conv2', 'conv3'])
        saver = tf.train.Saver(params)
        saver.restore(sess, tf.train.latest_checkpoint('./save/vgg_pretrained/'))
    else:
        # Resume from our own full checkpoint.
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint('./save/vgg/'))
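The complementary exclude form restores everything except the listed scopes, which is useful when only the classifier head changed (a sketch of the same API):

# Restore all variables except the fully connected head, e.g. when
# category_size differs from the pretrained model:
params = slim.get_variables_to_restore(exclude=['fc6', 'fc7', 'fc8'])
saver = tf.train.Saver(params)
saver.restore(sess, tf.train.latest_checkpoint('./save/vgg_pretrained/'))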