1. Parameter settings
The data is read using the method from the previous post (https://blog.csdn.net/zryowen123/article/details/79796387); that post's code is saved as TFRecord.py and imported as a module here.
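For reference, the two reader functions used below can be sketched as follows. This is a minimal sketch, not the exact code from that post: the TFRecord feature keys ('label', 'img_raw') and the uint8-decode-plus-normalize preprocessing are assumptions inferred from how get_train_batch and get_test_batch are called in section 4.

import tensorflow as tf

def _read_example(filename, size):
    # Shared parsing logic: read one serialized example and decode the image.
    filename_queue = tf.train.string_input_producer([filename])
    _, serialized = tf.TFRecordReader().read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'label': tf.FixedLenFeature([], tf.int64),      # assumed feature key
        'img_raw': tf.FixedLenFeature([], tf.string)})  # assumed feature key
    image = tf.reshape(tf.decode_raw(features['img_raw'], tf.uint8), size)
    image = tf.cast(image, tf.float32) / 255.0  # assumed normalization
    return image, tf.cast(features['label'], tf.int32)

def get_train_batch(filename, size, min_after_dequeue, batch_size, capacity):
    image, label = _read_example(filename, size)
    # Shuffled batching for training.
    return tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                  capacity=capacity,
                                  min_after_dequeue=min_after_dequeue)

def get_test_batch(filename, size, batch_size, capacity):
    image, label = _read_example(filename, size)
    # Plain (unshuffled) batching for evaluation.
    return tf.train.batch([image, label], batch_size=batch_size, capacity=capacity)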
import tensorflow as tf
import numpy as np  # needed by process_target below
import TFRecord as tfr  # custom module for TFRecord reading (see the previous post)
from tensorflow.python.framework import graph_util

FILENAME_TRAIN = 'F:/PycharmProjects/deepLearning/cnn_cat&dog/data/tfrecord/224/cat_vs_dog_train.tfrecords'  # training set
FILENAME_TEST = 'F:/PycharmProjects/deepLearning/cnn_cat&dog/data/tfrecord/224/cat_vs_dog_test.tfrecords'  # test set
MIN_AFTER_DEQUEUE = 500  # minimum number of examples left in the training queue after a dequeue
BATCH_SIZE_TRAIN = 50  # training batch size
CAPACITY_TRAIN = MIN_AFTER_DEQUEUE + 3 * BATCH_SIZE_TRAIN  # training queue capacity
BATCH_SIZE_TEST = 100  # test batch size
CAPACITY_TEST = 100  # test queue capacity
SIZE = [224, 224, 3]  # image size
N_CLASSES = 2  # number of classes
# Weight and bias shapes, defined as dictionaries
weights = {
'wc1': [11, 11, 3, 64],
'wc2': [5, 5, 64, 192],
'wc3': [3, 3, 192, 384],
'wc4': [3, 3, 384, 256],
'wc5': [3, 3, 256, 256],
'wd1': [7 * 7 * 256, 4096],
'wd2': [4096, 4096],
'out': [4096, N_CLASSES]
}
biases = {
'bc1': [64],
'bc2': [192],
'bc3': [384],
'bc4': [256],
'bc5': [256],
'bd1': [4096],
'bd2': [4096],
'out': [N_CLASSES]
}
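A quick sanity check on the 7 * 7 * 256 in 'wd1': with SAME padding, each conv/pool layer's output side length is ceil(input_side / stride), so a 224-pixel side shrinks through the network as follows.

import math
side = 224
for stride in [4, 2, 1, 2, 1, 1, 1, 2]:  # conv1, pool1, conv2, pool2, conv3, conv4, conv5, pool5
    side = math.ceil(side / stride)
print(side)  # 7 -> pool5 outputs 7 x 7 x 256 feature maps, matching weights['wd1'][0]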
2. Defining the basic network components
# Initialize a weight tensor W with the given shape
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='weights')

# Initialize a bias tensor b with the given shape
def bias_variable(shape):
    initial = tf.constant(0.0, shape=shape)
    return tf.Variable(initial, name='biases')

# Build a convolutional layer: conv (stride k, SAME padding) + bias + ReLU
def conv2d(x, w, k, b, name):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w, strides=[1, k, k, 1], padding='SAME'), b), name=name)

# Build a max-pooling layer with a k1 x k1 window and stride k2
def max_pool(l_input, k1, k2, name):
    return tf.nn.max_pool(l_input, ksize=[1, k1, k1, 1], strides=[1, k2, k2, 1], padding='SAME', name=name)

# Print a tensor's op name and shape, to trace the network structure
def print_activations(t):
    print(t.op.name, ' ', t.get_shape().as_list())
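A minimal smoke test of these helpers (the names demo_conv and demo_pool are arbitrary): build one conv + pool pair on a dummy input and check the printed shapes.

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
w = weight_variable([11, 11, 3, 64])
b = bias_variable([64])
c = conv2d(x, w, 4, b, name='demo_conv')  # 11x11 conv, stride 4, + ReLU
p = max_pool(c, 3, 2, name='demo_pool')   # 3x3 window, stride 2
print_activations(c)  # expected shape: [None, 56, 56, 64] (ceil(224/4) = 56)
print_activations(p)  # expected shape: [None, 28, 28, 64] (ceil(56/2) = 28)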
3. Defining the AlexNet architecture
Following the finding from the VGG paper that LRN layers contribute little, this implementation of AlexNet omits them; if you want them, you can add them back, for example as sketched below.
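One option is a conv helper that normalizes after the ReLU. This is a sketch; the hyperparameters are borrowed from TensorFlow's CIFAR-10 tutorial, not tuned for this task.

def conv2d_lrn(x, w, k, b, name):
    conv = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w, strides=[1, k, k, 1], padding='SAME'), b))
    # Local response normalization across the channel dimension.
    return tf.nn.lrn(conv, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)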
def inference(images, weights, biases, keep_prob):
parameters = []
with tf.name_scope('conv1') as scope:
kernel = weight_variable(weights['wc1'])
bias = bias_variable(biases['bc1'])
conv1 = conv2d(images, kernel, 4, bias, name=scope)
print_activations(conv1)
parameters += [kernel, bias]
    pool1 = max_pool(conv1, 3, 2, name='pool1')  # pooling layer 1, stride 2
print_activations(pool1)
with tf.name_scope('conv2') as scope:
kernel = weight_variable(weights['wc2'])
bias = bias_variable(biases['bc2'])
conv2 = conv2d(pool1, kernel, 1, bias, name=scope)
print_activations(conv2)
parameters += [kernel, bias]
    pool2 = max_pool(conv2, 3, 2, name='pool2')  # pooling layer 2, stride 2
print_activations(pool2)
with tf.name_scope('conv3') as scope:
kernel = weight_variable(weights['wc3'])
bias = bias_variable(biases['bc3'])
conv3 = conv2d(pool2, kernel, 1, bias, name=scope)
print_activations(conv3)
parameters += [kernel, bias]
with tf.name_scope('conv4') as scope:
kernel = weight_variable(weights['wc4'])
bias = bias_variable(biases['bc4'])
conv4 = conv2d(conv3, kernel, 1, bias, name=scope)
print_activations(conv4)
parameters += [kernel, bias]
with tf.name_scope('conv5') as scope:
kernel = weight_variable(weights['wc5'])
bias = bias_variable(biases['bc5'])
conv5 = conv2d(conv4, kernel, 1, bias, name=scope)
print_activations(conv5)
parameters += [kernel, bias]
    pool5 = max_pool(conv5, 3, 2, name='pool5')  # pooling layer 5, stride 2
    print_activations(pool5)
    # Fully connected layer 1
with tf.name_scope('fc1') as scope:
weight = weight_variable(weights['wd1'])
bias = bias_variable(biases['bd1'])
        dense0 = tf.reshape(pool5, [-1, weight.get_shape().as_list()[0]])  # flatten the feature maps into vectors
        dense1 = tf.nn.dropout(tf.nn.relu(tf.matmul(dense0, weight) + bias), keep_prob, name=scope)  # dropout to reduce overfitting
print_activations(dense1)
parameters += [weight, bias]
    # Fully connected layer 2
with tf.name_scope('fc2') as scope:
weight = weight_variable(weights['wd2'])
bias = bias_variable(biases['bd2'])
        dense2 = tf.nn.relu(tf.matmul(dense1, weight) + bias, name=scope)  # ReLU activation
print_activations(dense2)
parameters += [weight, bias]
    # Output layer
with tf.name_scope('fc3') as scope:
weight = weight_variable(weights['out'])
bias = bias_variable(biases['out'])
out = tf.add(tf.matmul(dense2, weight), bias, name=scope)
print_activations(out)
parameters += [weight, bias]
return out, parameters
4. Defining the training procedure
Because the dataset labels produced by the method in the previous post are integers, they must be converted to one-hot vectors first.
# Convert integer labels to one-hot vectors
def process_target(batch_y_, samples):
    """
    :param batch_y_: label batch, an array of integer class labels such as 0, 1, 2, ...
    :param samples: number of classes
    :return: label batch, an array of one-hot class labels such as [1, 0, ..., 0], [0, 1, ..., 0], ...
    """
    y = []
    for i in batch_y_:
        arr = np.zeros(samples, dtype=np.float32)
        arr[i] = 1
        y.append(arr)
    y_ = np.array(y)
    return y_
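For example, process_target(np.array([0, 1, 1]), 2) returns [[1, 0], [0, 1], [0, 1]]. The same conversion can also be written in one line using an identity matrix as a lookup table:

def process_target(batch_y_, samples):
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(samples, dtype=np.float32)[batch_y_]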
The training procedure:
# Training
def train(sess, steps):
    image, label = tfr.get_train_batch(FILENAME_TRAIN, SIZE, MIN_AFTER_DEQUEUE, BATCH_SIZE_TRAIN,
                                       CAPACITY_TRAIN)  # training data
    image_test, label_test = tfr.get_test_batch(FILENAME_TEST, SIZE, BATCH_SIZE_TEST, CAPACITY_TEST)  # test data
    x = tf.placeholder(tf.float32, [None, SIZE[0], SIZE[1], SIZE[2]], name='input')  # input placeholder
    y_ = tf.placeholder(tf.float32, shape=[None, N_CLASSES])  # label placeholder
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # dropout keep-probability placeholder
    y, p = inference(x, weights, biases, keep_prob)  # forward pass
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=y_))  # loss
    train_step = tf.train.AdamOptimizer(0.001).minimize(cost)  # optimize with Adam
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))  # accuracy
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(steps):
        batch_x, batch_y_ = sess.run([image, label])
        batch_y_ = process_target(batch_y_, N_CLASSES)
        train_acc = accuracy.eval(feed_dict={x: batch_x, y_: batch_y_, keep_prob: 1.0})  # training accuracy
        print('step %d, training accuracy %g' % (i, train_acc))
        train_step.run(feed_dict={x: batch_x, y_: batch_y_, keep_prob: 0.75})  # one optimization step
    x_test, y_test = sess.run([image_test, label_test])
    y_test = process_target(y_test, N_CLASSES)  # convert integer labels to one-hot vectors
    test_acc = accuracy.eval(feed_dict={x: x_test, y_: y_test, keep_prob: 1.0})  # test-set accuracy
    print("test accuracy %g" % test_acc)
    coord.request_stop()
    coord.join(threads)
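A 20,000-step run can take a long time. If you want to be able to resume an interrupted run, a tf.train.Saver checkpoint can be added to train() as sketched below; the checkpoints/alexnet path and the 1000-step interval are arbitrary choices, not part of the original code.

saver = tf.train.Saver(max_to_keep=3)  # build once, before the training loop
# ... then inside the for-i loop, after train_step.run(...):
if i % 1000 == 0:
    saver.save(sess, 'checkpoints/alexnet', global_step=i)  # periodic checkpoint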
5. Model persistence
Since the trained model will only be used for inference, there is no need to save the full training graph; instead the variables are frozen and the result is written to a .pb file.
# Persist the model: freeze variables into constants and write a .pb file
def persist(sess, path):
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph_def, ['input', 'keep_prob', 'fc3'])
    with tf.gfile.FastGFile(path, mode='wb') as f:
        f.write(constant_graph.SerializeToString())
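To use the frozen model later, load the .pb file and feed the named tensors. A minimal sketch, assuming the output op really ends up named 'fc3' (i.e. the node names passed to convert_variables_to_constants above match the graph):

def load_and_predict(pb_path, image_batch):
    with tf.gfile.FastGFile(pb_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as g:
        tf.import_graph_def(graph_def, name='')  # keep the original tensor names
        x = g.get_tensor_by_name('input:0')
        keep_prob = g.get_tensor_by_name('keep_prob:0')
        logits = g.get_tensor_by_name('fc3:0')
        prediction = tf.argmax(logits, 1)  # index of the predicted class
        with tf.Session(graph=g) as sess:
            return sess.run(prediction, feed_dict={x: image_batch, keep_prob: 1.0})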
6. Running the program
if __name__ == '__main__':
sess = tf.InteractiveSession()
train(sess, 20000)
persist(sess, 'cat_vs_dog_224.pb')
sess.close()