1、TFRecords 文件的读取代码
论文中对训练集合的图像进行了随机裁剪、亮度变化、以及对比度随机调整,同时也对数据进行了规范化处理,这样的预处理都增加了模型的鲁棒性,更加切合实际情况。
设备配置:显卡为GTX1050Ti,Anaconda3.5+python3.6.5+Tensorflow-gpu1.8.0
1.1、预处理图像的代码:
def image_preprocessing(image_buffer, image_size, train, thread_id=0):
    """Decode one encoded JPEG and return a standardized float32 image.

    Args:
        image_buffer: Encoded JPEG bytes, handed to ``tf.image.decode_jpeg``.
        image_size: Side length used for both height and width of the
            training crop or the evaluation resize.
        train: When truthy the image goes through ``distort_image``;
            otherwise through ``eval_image``.
        thread_id: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        The tensor produced by ``data_normalization`` on the processed image.
    """
    # tf.op_scope is deprecated (it only logs a warning and forwards to
    # tf.name_scope), so the name scope is opened directly.
    with tf.name_scope('decode_jpeg', values=[image_buffer]):
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    if train:
        image = distort_image(image, image_size, image_size)
    else:
        image = eval_image(image, image_size, image_size)
    image = data_normalization(image)
    return image
def distort_image(image, hight, width):
    """Apply the training-time augmentation chain to a single image.

    The steps, in order: random crop to ``[hight, width, 3]``, random
    horizontal flip, random brightness shift, random contrast change.

    Args:
        image: Image tensor to augment.
        hight: Crop height (parameter name kept as spelled in the original).
        width: Crop width.

    Returns:
        The augmented image tensor.
    """
    # NOTE(review): max_delta=63 looks sized for a 0-255 pixel range, while
    # image_preprocessing converts the image to float32 before calling this
    # function - confirm the intended scale.
    cropped = tf.random_crop(image, [hight, width, 3])
    flipped = tf.image.random_flip_left_right(cropped)
    brightened = tf.image.random_brightness(flipped, max_delta=63)
    return tf.image.random_contrast(brightened, lower=0.2, upper=1.8)
def data_normalization(image):
    """Standardize one image with ``tf.image.per_image_standardization``.

    Args:
        image: Image tensor to standardize.

    Returns:
        The standardized image tensor.
    """
    return tf.image.per_image_standardization(image)
1.2、数据预处理的完整代码及其注释(源码的部分修改和注释)
# -*- coding: utf-8 -*-
# @Time : 2019/3/18 14:39
# @Author : Chaucer_Gxm
# @Email : gxm4167235@163.com
# @File : data_process_chaucer.py
# @GitHub : https://github.com/Chaucergit/Code-and-Algorithm
# @blog : https://blog.csdn.net/qq_24819773
# @Software: PyCharm
from datetime import datetime
import os
import tensorflow as tf
import numpy as np
# from distutils.version import LooseVersion
# VERSION_GTE_0_12_0 = LooseVersion(tf.__version__) >= LooseVersion('0.12.0')
def data_files(data_dir, subset):
    """Return the paths of all TFRecord shards of one subset.

    Shards are the files in ``data_dir`` whose name matches ``<subset>-*``.

    Args:
        data_dir: Directory that holds the TFRecord shards.
        subset: Either ``'train'`` or ``'validation'``.

    Returns:
        A non-empty list of matching file paths.

    Raises:
        SystemExit: With exit code -1 when ``subset`` is not one of the two
            accepted names, or when no file matches the pattern.
    """
    if subset not in ('train', 'validation'):
        print('文件中不存在该 TFRecords 文件')
        # Raised directly instead of calling exit(): that builtin is injected
        # by the site module and is not guaranteed to exist in every run mode.
        raise SystemExit(-1)
    tf_record_pattern = os.path.join(data_dir, '%s-*' % subset)
    # Named differently from the function so the function is not shadowed.
    matched_files = tf.gfile.Glob(tf_record_pattern)
    print(matched_files)
    if not matched_files:
        print('在%s中未发现%s文件' % (data_dir, subset))
        raise SystemExit(-1)
    return matched_files
def parse_example_proto(example_serialized):
    """Parse one serialized Example into its image bytes, label and filename.

    Args:
        example_serialized: A serialized Example, as accepted by
            ``tf.parse_single_example``.

    Returns:
        A 3-tuple ``(encoded_image, label, filename)`` where ``label`` is the
        ``image/class/label`` feature cast to int32.
    """
    # Two feature specs cover every key: scalar strings and length-1 int64s.
    string_spec = tf.FixedLenFeature([], dtype=tf.string, default_value='')
    int_spec = tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1)
    schema = {
        'image/encoded': string_spec,
        'image/filename': string_spec,
        'image/class/label': int_spec,
        'image/class/text': string_spec,
        'image/height': int_spec,
        'image/width': int_spec,
    }
    parsed = tf.parse_single_example(example_serialized, schema)
    int_label = tf.cast(parsed['image/class/label'], dtype=tf.int32)
    return parsed['image/encoded'], int_label, parsed['image/filename']
def distort_image(image, hight, width):
    """Apply the training-time augmentation chain to a single image.

    The steps, in order: random crop to ``[hight, width, 3]``, random
    horizontal flip, random brightness shift, random contrast change.

    Args:
        image: Image tensor to augment.
        hight: Crop height (parameter name kept as spelled in the original).
        width: Crop width.

    Returns:
        The augmented image tensor.
    """
    # NOTE(review): max_delta=63 looks sized for a 0-255 pixel range, while
    # image_preprocessing converts the image to float32 before calling this
    # function - confirm the intended scale.
    cropped = tf.random_crop(image, [hight, width, 3])
    flipped = tf.image.random_flip_left_right(cropped)
    brightened = tf.image.random_brightness(flipped, max_delta=63)
    return tf.image.random_contrast(brightened, lower=0.2, upper=1.8)
def eval_image(image, hight, width):
    """Resize an evaluation image to the requested size, with no augmentation.

    Args:
        image: Image tensor to resize.
        hight: Target height (parameter name kept as spelled in the original).
        width: Target width.

    Returns:
        The result of ``tf.image.resize_images`` for ``[hight, width]``.
    """
    target_size = [hight, width]
    return tf.image.resize_images(image, target_size)
def data_normalization(image):
    """Standardize one image with ``tf.image.per_image_standardization``.

    Args:
        image: Image tensor to standardize.

    Returns:
        The standardized image tensor.
    """
    return tf.image.per_image_standardization(image)
def image_preprocessing(image_buffer, image_size, train, thread_id=0):
    """Decode one encoded JPEG and return a standardized float32 image.

    Args:
        image_buffer: Encoded JPEG bytes, handed to ``tf.image.decode_jpeg``.
        image_size: Side length used for both height and width of the
            training crop or the evaluation resize.
        train: When truthy the image goes through ``distort_image``;
            otherwise through ``eval_image``.
        thread_id: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        The tensor produced by ``data_normalization`` on the processed image.
    """
    # tf.op_scope is deprecated (it only logs a warning and forwards to
    # tf.name_scope), so the name scope is opened directly.
    with tf.name_scope('decode_jpeg', values=[image_buffer]):
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    if train:
        image = distort_image(image, image_size, image_size)
    else:
        image = eval_image(image, image_size, image_size)
    image = data_normalization(image)
    return image
def batch_inputs(data_dir, batch_size, image_size, train, num_preprocess_threads=4, num_readers=1, input_queue_memory_factor=16):
    """Build the queue-based input pipeline and return one batch of tensors.

    Args:
        data_dir: Directory holding the TFRecord shards.
        batch_size: Number of examples per batch.
        image_size: Side length of the square images that are produced.
        train: Truthy to read the 'train' shards with shuffling and
            augmentation; falsy to read the 'validation' shards.
        num_preprocess_threads: Number of parallel preprocessing branches fed
            into ``tf.train.batch_join``.
        num_readers: Number of ``TFRecordReader`` instances. With more than
            one, records pass through an intermediate examples queue.
        input_queue_memory_factor: Multiplier on the 1024-example shard size
            that sets the shuffle queue's ``min_after_dequeue``.

    Returns:
        A 3-tuple ``(images, labels, filenames)``. ``images`` is reshaped to
        ``[batch_size, image_size, image_size, 3]`` and ``labels`` to
        ``[batch_size]``.
    """
    with tf.name_scope('Batch_processing'):
        if train:
            files = data_files(data_dir, 'train')
            # Training shards are fed to the pipeline in random order.
            filename_queue = tf.train.string_input_producer(files, shuffle=True, capacity=16)
        else:
            files = data_files(data_dir, 'validation')
            # Fix: validation shards are read in a fixed order. The original
            # shuffled them, which contradicts the first-in-first-out
            # handling of validation data further down.
            filename_queue = tf.train.string_input_producer(files, shuffle=False, capacity=1)
        # Each shard is taken to hold 1024 examples.
        examples_per_shard = 1024
        # Sizing note carried over from the original author: one 299*299*3
        # float32 image is about 1 MB, so 1024 * 16 queued images would be
        # roughly 17 GB. NOTE(review): the examples queue below stores
        # serialized strings, not decoded images - confirm the estimate.
        min_queue_examples = examples_per_shard * input_queue_memory_factor
        if train:
            # Training examples leave the queue in random order.
            examples_queue = tf.RandomShuffleQueue(capacity=min_queue_examples + 3*batch_size, min_after_dequeue=min_queue_examples, dtypes=[tf.string])
        else:
            # Validation examples leave the queue first-in-first-out;
            # capacity is the maximum number of queued elements.
            examples_queue = tf.FIFOQueue(capacity=examples_per_shard+3*batch_size, dtypes=[tf.string])
        if num_readers > 1:
            # Several readers all enqueue into the shared examples queue.
            enqueue_ops = []
            for _ in range(num_readers):
                reader = tf.TFRecordReader()
                _, value = reader.read(filename_queue)
                enqueue_ops.append(examples_queue.enqueue([value]))
            tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
        else:
            # A single reader bypasses the examples queue entirely.
            reader = tf.TFRecordReader()
            _, example_serialized = reader.read(filename_queue)
        images_labels_fnames = []
        for thread_id in range(num_preprocess_threads):
            # Each branch parses and preprocesses one record.
            image_buffer, label_index, fname = parse_example_proto(example_serialized)
            image = image_preprocessing(image_buffer, image_size, train, thread_id)
            images_labels_fnames.append([image, label_index, fname])
        images, label_index_batch, fnames = tf.train.batch_join(images_labels_fnames, batch_size=batch_size, capacity=2*num_preprocess_threads*batch_size)
        images = tf.cast(images, tf.float32)
        images = tf.reshape(images, shape=[batch_size, image_size, image_size, 3])
        # Emit up to 30 images of the batch as an image summary.
        tf.summary.image('image', images, 30)
        return images, tf.reshape(label_index_batch, [batch_size]), fnames
def distorted_inputs(data_dir, batch_size=128, image_size=227, num_preprocess_threads=4):
    """Return one batch of augmented training inputs, built on the CPU.

    Args:
        data_dir: Directory holding the TFRecord shards.
        batch_size: Number of examples per batch.
        image_size: Side length of the square images that are produced.
        num_preprocess_threads: Number of parallel preprocessing branches.

    Returns:
        The ``(images, labels, filenames)`` tuple from ``batch_inputs`` called
        with ``train=True`` and a single reader.
    """
    # The input pipeline is pinned to the CPU.
    with tf.device('/cpu:0'):
        batch = batch_inputs(
            data_dir,
            batch_size,
            image_size,
            train=True,
            num_preprocess_threads=num_preprocess_threads,
            num_readers=1,
        )
        images, labels, filenames = batch
    return images, labels, filenames
2、训练的代码
# -*- coding: utf-8 -*-
# @Time : 2019/3/18 13:42
# @Author : Chaucer_Gxm
# @Email : gxm4167235@163.com
# @File : Train_study.py
# @GitHub : https://github.com/Chaucergit/Code-and-Algorithm
# @blog : https://blog.csdn.net/qq_24819773
# @Software: PyCharm
from six.moves import xrange
from datetime import datetime
import os
import numpy as np
import tensorflow as tf
import time
from data_process_chaucer import distorted_inputs
from tensorflow.contrib.layers import *
from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base
import json
import re
# from model_self import select_model
# Weight applied to the summed regularization losses in loss().
LAMBDA = 0.01
# Momentum value passed to MomentumOptimizer in optimizer().
MOM = 0.9
# Command-line flags. The third argument of each definition is the help text
# shown to the user, so it is left exactly as written.
# Path checked in main() for a previously trained model.
tf.app.flags.DEFINE_string('pre_checkpoint_path', '', '之前训练的模型的路径')
# Directory with the training TFRecords and md.json; run output goes below it.
tf.app.flags.DEFINE_string('train_dir', './Folds/tf/test_fold_is_0', 'TFRecords 训练数据集文件所在的位置')
# Forwarded to tf.ConfigProto(log_device_placement=...).
tf.app.flags.DEFINE_boolean('log_device_placement', False, '设备信息记录')
# Number of preprocessing branches handed to distorted_inputs().
tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, '训练时所有的线程数量')
# Optimizer name handed to optimizer().
tf.app.flags.DEFINE_string('optim', 'Momentum', 'Optimizer 优化器的选择')
# Side length of the training images.
tf.app.flags.DEFINE_integer('image_size', 227, '训练数据集中设置的图片大小')
# Initial learning rate.
tf.app.flags.DEFINE_float('eta', 0.01, '优化器的学习率')
# Dropout fraction; main() passes 1 - pdrop to the model as keep probability.
tf.app.flags.DEFINE_float('pdrop', 0.0, 'Dropout 神经元的比例')
# Number of training steps used when 'epochs' is below 1.
tf.app.flags.DEFINE_integer('max_steps', 40000, '训练的次数')
# Step interval handed to the learning-rate decay function.
tf.app.flags.DEFINE_integer('steps_per_decay', 10000, '每训练多少步对学习率进行衰减')
# Decay rate handed to the learning-rate decay function.
tf.app.flags.DEFINE_float('eta_decay_rate', 0.1, '学习率的衰减指数')
# When at least 1, main() trains for epochs * steps-per-epoch steps instead.
tf.app.flags.DEFINE_integer('epochs', -1, '每次训练的 epoch')
# Number of examples per training batch.
tf.app.flags.DEFINE_integer('batch_size', 128, '训练时每次输入的 batch_size')
# File name used for saved checkpoints inside the run directory.
tf.app.flags.DEFINE_string('checkpoint', 'checkpoint', '生成 Checkpoint 的默认名称')
# Network name handed to select_model().
tf.app.flags.DEFINE_string('model_type', 'default', '深度卷积网络的类型')
# NOTE(review): not read anywhere in the code shown here - confirm its use.
tf.app.flags.DEFINE_string('pre_model', '', '预训练模型的名称')
FLAGS = tf.app.flags.FLAGS
def select_model(name):
    """Map a model-type name to the function that builds that network.

    Args:
        name: Model type. Names starting with ``'inception'`` select
            ``inception_v3``, ``'bn'`` selects ``levi_hassner_bn``, and
            anything else selects ``levi_hassner``.

    Returns:
        The selected model-building function.
    """
    if name.startswith('inception'):
        print('选择的预训练模型为:', name)
        model_fn = inception_v3
    elif name == 'bn':
        print('选择规则模型进行预训练')
        model_fn = levi_hassner_bn
    else:
        print('选择默认的预训练模型')
        model_fn = levi_hassner
    return model_fn
def inception_v3(nlabels, images, pkeep, is_train):
    """Placeholder for the Inception-v3 network, which is not implemented here.

    Args:
        nlabels: Number of output classes.
        images: Batch of input images.
        pkeep: Dropout keep probability.
        is_train: Whether the network is built for training.

    Raises:
        NotImplementedError: Always. The original stub returned None, which
            the caller would then have used as the logits.
    """
    raise NotImplementedError('inception_v3 is not implemented in this file')
def levi_hassner_bn(nlabels, images, pkeep, is_train):
    """Placeholder for the batch-norm Levi-Hassner network, not implemented here.

    Args:
        nlabels: Number of output classes.
        images: Batch of input images.
        pkeep: Dropout keep probability.
        is_train: Whether the network is built for training.

    Raises:
        NotImplementedError: Always. The original stub returned None, which
            the caller would then have used as the logits.
    """
    raise NotImplementedError('levi_hassner_bn is not implemented in this file')
# Levi-Hassner network; per the original note, the only model used in this experiment.
def levi_hassner(nlabels, images, pkeep, is_train):
    """Build the Levi-Hassner network and return its output logits.

    Three convolution/pooling stages are followed by two 512-unit fully
    connected layers with dropout, then a linear output layer.

    Args:
        nlabels: Width of the output layer.
        images: Input image batch fed to the first convolution.
        pkeep: Keep probability handed to both ``tf.nn.dropout`` calls.
        is_train: Not read anywhere in this function.

    Returns:
        The output tensor, ``matmul(drop2, weights) + biases``.
    """
    weight_decay = 0.0005
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    with tf.variable_scope('LeviHassner', 'LeviHassner', [images]) as scope:
        # Defaults shared by all convolution and fully connected layers below.
        with tf.contrib.slim.arg_scope([convolution2d, fully_connected], weights_regularizer=weights_regularizer,
                                       biases_initializer=tf.constant_initializer(1.0), weights_initializer=tf.random_normal_initializer(stddev=0.005),
                                       trainable=True):
            # Convolutions override the weight initializer with stddev=0.01.
            with tf.contrib.slim.arg_scope([convolution2d], weights_initializer=tf.random_normal_initializer(stddev=0.01)):
                conv1 = convolution2d(images, 96, [7, 7], [4, 4], padding='VALID', biases_initializer=tf.constant_initializer(0.0), scope='conv1')
                pool1 = max_pool2d(conv1, 3, 2, padding='VALID', scope='pool1')
                norm1 = tf.nn.local_response_normalization(pool1, 5, alpha=0.0001, beta=0.75, name='norm1')
                conv2 = convolution2d(norm1, 256, [5, 5], [1, 1], padding='SAME', scope='conv2')
                pool2 = max_pool2d(conv2, 3, 2, padding='VALID', scope='pool2')
                norm2 = tf.nn.local_response_normalization(pool2, 5, alpha=0.0001, beta=0.75, name='norm2')
                conv3 = convolution2d(norm2, 384, [3, 3], [1, 1], biases_initializer=tf.constant_initializer(0.0), padding='SAME', scope='conv3')
                pool3 = max_pool2d(conv3, 3, 2, padding='VALID', scope='pool3')
                # assumes pool3 is 6x6 spatially with 384 channels (the default 227 input size) - TODO confirm for other sizes
                flat = tf.reshape(pool3, [-1, 384*6*6], name='reshape_flat')
                full1 = fully_connected(flat, 512, scope='full_1')
                drop1 = tf.nn.dropout(full1, pkeep, name='drop_1')
                full2 = fully_connected(drop1, 512, scope='full_2')
                drop2 = tf.nn.dropout(full2, pkeep, name='drop_2')
    # NOTE(review): the pasted source has no indentation; the output layer is
    # placed outside the 'LeviHassner' scope here - confirm against the
    # checkpoints you intend to load.
    with tf.variable_scope('output') as scope:
        weights = tf.Variable(tf.random_normal([512, nlabels], mean=0.0, stddev=0.01), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases')
        output = tf.add(tf.matmul(drop2, weights), biases, name=scope.name)
    return output
def loss(logits, labels):
    """Build the total training loss together with its summaries.

    The total is the mean sparse softmax cross-entropy plus ``LAMBDA`` times
    the sum of the ``REGULARIZATION_LOSSES`` collection. Raw values and their
    exponential moving averages are written as scalar summaries.

    Args:
        logits: Unscaled class scores from the model.
        labels: Integer class indices (not one-hot); cast to int32 here.

    Returns:
        The total loss tensor, wrapped so that evaluating it also runs the
        moving-average update.
    """
    int_labels = tf.cast(labels, tf.int32)
    # The sparse variant takes class indices directly; the non-sparse one
    # would need one-hot labels.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=int_labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(per_example, name='cross_entropy')
    # Register the data loss, then read back everything in the collection.
    tf.add_to_collection('losses', cross_entropy_mean)
    collected = tf.get_collection('losses')
    # Fetch the regularization terms that the layers registered.
    reg_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    raw_total = cross_entropy_mean + LAMBDA * sum(reg_terms)
    tf.summary.scalar('tl_raw', raw_total)
    tracked = collected + [raw_total]
    # Moving averages are maintained for every tracked loss.
    averager = tf.train.ExponentialMovingAverage(0.999, name='avg')
    update_averages = averager.apply(tracked)
    for tensor in tracked:
        tf.summary.scalar(tensor.op.name + '(raw)', tensor)
        tf.summary.scalar(tensor.op.name, averager.average(tensor))
    # The identity op makes the average update run before the loss is read.
    with tf.control_dependencies([update_averages]):
        return tf.identity(raw_total)
def exponential_staircase_decay(at_step=10000, decay_rate=0.1):
    """Create a learning-rate decay function with fixed step and rate.

    Args:
        at_step: Decay-steps value passed to ``tf.train.exponential_decay``.
        decay_rate: Decay-rate value passed to ``tf.train.exponential_decay``.

    Returns:
        A callable ``(lr, global_step)`` that returns the result of
        ``tf.train.exponential_decay`` with ``staircase=True``.
    """
    return lambda lr, global_step: tf.train.exponential_decay(
        lr, global_step, at_step, decay_rate, staircase=True)
def optimizer(optim, eta, loss_fn, at_step, decay_rate):
    """Create the training op for the given loss.

    Args:
        optim: Optimizer name. ``'Adadelta'`` and ``'Momentum'`` get dedicated
            constructors; any other value is passed to
            ``tf.contrib.layers.optimize_loss`` unchanged.
        eta: Initial learning rate.
        loss_fn: Loss tensor to minimize.
        at_step: Step interval for the staircase decay (Momentum only).
        decay_rate: Decay rate for the staircase decay (Momentum only).

    Returns:
        The op returned by ``tf.contrib.layers.optimize_loss``, built with
        gradients clipped at 4.0.
    """
    global_step = tf.Variable(0, trainable=False)
    optz = optim
    # Fix: defined up front so that optimizer names other than the two handled
    # below no longer hit an unbound local variable at the final call.
    lr_decay_fn = None
    if optim == 'Adadelta':
        optz = lambda lr: tf.train.AdadeltaOptimizer(lr, 0.95, 1e-6)
    elif optim == 'Momentum':
        optz = lambda lr: tf.train.MomentumOptimizer(lr, MOM)
        lr_decay_fn = exponential_staircase_decay(at_step, decay_rate)
    return tf.contrib.layers.optimize_loss(loss_fn, global_step, eta, optz, clip_gradients=4.0, learning_rate_decay_fn=lr_decay_fn)
def main(argv=None):
    """Build the training graph and run the training loop.

    Reads ``md.json`` from ``FLAGS.train_dir``, builds the input pipeline,
    model, loss and optimizer, then trains. Progress is printed every 10
    steps, summaries are written every 100 steps, and a checkpoint is saved
    every 1000 steps and after the final step.

    Args:
        argv: Not used; present because ``tf.app.run()`` passes it.
    """
    # Everything is built in a fresh graph installed as the default graph.
    with tf.Graph().as_default():
        # 1. Pick the network-building function.
        model_pre = select_model(FLAGS.model_type)
        # 2. Load the dataset metadata stored next to the TFRecords.
        input_file = os.path.join(FLAGS.train_dir, 'md.json')
        print(input_file)
        with open(input_file, 'r') as f:
            md = json.load(f)
        print('数据集的参数为:', md)
        # 3. Build the batched, augmented input tensors.
        images, labels, _ = distorted_inputs(FLAGS.train_dir, FLAGS.batch_size, FLAGS.image_size, FLAGS.num_preprocess_threads)
        print(images.shape, '\n', labels.shape)
        # 4. Build the model, the loss and the training op.
        logits = model_pre(md['nlabels'], images, 1-FLAGS.pdrop, True)
        total_loss = loss(logits, labels)
        print(total_loss)
        train_op = optimizer(FLAGS.optim, FLAGS.eta, total_loss, FLAGS.steps_per_decay, FLAGS.eta_decay_rate)
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
        tf.global_variables_initializer().run(session=sess)
        if FLAGS.pre_checkpoint_path and tf.gfile.Exists(FLAGS.pre_checkpoint_path):
            print('尝试从%s路径恢复 Checkpoint 文件' % FLAGS.pre_checkpoint_path)
            restorer = tf.train.Saver()
            # Fix: the original discarded this result and never called
            # restore(), so nothing was loaded despite the success message.
            latest = tf.train.latest_checkpoint(FLAGS.pre_checkpoint_path)
            if latest:
                restorer.restore(sess, latest)
                # Fix: the format string has two placeholders but received a
                # single argument, which raised TypeError.
                print('在%s,从路径%s恢复预训练模型' % (datetime.now(), FLAGS.pre_checkpoint_path))
            else:
                print('在%s中未找到可恢复的 Checkpoint 文件' % FLAGS.pre_checkpoint_path)
        # Each run writes into its own directory, named after the process id.
        run_dir = '%s/run-%d' % (FLAGS.train_dir, os.getpid())
        checkpoint_path = '%s/%s' % (run_dir, FLAGS.checkpoint)
        if not tf.gfile.Exists(run_dir):
            print('创建文件夹%s' % run_dir)
            tf.gfile.MakeDirs(run_dir)
        tf.train.write_graph(sess.graph_def, run_dir, 'model.pb', as_text=True)
        # The input pipeline is queue-based and needs its runner threads.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(run_dir, sess.graph)
        steps_per_train_epoch = int(md['train_counts'] / FLAGS.batch_size)
        # An explicit epoch count takes precedence over max_steps.
        num_steps = FLAGS.max_steps if FLAGS.epochs < 1 else FLAGS.epochs * steps_per_train_epoch
        print('******************')
        for step in xrange(num_steps):
            start = time.time()
            _, loss_value = sess.run([train_op, total_loss])
            duration = time.time() - start
            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                example_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = '在%s,第%d步,loss为%.3f(%.1f examples/sec; %.3f sec/batch)'
                print(format_str % (datetime.now(), step, loss_value, example_per_sec, sec_per_batch))
            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
            # Fix: also save after the last step, so the steps since the last
            # multiple of 1000 are not lost.
            if step % 1000 == 0 or step + 1 == num_steps:
                saver.save(sess, checkpoint_path, global_step=step)
# Script entry point: hand control to TensorFlow's app runner.
if __name__ == '__main__':
    tf.app.run()
3、训练的部分结果:
在2019-03-18 21:11:32.239774,第0步,loss为2.153(29.2 examples/sec; 4.376 sec/batch)
在2019-03-18 21:11:45.365983,第10步,loss为1.977(139.7 examples/sec; 0.917 sec/batch)
在2019-03-18 21:11:54.587029,第20步,loss为2.069(143.2 examples/sec; 0.894 sec/batch)
在2019-03-18 21:12:03.767037,第30步,loss为2.085(141.0 examples/sec; 0.908 sec/batch)
在2019-03-18 21:12:12.940990,第40步,loss为1.986(137.4 examples/sec; 0.932 sec/batch)
在2019-03-18 21:12:22.294523,第50步,loss为1.923(129.1 examples/sec; 0.992 sec/batch)
在2019-03-18 21:12:32.117612,第60步,loss为2.022(126.1 examples/sec; 1.015 sec/batch)
在2019-03-18 21:12:43.330441,第70步,loss为1.925(85.8 examples/sec; 1.491 sec/batch)
在2019-03-18 21:12:55.501968,第80步,loss为2.018(103.1 examples/sec; 1.242 sec/batch)
在2019-03-18 21:13:09.393838,第90步,loss为1.936(92.3 examples/sec; 1.387 sec/batch)
在2019-03-18 21:13:20.349541,第100步,loss为1.995(138.3 examples/sec; 0.926 sec/batch)
在2019-03-18 21:13:31.674801,第110步,loss为1.948(132.2 examples/sec; 0.968 sec/batch)
在2019-03-18 21:13:42.132834,第120步,loss为2.056(125.1 examples/sec; 1.023 sec/batch)
在2019-03-18 21:13:52.224789,第130步,loss为1.994(129.6 examples/sec; 0.987 sec/batch)
在2019-03-18 21:14:02.434484,第140步,loss为1.992(123.5 examples/sec; 1.036 sec/batch)
年龄和性别识别参考的代码:
源代码:https://github.com/dpressel/rude-carnie
模型下载:Adience数据集:http://www.openu.ac.il/home/hassner/Adience/data.html#agegender