周末折腾了一下:模型在多 GPU 上训练,训练过程中保存 checkpoint,然后 restore 并保存为 pb 格式的模型文件。之后把模型放到 CPU 上预测,结果报错,原因大概是有些 op 被指定在 GPU 上执行,而预测是在 CPU 上进行的。上午又折腾了一下,发现原来保存模型时需要指定 CPU 或 GPU;Google 的模型是把一些 name scope 名称去掉,我则写了个方法来保存模型,使其在 CPU 和 GPU 上预测都具有普适性。先看我实现的方法吧。
首先是单机多 GPU 上的训练:
from model import TextCNNMulFilterSize
from configuration import TCNNConfig
from data_utils import inputs,getvocablen
import time
import numpy as np
from datetime import datetime
import tensorflow as tf
import os
from datetime import timedelta
# Root directory of the project checkout on the training machine.
# Local development alternative: "/Users/shuubiasahi/Documents/python"
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify-poi/tensorflow/tf.records"
vocapath = basepath + "/credit-tftextclassify-poi/tensorflow/vocab.txt"
modelpath = basepath + "/credit-tftextclassify-poi/tensorflow/"

# Silence TensorFlow's C++ INFO and WARNING log output.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
print(modelpath, "poi识别model开始训练")

# Command-line flags shared by the functions below.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/model/poi/',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 30000,
                            """Number of batches to run.""")
# The help text used to be a copy of the max_steps description.
tf.app.flags.DEFINE_integer('batch_size', 30,
                            """Number of examples per batch on each GPU.""")
tf.app.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            """Whether to log device placement.""")

# The model configuration needs the vocabulary size read from the vocab file.
vocablen = getvocablen(vocapath)
config = TCNNConfig()
config.vocab_size = vocablen
def tower_loss(scope):
    """Build one model tower and return the sum of its collected losses.

    Args:
        scope: Name scope of the tower; only entries of the 'losses'
            collection that belong to this scope are summed.

    Returns:
        A tensor named 'total_loss' that adds up every tensor found in the
        'losses' collection for `scope`.
    """
    features, labels = inputs(
        data_path, batch_size=FLAGS.batch_size, num_epochs=3)
    dropout_keep = tf.constant(0.9, dtype=tf.float32)
    tower = TextCNNMulFilterSize(config, features, labels, dropout_keep)
    # The value is discarded; the attribute is read only for its side effect.
    # NOTE(review): presumably this is what registers the loss in the
    # 'losses' collection -- confirm in model.py.
    _ = tower.loss
    return tf.add_n(tf.get_collection('losses', scope), name='total_loss')
def average_gradients(tower_grads):
    """Average the gradient of every shared variable across all towers.

    Args:
        tower_grads: List with one entry per tower. Each entry is a list of
            (gradient, variable) pairs, in the same variable order for every
            tower (the format returned by `Optimizer.compute_gradients`).

    Returns:
        A list of (gradient, variable) pairs in which each gradient is the
        mean over the towers. Variables for which no tower produced a
        gradient are left out. The variable of each pair is taken from the
        first tower, since the towers share their variables.
    """
    average_grads = []
    # zip(*tower_grads) regroups the pairs per variable:
    # ((grad_tower0, var), (grad_tower1, var), ...).
    for grad_and_vars in zip(*tower_grads):
        # compute_gradients reports None for a variable the loss does not
        # depend on; expanding None would raise, so such entries are skipped.
        grads = [tf.expand_dims(g, 0)
                 for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue
        # Stack along a new leading "tower" axis and average over it.
        grad = tf.reduce_mean(tf.concat(axis=0, values=grads), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads
def train():
    """Train the model on FLAGS.num_gpus GPU towers for up to FLAGS.max_steps.

    One tower is built per GPU with shared variables. The per-tower gradients
    are averaged and applied with plain gradient descent; the learning rate
    starts at 0.1 and is multiplied by 0.9999 every 100 steps. An exponential
    moving average (decay 0.9999) of the trainable variables is updated with
    every training step.

    Progress is printed every 10 steps. A checkpoint is written to
    FLAGS.train_dir every 100 steps and after the final step. Training stops
    early, without an error, when the input queue runs out of data.

    Raises:
        AssertionError: If the loss becomes NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        lr = tf.train.exponential_decay(0.1,
                                        global_step,
                                        100,
                                        0.9999,
                                        staircase=True)
        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ("tower", i)) as scope:
                        loss = tower_loss(scope)
                        # Later towers reuse the variables of the first one.
                        tf.get_variable_scope().reuse_variables()
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)

        grads = average_gradients(tower_grads)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        saver = tf.train.Saver(tf.global_variables())
        # Local variables are initialised as well.
        # NOTE(review): inputs() is called with num_epochs=3; if it is built
        # on a TF input producer, the epoch counter is a local variable that
        # the global initializer alone leaves uninitialised -- confirm.
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        coord = tf.train.Coordinator()
        threads = []
        step = 0
        try:
            sess.run(init)
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            for step in range(FLAGS.max_steps):
                start_time = time.time()
                # `loss` is the loss tensor of the last tower built above.
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time
                assert not np.isnan(loss_value), \
                    'Model diverged with loss = NaN'
                if step % 10 == 0:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / FLAGS.num_gpus
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        except tf.errors.OutOfRangeError:
            # The input queue is exhausted before max_steps was reached.
            print('Input data exhausted at step %d; stopping training.' % step)
        finally:
            # Always stop the queue threads and release the session, even
            # when a training step raised.
            coord.request_stop()
            coord.join(threads)
            sess.close()
def main(argv=None):
    """Recreate an empty checkpoint directory and run the training.

    Args:
        argv: Not used; accepted so the function matches the signature that
            tf.app.run calls.
    """
    train_dir = FLAGS.train_dir
    # Wipe any previous run so old checkpoints do not mix with new ones.
    if tf.gfile.Exists(train_dir):
        tf.gfile.DeleteRecursively(train_dir)
    tf.gfile.MakeDirs(train_dir)
    train()
# Script entry point: hand control to tf.app.run, which invokes main().
if __name__ == "__main__":
    tf.app.run(main=main)
然后将模型保存为 pb 格式,以便在 CPU 上预测:
from model import TextCNNMulFilterSize
from configuration import TCNNConfig
from data_utils import inputs,getvocablen
import time
import numpy as np
from datetime import datetime
import tensorflow as tf
import os
from datetime import timedelta
# Root directory of the project checkout on the training machine.
# Local development alternative: "/Users/shuubiasahi/Documents/python"
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify-poi/tensorflow/tf.records"
vocapath = basepath + "/credit-tftextclassify-poi/tensorflow/vocab.txt"
modelpath = basepath + "/credit-tftextclassify-poi/tensorflow/"

# Silence TensorFlow's C++ INFO and WARNING log output.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

# Command-line flags; the definitions mirror those of the training script.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/model/poi/',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 30000,
                            """Number of batches to run.""")
# The help text used to be a copy of the max_steps description.
tf.app.flags.DEFINE_integer('batch_size', 30,
                            """Number of examples per batch.""")
tf.app.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            """Whether to log device placement.""")
tf.app.flags.DEFINE_string('pathpb', "model/graph.model",
                           """最后导入pb模型的路径""")

# The model configuration needs the vocabulary size read from the vocab file.
vocablen = getvocablen(vocapath)
config = TCNNConfig()
config.vocab_size = vocablen
def export():
    """Freeze the latest training checkpoint into a binary GraphDef file.

    A single prediction tower is rebuilt on the CPU under the name scope
    "tower_0", fed by placeholders instead of the training input pipeline.
    The variables are restored from the newest checkpoint in FLAGS.train_dir,
    converted to constants and written to FLAGS.pathpb (relative to the
    current directory).

    The names of the input, keep_prob and output ops are printed so that the
    consumer of the frozen graph knows which nodes to feed and fetch.

    Raises:
        ValueError: If FLAGS.train_dir contains no checkpoint.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Not used by the prediction graph; kept so that the set of variables
        # handled by the Saver below is unchanged.
        tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Exactly one tower is built, whatever FLAGS.num_gpus says: the old
        # per-GPU loop never enabled variable reuse, so a second pass would
        # either fail or create variables that are absent from the checkpoint.
        with tf.variable_scope(tf.get_variable_scope()):
            # Pin the tower to the CPU; otherwise prediction on a CPU-only
            # machine fails.
            with tf.device('/cpu:0'):
                with tf.name_scope('%s_%d' % ("tower", 0)):
                    x_train = tf.placeholder(
                        tf.int32, [None, None], name="input_x")
                    y_train = tf.placeholder(
                        tf.float32, [None, None], name="input_y")
                    # Inference default of 1.0; the training value 0.9 was
                    # previously frozen into the graph. The op stays in the
                    # output list below, so callers can still feed it.
                    # NOTE(review): assumes keep_prob is a dropout keep
                    # probability -- confirm in model.py.
                    keep_prob = tf.constant(
                        1.0, dtype=tf.float32, name="keep_prob")
                    model = TextCNNMulFilterSize(
                        config, x_train, y_train, keep_prob)
                    output = model.pred_y

        saver = tf.train.Saver(tf.global_variables())
        init = tf.global_variables_initializer()

        latest_ckpt = tf.train.latest_checkpoint(FLAGS.train_dir)
        print(latest_ckpt)
        if latest_ckpt is None:
            # Fail with a clear message instead of passing None to restore().
            raise ValueError(
                'No checkpoint found in %s' % FLAGS.train_dir)

        # One session only, closed on exit (the old code opened two and
        # closed neither).
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)) as sess:
            sess.run(init)
            saver.restore(sess, latest_ckpt)
            print(x_train.op.name)
            print(keep_prob.op.name, output.op.name)
            # Listing the input and keep_prob ops as outputs keeps them in
            # the frozen graph.
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def,
                [x_train.op.name, keep_prob.op.name, output.op.name])
            tf.train.write_graph(
                output_graph_def, '.', FLAGS.pathpb, as_text=False)
            print("导入成功")
def main(argv=None):
    """Remove a previously exported model file, then run the export.

    Args:
        argv: Not used; accepted so the function matches the signature that
            tf.app.run calls.
    """
    pb_path = FLAGS.pathpb
    # Clear the old output first so the export starts from a clean target.
    if tf.gfile.Exists(pb_path):
        tf.gfile.DeleteRecursively(pb_path)
    export()
# Script entry point: hand control to tf.app.run, which invokes main().
if __name__ == "__main__":
    tf.app.run(main=main)