Two different ways of writing an RNN:
def add_cell(self):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias=1.0,
                                             state_is_tuple=True)
    with tf.name_scope('initial_state'):  # name scope, for grouping ops in TensorBoard
        # initialize the cell state to zeros
        self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
    # run the unrolled RNN with tf.nn.dynamic_rnn
    self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
        lstm_cell, self.l_in_y, initial_state=self.cell_init_state, time_major=False)
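For context, add_cell is one method of an LSTMRNN class. Below is a minimal structural sketch of the surrounding class, assuming the usual layout for this kind of tutorial; add_input_layer, add_output_layer, compute_cost and the constant LR are assumed stubs, not part of the original snippet:

import tensorflow as tf

LR = 0.006  # assumed learning rate

class LSTMRNN(object):
    """Structural sketch only; the helper methods are assumed stubs."""
    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        with tf.name_scope('inputs'):
            # the placeholders fed by the training loop below
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')
        self.add_input_layer()   # assumed: sets self.l_in_y, shape (batch, n_steps, cell_size)
        self.add_cell()          # the method shown above
        self.add_output_layer()  # assumed: sets self.pred
        self.compute_cost()      # assumed: sets self.cost
        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)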
# Train the LSTMRNN
if __name__ == '__main__':
    # build the LSTMRNN model
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("logs", sess.graph)
    sess.run(tf.global_variables_initializer())
    # relocate to the local dir and run this line to view it in a browser (http://0.0.0.0:6006/):
    # $ tensorboard --logdir='logs'
    # matplotlib visualization
    plt.ion()   # interactive mode on, so the plot updates continuously
    plt.show()
    # train for 200 iterations
    for i in range(200):
        seq, res, xs = get_batch()  # fetch one batch of data
        # use an if/else to decide whether to feed model.cell_init_state
        if i == 0:
            # first batch: start from the zero initial state
            feed_dict = {
                model.xs: seq,
                model.ys: res,
            }
        else:
            feed_dict = {
                model.xs: seq,
                model.ys: res,
                model.cell_init_state: state  # keep the state continuous across batches
            }
        # one training step
        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.cell_final_state, model.pred],
            feed_dict=feed_dict)
        # plotting (left disabled)
        '''plt.plot(xs[0, :], res[0].flatten(), 'r', xs[0, :], pred.flatten()[:TIME_STEPS], 'b--')
        plt.ylim((-1.2, 1.2))
        plt.draw()
        plt.pause(0.3)  # refresh every 0.3 s'''
        # every 20 iterations, print the cost and log summaries for TensorBoard
        if i % 20 == 0:
            print('cost: ', round(cost, 4))
            result = sess.run(merged, feed_dict)
            writer.add_summary(result, i)
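get_batch() is not defined in this snippet. Judging from the disabled plotting code (curves within ±1.2), it is presumably the usual sin-to-cos toy regression task; a minimal sketch under that assumption, with BATCH_START as an assumed module-level cursor:

import numpy as np

BATCH_START = 0  # assumed: cursor into the time axis

def get_batch():
    """Return (seq, res, xs): sin inputs, cos targets, and the raw x axis."""
    global BATCH_START
    xs = np.arange(BATCH_START, BATCH_START + TIME_STEPS * BATCH_SIZE).reshape(
        (BATCH_SIZE, TIME_STEPS)) / (10 * np.pi)
    seq = np.sin(xs)   # network input
    res = np.cos(xs)   # regression target
    BATCH_START += TIME_STEPS
    # add a trailing feature dimension: shape (batch, steps, 1)
    return seq[:, :, np.newaxis], res[:, :, np.newaxis], xs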
The second way:
# Define a deep RNN whose recurrent body is an LSTM, with dropout applied
dropout_keep_prob = lstm_keep_prob if is_training else 1.0
lstm_cells = [
    tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.BasicLSTMCell(hidden_size),
        output_keep_prob=dropout_keep_prob)
    for _ in range(num_layers)
]
cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)
outputs = []
state = self.initial_state
with tf.variable_scope('RNN'):
    for time_step in range(num_steps):
        # use an if statement to reuse the LSTM variables after the first step
        if time_step > 0:
            tf.get_variable_scope().reuse_variables()
        # at every time step, feed the previous step's state back into the cell
        cell_output, state = cell(inputs[:, time_step, :], state)
        outputs.append(cell_output)
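This second style unrolls the recurrence by hand, calling the cell once per time step and reusing variables after step 0. For comparison, the loop above could be replaced by a single tf.nn.dynamic_rnn call, as in the first example; a sketch, assuming self.initial_state is built with cell.zero_state:

# how self.initial_state is typically built (as in the first example)
self.initial_state = cell.zero_state(batch_size, tf.float32)

# equivalent single call: dynamic_rnn runs the time loop and variable
# reuse internally; here outputs is a tensor of shape
# (batch, num_steps, hidden_size) rather than the Python list built above
outputs, final_state = tf.nn.dynamic_rnn(
    cell, inputs, initial_state=self.initial_state, time_major=False)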
def run_epoch(session, model, batches, train_op, output_log, step):
    # auxiliary variables for computing perplexity
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    # train one epoch
    for x, y in batches:
        # run train_op on the current batch and compute the loss; the
        # cross-entropy loss measures the probability of the next word
        # given the current one
        cost, state, _ = session.run(
            [model.cost, model.final_state, train_op],
            {model.input_data: x,
             model.targets: y,
             # at every step, feed the previous step's final_state back in
             model.initial_state: state}
        )
        total_costs += cost           # accumulated loss
        iters += model.num_steps      # number of words processed so far
        # only print logs during training
        if output_log and step % 100 == 0:
            print("After %d steps, perplexity is %.3f" %
                  (step, np.exp(total_costs / iters)))
        step += 1
    # return the model's perplexity on the given data:
    # perplexity = exp(total cross-entropy / number of words)
    return step, np.exp(total_costs / iters)
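A hypothetical driver loop showing how run_epoch threads step and the returned perplexity across epochs; NUM_EPOCH, train_model and train_batches are assumed names, not from the original:

NUM_EPOCH = 5  # assumed constant

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    step = 0
    for epoch in range(NUM_EPOCH):
        # output_log=True: print perplexity every 100 steps during training
        step, train_pplx = run_epoch(session, train_model, train_batches,
                                     train_model.train_op, True, step)
        print("Epoch: %d Training Perplexity: %.3f" % (epoch + 1, train_pplx))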