Preface
I have recently been learning about LSTMs, reading several blog posts and hands-on TensorFlow example code, and discussing them with friends. What matters most is understanding the overall approach, so this post annotates the code to make it easier to follow and records my notes for others. Many thanks to the blogs I drew on; see the references at the end of this post if you want to study the material further.
Code
Environment: Windows 10, TensorFlow 1.5.0
Purpose of the code: use values of sin() as the input x and the corresponding values of cos() as the output y, and train a model that learns the mapping from x to y.
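To make the mapping concrete before diving into the model, here is a small illustration of my own (not from the original code): for the same x, the network is shown sin(x) and asked to produce cos(x).

```python
import numpy as np

# A few sample points: the input at position x is sin(x),
# and the training target at the same position is cos(x).
for x in np.linspace(0, np.pi, 5):
    print('x=%.2f  input sin(x)=%+.3f  target cos(x)=%+.3f' % (x, np.sin(x), np.cos(x)))
```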
- Imports
```python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
```
- Hyperparameters
```python
batch_start = 0  # index used when building batch data
time_step = 20   # time steps for backpropagation through time, i.e. the sequence length per sample
batch_size = 50  # number of samples processed at a time
input_size = 1   # size of each sin input step, [1, input_size]
out_size = 1     # size of each cos output step
cell_size = 10   # hidden unit size of the RNN
lr = 0.006       # learning rate
```
- Data generation
```python
def get_batch():
    global batch_start, time_step
    # xs shape: (50 batch, 20 steps)
    xs = np.arange(batch_start, batch_start + time_step * batch_size) \
        .reshape((batch_size, time_step)) / (10 * np.pi)
    seq = np.sin(xs)
    res = np.cos(xs)
    batch_start += time_step
    # returned seq, res and xs have shape (batch, step, input);
    # seq[:, :, np.newaxis] adds the third (input) dimension
    return [seq[:, :, np.newaxis], res[:, :, np.newaxis], xs]
```
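As a quick sanity check (my addition, not part of the original post), calling get_batch() once confirms that the shapes match what the model's placeholders expect:

```python
seq, res, xs = get_batch()
print(seq.shape)  # (50, 20, 1): (batch_size, time_step, input_size)
print(res.shape)  # (50, 20, 1): (batch_size, time_step, out_size)
print(xs.shape)   # (50, 20): raw x values, kept 2-D for plotting
```

Note that every call advances the global batch_start by time_step, so consecutive batches slide forward along the curve rather than restarting it.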
- The LSTM model itself; take time to understand this structure
```python
class LSTMRNN(object):
    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        with tf.name_scope('inputs'):
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')
        with tf.variable_scope('in_hidden'):  # variable_scope keeps variables under different names apart
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cell()
        with tf.variable_scope('out_hidden'):
            self.add_output_layer()
        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            # lr is the module-level learning rate defined above
            self.train_op = tf.train.AdamOptimizer(lr).minimize(self.cost)

    def add_input_layer(self):
        # (batch * n_steps, in_size)
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        # Ws (in_size, cell_size)
        Ws_in = self._weight_variable([self.input_size, self.cell_size])
        # bs (cell_size, )
        bs_in = self._bias_variable([self.cell_size, ])
        # input_layer_y shape: (batch * n_steps, cell_size)
        with tf.name_scope('Wx_plus_b'):
            input_layer_y = tf.matmul(l_in_x, Ws_in) + bs_in
        # reshape input_layer_y ==> (batch, n_steps, cell_size), the input fed to the LSTM cell
        self.input_layer_y = tf.reshape(input_layer_y, [-1, self.n_steps, self.cell_size], name='2_3D')

    def add_cell(self):
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias=1.0, state_is_tuple=True)
        with tf.name_scope('initial_state'):
            self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
            lstm_cell, self.input_layer_y, initial_state=self.cell_init_state, time_major=False)

    def add_output_layer(self):
        # shape = (batch * steps, cell_size)
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size], name='2_2D')
        Ws_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size, ])
        # shape = (batch * steps, output_size)
        with tf.name_scope('Wx_plus_b'):
            self.pred = tf.matmul(l_out_x, Ws_out) + bs_out

    def compute_cost(self):
        # Take a careful look here: this is where the loss is computed.
        # With a single flattened list and all-ones weights, sequence_loss_by_example
        # effectively returns the element-wise squared error from ms_error below,
        # so self.cost works out to sum((pred - ys)^2) / batch_size.
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshape_pred')],
            [tf.reshape(self.ys, [-1], name='reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='losses'
        )
        with tf.name_scope('average_cost'):
            self.cost = tf.div(
                tf.reduce_sum(losses, name='losses_sum'),
                self.batch_size,
                name='average_cost')
            tf.summary.scalar('cost', self.cost)

    def ms_error(self, labels, logits):
        return tf.square(tf.subtract(labels, logits))

    def _weight_variable(self, shape, name='weights'):
        initializer = tf.random_normal_initializer(mean=0., stddev=1., )
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    def _bias_variable(self, shape, name='biases'):
        initializer = tf.constant_initializer(0.1)
        return tf.get_variable(name=name, shape=shape, initializer=initializer)
```
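The least obvious part of add_input_layer is the reshape round trip: the 3-D input (batch, n_steps, input_size) is flattened to 2-D so that one weight matrix can be applied to every time step in a single matmul, then reshaped back to 3-D for dynamic_rnn. A minimal NumPy sketch of the same shape flow (my own illustration, using the hyperparameter values above):

```python
import numpy as np

batch, n_steps, input_size, cell_size = 50, 20, 1, 10
xs = np.random.randn(batch, n_steps, input_size)  # stand-in for the xs placeholder
Ws = np.random.randn(input_size, cell_size)
bs = np.full(cell_size, 0.1)

flat = xs.reshape(-1, input_size)              # (1000, 1): every time step becomes a row
hidden = flat.dot(Ws) + bs                     # (1000, 10): one matmul covers all steps
back = hidden.reshape(-1, n_steps, cell_size)  # (50, 20, 10): ready for dynamic_rnn
print(flat.shape, hidden.shape, back.shape)
```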
- Main
```python
if __name__ == '__main__':
    # build the LSTMRNN model
    model = LSTMRNN(time_step, input_size, out_size, cell_size, batch_size)
    sess = tf.Session()
    # initialize all variables
    sess.run(tf.global_variables_initializer())
    # train for 400 iterations
    for i in range(400):
        seq, res, xs = get_batch()  # fetch a batch of data
        if i == 0:
            # the first batch starts from the cell's zero initial state
            feed_dict = {
                model.xs: seq,
                model.ys: res,
            }
        else:
            feed_dict = {
                model.xs: seq,
                model.ys: res,
                model.cell_init_state: state  # carry the state over to keep it continuous
            }
        # run one training step (train_op must be in the run list, or no learning happens)
        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.cell_final_state, model.pred],
            feed_dict=feed_dict)
        # print the cost periodically
        if i % 20 == 0:
            print('cost: ', round(cost, 4))
    tf.reset_default_graph()  # clear the default graph, useful when re-running in the same process
```
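matplotlib is imported at the top but never used in the listing above, so the original presumably visualized predictions during training. A minimal sketch of what that could look like; the reshape of pred back to (batch_size, time_step) and the plotting cadence are my assumptions:

```python
plt.ion()   # interactive mode so the figure updates during training
plt.show()

# e.g. under the `if i % 20 == 0:` branch of the training loop:
plt.plot(xs[0, :], res[0].flatten(), 'r', label='target cos')
plt.plot(xs[0, :], pred.reshape(batch_size, time_step)[0], 'b--', label='prediction')
plt.ylim(-1.2, 1.2)
plt.draw()
plt.pause(0.3)
```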
References