dynamic_rnn的定义
def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
dtype=None, parallel_iterations=None, swap_memory=False,
time_major=False, scope=None):
"""Creates a recurrent neural network specified by RNNCell cell
.
Performs fully dynamic unrolling of inputs
.
Example:
# create a BasicRNNCell
rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
# 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
# defining initial state
initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
# 'state' is a tensor of shape [batch_size, cell_state_size]
outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
initial_state=initial_state,
dtype=tf.float32)
# create 2 LSTMCells
rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
# create a RNN cell composed sequentially of a number of RNNCells
multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
# 'outputs' is a tensor of shape [batch_size, max_time, 256]
# 'state' is a N-tuple where N is the number of LSTMCells containing a
# tf.contrib.rnn.LSTMStateTuple for each cell
outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
inputs=data,
dtype=tf.float32)
1、使用1个BasicRNNCell
我的代码数据输入为[3,6,4],单层RNN
outputs 是最后一层,每一个时刻的输出,所以她的输出格式为[batch_size, max_time, cell_state_size]
states是每一层,最后一时刻的输出,我这里是单层RNN ,所以输出格式为[batch_size, cell_state_size]
所以这里outputs 最后一层,最后一个时刻,既output[:,-1](注意用output[:,-1:]会保留time维度,形状是[batch_size,1,cell_state_size]),有 output[:,-1] == states
2、使用多个BasicRNNCell
我的代码数据输入为[3,6,4],2层RNN
output是最后一层,每一个时刻的输出:[batch_size,max_time,cell_state_size]
state是每一层,最后一个时刻的输出:[2,batch_size,cell_state_size]
所以这里outputs 最后一层,最后一个时刻,既output[:,-1:]
state是最后一层,最后一个时刻的输出,既 state[-1]
得到output[:,-1:] == state[-1]
RNN的代码
"""Demo: run a stacked BasicRNNCell through tf.nn.dynamic_rnn and
inspect the shapes of the per-step outputs and the final states."""
import tensorflow as tf
import numpy as np

NUM_LAYERS = 2   # number of stacked RNN layers
HIDDEN_SIZE = 5  # units per BasicRNNCell

# Input batch: 3 sequences (batch), 6 time steps (max_time), 4 features each.
X = np.random.randn(3, 6, 4)
# True length of each sequence; all three run the full 6 steps here.
# To simulate a second sequence of length 4, zero its tail: X[1, 4:] = 0
# and record the shorter length below.
X_lengths = [6, 6, 6]

# Stack NUM_LAYERS plain RNN cells into a single multi-layer cell.
layers = [tf.nn.rnn_cell.BasicRNNCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)]
cell = tf.nn.rnn_cell.MultiRNNCell(layers)
# Single-layer variant for comparison:
# cell = tf.nn.rnn_cell.BasicRNNCell(HIDDEN_SIZE)

outputs, last_states = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=X,
    sequence_length=X_lengths,
    dtype=tf.float64,
)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    output, state = session.run([outputs, last_states])
    # Top layer's output at every step: (batch, max_time, hidden).
    print(np.shape(output))
    print(output)
    # Final-step state of every layer: (layers, batch, hidden).
    print(np.shape(state))
    print(state)
3、使用1个BasicLSTMCell
我的代码数据输入为[3,6,4],1层LSTM
output 是最后一层,每一个时刻的输出:[batch_size,max_time,cell_state_size]
由于这里是LSTM,所以state里面包含了两个状态量:一个长期状态量C(t)(cell state)和一个短期状态量h(t)(hidden state);LSTMStateTuple的顺序是(c, h),与output对应的是h(t)
state是每一层,最后一个时刻的输出:[1(层数),2(状态),batch_size,cell_state_size]
这个是1层LSTM,所以是[1,2,batch_size,cell_state_size]
outputs 最后一层,最后一个时刻,既output[:,-1:]
state是最后一层,最后一个时刻的h(t)的输出,既 state[-1][-1](即state[-1].h),它等于output[:,-1]
4、使用多个BasicLSTMCell
我的代码数据输入为[3,6,4],2层LSTM
output 是最后一层,每一个时刻的输出:[batch_size,max_time,cell_state_size]
由于这里是LSTM,所以state里面包含了两个状态量:一个长期状态量C(t)(cell state)和一个短期状态量h(t)(hidden state);LSTMStateTuple的顺序是(c, h),与output对应的是h(t)
state是每一层,最后一个时刻的输出:[2(层数),2(状态),batch_size,cell_state_size]
这个是2层LSTM,所以是[2,2,batch_size,cell_state_size]
outputs 最后一层,最后一个时刻,既output[:,-1:]
state是最后一层,最后一个时刻的h(t)的输出,既 state[-1][-1](即state[-1].h),它等于output[:,-1]
LSTM的代码
"""Demo: run a stacked BasicLSTMCell through tf.nn.dynamic_rnn and
inspect the shapes of the per-step outputs and the final LSTM states."""
import tensorflow as tf
import numpy as np

NUM_LAYERS = 2   # number of stacked LSTM layers
HIDDEN_SIZE = 5  # units per BasicLSTMCell

# Input batch: 3 sequences (batch), 6 time steps (max_time), 4 features each.
X = np.random.randn(3, 6, 4)
# True length of each sequence; all three run the full 6 steps here.
# To simulate a second sequence of length 4, zero its tail: X[1, 4:] = 0
# and record the shorter length below.
X_lengths = [6, 6, 6]

# Stack NUM_LAYERS LSTM cells into a single multi-layer cell.
layers = [tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)]
cell = tf.nn.rnn_cell.MultiRNNCell(layers)
# Plain-RNN variant for comparison:
# cell = tf.nn.rnn_cell.BasicRNNCell(HIDDEN_SIZE)

outputs, last_states = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=X,
    sequence_length=X_lengths,
    dtype=tf.float64,
)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    output, state = session.run([outputs, last_states])
    # Top layer's output at every step: (batch, max_time, hidden).
    print(np.shape(output))
    print(output)
    # One LSTMStateTuple (c, h) per layer: (layers, 2, batch, hidden).
    print(np.shape(state))
    print(state)