Multi-cell single-step LSTM
Input: vector length 2, minibatch 2
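For reference, the manual check below implements the standard LSTM cell equations, in the i, j, f, o gate order that tf.nn.rnn_cell.LSTMCell uses (with forget_bias added inside the forget gate's sigmoid):

$$
\begin{aligned}
[i,\; j,\; f,\; o] &= [x_t,\, h_{t-1}]\, W + b \\
c_t &= \sigma(f + \text{forget\_bias}) \odot c_{t-1} + \sigma(i) \odot \tanh(j) \\
h_t &= \sigma(o) \odot \tanh(c_t)
\end{aligned}
$$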
Code:
import tensorflow as tf
import numpy as np
hidden_size = 3
forget_bias = 0.0
multi_size = 2
# TF graph
# x_data shape: (batch=2, time steps=1, input size=2)
x_data = np.arange(1, 5, dtype=np.float32).reshape(2, 1, 2)
cells = [tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=forget_bias) for _ in range(multi_size)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
# outputs: the top cell's h at every step, shape (batch, time, hidden_size)
# states: one LSTMStateTuple(c, h) per stacked cell, for the final step
outputs, states = tf.nn.dynamic_rnn(multi_cell, x_data, dtype=tf.float32)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# TF computation
outputs_value, state_value = sess.run([outputs, states])
print('==========TF computation==========')
print(f'c1:\n{state_value[0].c}')
print(f'h1:\n{state_value[0].h}')
print(f'c2:\n{state_value[1].c}')
print(f'h2:\n{state_value[1].h}')
# manual computation setup
def sigmoid(d):
    return 1 / (1 + np.exp(-d))

def calculate(w, b, x, h_prev, c_prev):
    # concatenate input and previous hidden state, as LSTMCell does internally
    input_ = np.hstack((x, h_prev))
    r = input_.dot(w) + b  # shape (batch, 4 * hidden_size)
    # LSTMCell packs the four gates in i, j, f, o order
    i = sigmoid(r[:, 0:hidden_size])                                  # input gate
    j = np.tanh(r[:, hidden_size:hidden_size * 2])                    # candidate cell input
    f = sigmoid(r[:, hidden_size * 2:hidden_size * 3] + forget_bias)  # forget gate
    o = sigmoid(r[:, hidden_size * 3:hidden_size * 4])                # output gate
    c = f * c_prev + i * j
    h = np.tanh(c) * o
    return c, h
# multi_cell.weights is [kernel0, bias0, kernel1, bias1];
# each kernel has shape (input_size + hidden_size, 4 * hidden_size)
w0, b0, w1, b1 = [sess.run(weight) for weight in multi_cell.weights]
zeros = np.zeros((2, 3), np.float32)  # initial h and c are all zeros
# manual computation
print('\n\n==========manual computation==========')
c1, h1 = calculate(w0, b0, x_data[:, 0, :], zeros, zeros)
c2, h2 = calculate(w1, b1, h1, zeros, zeros)
print(f'c1:\n{c1}')
print(f'h1:\n{h1}')
print(f'c2:\n{c2}')
print(f'h2:\n{h2}')
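# optional sanity check (a sketch): the manual results should match
# TF's up to floating-point tolerance
assert np.allclose(state_value[0].c, c1, atol=1e-5)
assert np.allclose(state_value[0].h, h1, atol=1e-5)
assert np.allclose(state_value[1].c, c2, atol=1e-5)
assert np.allclose(state_value[1].h, h2, atol=1e-5)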
sess.close()
Multi-cell multi-step LSTM
Input: vector length 2, minibatch 2, 2 time steps
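To keep the c1…c4 / h1…h4 naming below straight: at each time step, cell $l$ takes the output of the cell below at the same step as input, together with its own state from the previous step,

$$
(c_t^{(l)},\, h_t^{(l)}) = \mathrm{LSTM}^{(l)}\big(h_t^{(l-1)},\, h_{t-1}^{(l)},\, c_{t-1}^{(l)}\big), \qquad h_t^{(0)} = x_t .
$$

So c1, h1 and c2, h2 are cells 1 and 2 at t=1, and c3, h3 and c4, h4 are the same cells at t=2.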
Code:
import tensorflow as tf
import numpy as np
hidden_size = 3
forget_bias = 0.0
multi_size = 2
# TF graph
# x_data shape: (batch=2, time steps=2, input size=2)
x_data = np.arange(1, 9, dtype=np.float32).reshape(2, 2, 2)
cells = [tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=forget_bias) for _ in range(multi_size)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_cell, x_data, dtype=tf.float32)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# TF computation
outputs_value, state_value = sess.run([outputs, states])
print('==========TF computation==========')
# outputs holds the top cell's h at every step; [:, 0, :] is h2 (top cell, t=1)
print(f'h2:\n{outputs_value[:, 0, :]}')
print(f'c3:\n{state_value[0].c}')
print(f'h3:\n{state_value[0].h}')
print(f'c4:\n{state_value[1].c}')
print(f'h4:\n{state_value[1].h}')
# manual computation setup
def sigmoid(d):
    return 1 / (1 + np.exp(-d))

def calculate(w, b, x, h_prev, c_prev):
    # concatenate input and previous hidden state, as LSTMCell does internally
    input_ = np.hstack((x, h_prev))
    r = input_.dot(w) + b  # shape (batch, 4 * hidden_size)
    # LSTMCell packs the four gates in i, j, f, o order
    i = sigmoid(r[:, 0:hidden_size])                                  # input gate
    j = np.tanh(r[:, hidden_size:hidden_size * 2])                    # candidate cell input
    f = sigmoid(r[:, hidden_size * 2:hidden_size * 3] + forget_bias)  # forget gate
    o = sigmoid(r[:, hidden_size * 3:hidden_size * 4])                # output gate
    c = f * c_prev + i * j
    h = np.tanh(c) * o
    return c, h
# multi_cell.weights is [kernel0, bias0, kernel1, bias1];
# each kernel has shape (input_size + hidden_size, 4 * hidden_size)
w0, b0, w1, b1 = [sess.run(weight) for weight in multi_cell.weights]
zeros = np.zeros((2, 3), np.float32)  # initial h and c are all zeros
# manual computation
print('\n\n==========manual computation==========')
print('\nt=1')
c1, h1 = calculate(w0, b0, x_data[:, 0, :], zeros, zeros)
c2, h2 = calculate(w1, b1, h1, zeros, zeros)
print(f'c1:\n{c1}')
print(f'h1:\n{h1}')
print(f'c2:\n{c2}')
print(f'h2:\n{h2}')
print('\nt=2')
c3, h3 = calculate(w0, b0, x_data[:, 1, :], h1, c1)
c4, h4 = calculate(w1, b1, h3, h2, c2)
print(f'c3:\n{c3}')
print(f'h3:\n{h3}')
print(f'c4:\n{c4}')
print(f'h4:\n{h4}')
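# optional sanity check (a sketch): compare every manual result against TF's
assert np.allclose(outputs_value[:, 0, :], h2, atol=1e-5)
assert np.allclose(state_value[0].c, c3, atol=1e-5)
assert np.allclose(state_value[0].h, h3, atol=1e-5)
assert np.allclose(state_value[1].c, c4, atol=1e-5)
assert np.allclose(state_value[1].h, h4, atol=1e-5)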
sess.close()
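The two hand-unrolled steps above generalize to a pair of nested loops over time steps and stacked cells. A minimal sketch, reusing calculate, zeros, x_data, and the already-fetched weights from the listing:

# generic manual unrolling for any number of steps and stacked cells
params = [(w0, b0), (w1, b1)]           # one (kernel, bias) pair per stacked cell
h = [zeros] * len(params)               # per-cell hidden states
c = [zeros] * len(params)               # per-cell cell states
for t in range(x_data.shape[1]):        # walk the time steps
    layer_input = x_data[:, t, :]
    for l, (w, b) in enumerate(params): # walk the stacked cells bottom-up
        c[l], h[l] = calculate(w, b, layer_input, h[l], c[l])
        layer_input = h[l]              # this cell's output feeds the next cell
# h[-1] now equals h4 above: the top cell's output at the last step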