Multi-cell single-step LSTM
Input: vector length 2, minibatch 2
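For reference, the manual check below implements the standard LSTM cell equations, in the i, j, f, o gate order that tf.nn.rnn_cell.LSTMCell uses (with forget_bias added inside the forget gate's sigmoid):

$$
\begin{aligned}
[i,\; j,\; f,\; o] &= [x_t,\, h_{t-1}]\, W + b \\
c_t &= \sigma(f + \text{forget\_bias}) \odot c_{t-1} + \sigma(i) \odot \tanh(j) \\
h_t &= \sigma(o) \odot \tanh(c_t)
\end{aligned}
$$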
Code:
import tensorflow as tf
import numpy as np
hidden_size = 3
forget_bias = 0.0
multi_size = 2
# TF graph
# x_data shape: (batch=2, time steps=1, input size=2)
x_data = np.arange(1, 5, dtype=np.float32).reshape(2, 1, 2)
cells = [tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=forget_bias) for _ in range(multi_size)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
# outputs: the top cell's h at every step, shape (batch, time, hidden_size)
# states: one LSTMStateTuple(c, h) per stacked cell, for the final step
outputs, states = tf.nn.dynamic_rnn(multi_cell, x_data, dtype=tf.float32)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# TF computation
outputs_value, state_value = sess.run([outputs, states])
print('==========TF computation==========')
print(f'c1:\n{state_value[0].c}')
print(f'h1:\n{state_value[0].h}')
print(f'c2:\n{state_value[1].c}')
print(f'h2:\n{state_value[1].h}')
# manual computation setup
def sigmoid(d):
    return 1 / (1 + np.exp(-d))

def calculate(w, b, x, h_prev, c_prev):
    # concatenate input and previous hidden state, as LSTMCell does internally
    input_ = np.hstack((x, h_prev))
    r = input_.dot(w) + b  # shape (batch, 4 * hidden_size)
    # LSTMCell packs the four gates in i, j, f, o order
    i = sigmoid(r[:, 0:hidden_size])                                  # input gate
    j = np.tanh(r[:, hidden_size:hidden_size * 2])                    # candidate cell input
    f = sigmoid(r[:, hidden_size * 2:hidden_size * 3] + forget_bias)  # forget gate
    o = sigmoid(r[:, hidden_size * 3:hidden_size * 4])                # output gate
    c = f * c_prev + i * j
    h = np.tanh(c) * o
    return c, h
# multi_cell.weights is [kernel0, bias0, kernel1, bias1];
# each kernel has shape (input_size + hidden_size, 4 * hidden_size)
w0, b0, w1, b1 = [sess.run(weight) for weight in multi_cell.weights]
zeros = np.zeros((2, 3), np.float32)  # initial h and c are all zeros
# manual computation
print('\n\n==========manual computation==========')
c1, h1 = calculate(w0, b0, x_data[:, 0, :], zeros, zeros)
c2, h2 = calculate(w1, b1, h1, zeros, zeros)
print(f'c1:\n{c1}')
print(f'h1:\n{h1}')
print(f'c2:\n{c2}')
print(f'h2:\n{h2}')
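# optional sanity check (a sketch): the manual results should match
# TF's up to floating-point tolerance
assert np.allclose(state_value[0].c, c1, atol=1e-5)
assert np.allclose(state_value[0].h, h1, atol=1e-5)
assert np.allclose(state_value[1].c, c2, atol=1e-5)
assert np.allclose(state_value[1].h, h2, atol=1e-5)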
sess.close()
Multi-cell multi-step LSTM
Input: vector length 2, minibatch 2, 2 time steps
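To keep the c1…c4 / h1…h4 naming below straight: at each time step, cell $l$ takes the output of the cell below at the same step as input, together with its own state from the previous step,

$$
(c_t^{(l)},\, h_t^{(l)}) = \mathrm{LSTM}^{(l)}\big(h_t^{(l-1)},\, h_{t-1}^{(l)},\, c_{t-1}^{(l)}\big), \qquad h_t^{(0)} = x_t .
$$

So c1, h1 and c2, h2 are cells 1 and 2 at t=1, and c3, h3 and c4, h4 are the same cells at t=2.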
Code:
import tensorflow as tf
import numpy as np
hidden_size = 3
forget_bias = 0.0
multi_size = 2
# TF graph
# x_data shape: (batch=2, time steps=2, input size=2)
x_data = np.arange(1, 9, dtype=np.float32).reshape(2, 2, 2)
cells = [tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=forget_bias) for _ in range(multi_size)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_cell, x_data, dtype=tf.float32)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# TF computation
outputs_value, state_value = sess.run([outputs, states])
print('==========TF computation==========')
# outputs holds the top cell's h at every step; [:, 0, :] is h2 (top cell, t=1)
print(f'h2:\n{outputs_value[:, 0, :]}')
print(f'c3:\n{state_value[0].c}')
print(f'h3:\n{state_value[0].h}')
print(f'c4:\n{state_value[1].c}')
print(f'h4:\n{state_value[1].h}')
# manual computation setup
def sigmoid(d):
    return 1 / (1 + np.exp(-d))

def calculate(w, b, x, h_prev, c_prev):
    # concatenate input and previous hidden state, as LSTMCell does internally
    input_ = np.hstack((x, h_prev))
    r = input_.dot(w) + b  # shape (batch, 4 * hidden_size)
    # LSTMCell packs the four gates in i, j, f, o order
    i = sigmoid(r[:, 0:hidden_size])                                  # input gate
    j = np.tanh(r[:, hidden_size:hidden_size * 2])                    # candidate cell input
    f = sigmoid(r[:, hidden_size * 2:hidden_size * 3] + forget_bias)  # forget gate
    o = sigmoid(r[:, hidden_size * 3:hidden_size * 4])                # output gate
    c = f * c_prev + i * j
    h = np.tanh(c) * o
    return c, h
# multi_cell.weights is [kernel0, bias0, kernel1, bias1];
# each kernel has shape (input_size + hidden_size, 4 * hidden_size)
w0, b0, w1, b1 = [sess.run(weight) for weight in multi_cell.weights]
zeros = np.zeros((2, 3), np.float32)  # initial h and c are all zeros
# manual computation
print('\n\n==========manual computation==========')
print('\nt=1')
c1, h1 = calculate(w0, b0, x_data[:, 0, :], zeros, zeros)
c2, h2 = calculate(w1, b1, h1, zeros, zeros)
print(f'c1:\n{c1}')
print(f'h1:\n{h1}')
print(f'c2:\n{c2}')
print(f'h2:\n{h2}')
print('\nt=2')
c3, h3 = calculate(w0, b0, x_data[:, 1, :], h1, c1)
c4, h4 = calculate(w1, b1, h3, h2, c2)
print(f'c3:\n{c3}')
print(f'h3:\n{h3}')
print(f'c4:\n{c4}')
print(f'h4:\n{h4}')
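# optional sanity check (a sketch): compare every manual result against TF's
assert np.allclose(outputs_value[:, 0, :], h2, atol=1e-5)
assert np.allclose(state_value[0].c, c3, atol=1e-5)
assert np.allclose(state_value[0].h, h3, atol=1e-5)
assert np.allclose(state_value[1].c, c4, atol=1e-5)
assert np.allclose(state_value[1].h, h4, atol=1e-5)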
sess.close()
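The two hand-unrolled steps above generalize to a pair of nested loops over time steps and stacked cells. A minimal sketch, reusing calculate, zeros, x_data, and the already-fetched weights from the listing:

# generic manual unrolling for any number of steps and stacked cells
params = [(w0, b0), (w1, b1)]           # one (kernel, bias) pair per stacked cell
h = [zeros] * len(params)               # per-cell hidden states
c = [zeros] * len(params)               # per-cell cell states
for t in range(x_data.shape[1]):        # walk the time steps
    layer_input = x_data[:, t, :]
    for l, (w, b) in enumerate(params): # walk the stacked cells bottom-up
        c[l], h[l] = calculate(w, b, layer_input, h[l], c[l])
        layer_input = h[l]              # this cell's output feeds the next cell
# h[-1] now equals h4 above: the top cell's output at the last step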