[自然语言处理]循环神经网络结构的TensorFlow实现整理
[自然语言处理]循环神经网络结构的TensorFlow实现整理
循环神经网络是一类处理序列数据的隐变量自回归模型:
- 序列数据:音乐、语言、文本等连续的,含义与元素顺序极大相关的数据
- 隐变量自回归模型:形式上,会保留对历史序列观测的“总结”,并且同时更新预测当前序列与“总结”的模型
常见的循环神经网络架构的模型有:RNN,GRU,LSTM
网络 | 结构 | 描述 |
---|---|---|
RNN | 当前时刻的输出由当前输入与隐状态决定 | |
GRU | 通过两个门控机制,让模型学习合适的时间更新隐状态、重置隐状态 | |
LSTM | 增加了一个隐状态单元(记忆)来控制输入与遗忘,并通过三个门控机制,决定单元内部的更新 |
TensorFlow低阶API实现
import tensorflow as tf
# 定义参数
def get_params(vocab_size, num_hiddens):
num_inputs = num_outputs = vocab_size
def normal(shape):
return tf.random.normal(shape=shape, stddev=0.01, mean=0, dtype=tf.float32)
# GRU/LSTM
def three():
return (
tf.Variable(normal((num_inputs, num_hiddens)), dtype=tf.float32)
tf.Variable(normal((num_hiddens, num_hiddens)), dtype=tf.float32)
tf.Variable(tf.zeros(num_hiddens), dtype=tf.float32)
)
# 隐藏层参数
# RNN ############
W_xh, W_hh, b_h = three()
params = [W_xh, W_hh, b_h]
# # GRU ############
# W_xz, W_hz, b_z = three() # 更新门
# W_xr, W_hr, b_r = three() # 重置门
# W_xh, W_hh, b_h = three() # 隐状态
# params = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h]
# # LSTM ############
# W_xi, W_hi, b_i = three() # 输入门
# W_xf, W_hf, b_f = three() # 遗忘门
# W_xo, W_ho, b_o = three() # 输出门
# W_xc, W_hc, b_c = three() # 候选记忆元参数
# params = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c]
# 输出层参数
W_hq = tf.Variable(normal((num_hiddens, num_outputs)), dtype=tf.float32)
b_q = tf.Variable(tf.zeros(num_outputs), dtype=tf.float32)
parms += [W_hq, b_q]
return params
# 定义隐状态
def init_state(batch_size, num_hiddens):
# RNN/GRU 只有一个隐状态,LSTM 有两个
return (tf.zeros((batch_size, num_hiddens)), tf.zeros((batch_size, num_hiddens)))
# 定义网络结构
def cell(inputs, state, params):
outputs = []
# RNN ############
H, _ = state
W_xh, W_hh, b_h, W_hq, b_q = params
for X in inputs:
# X (-1, num_inputs)
X = tf.reshape(X, [-1, W_xh.shape[0]])
H = tf.tanh(tf.matmul(X, W_xh) + tf.matmul(H, W_hh) + b_h)
Y = tf.matmul(H, W_hq) + b_q
outputs.append(Y)
return tf.concat(outputs, axis=0), (H, _)
# # GRU ############
# H, _ = state
# W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
# for X in inputs:
# X = tf.reshape(X, [-1, W_xz.shape[0]])
# Z = tf.sigmoid(tf.matmul(X, W_xz) + tf.matmul(H, W_hz) + b_z)
# R = tf.sigmoid(tf.matmul(X, W_xr) + tf.matmul(H, W_hr) + b_r)
# H_tilda = tf.tanh(tf.matmul(X, W_xh) + tf.matmul(R * H, W_hh) + b_h)
# H = Z * H + (1 - Z) * H_tilda
# Y = tf.matmul(H, W_hq) + b_q
# outputs.append(Y)
# return tf.concat(outputs, axis=0), (H, _)
# # LSTM ###########
# (H, C) = state
# W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q = params
# for X in inputs:
# X = tf.reshape(X, [-1, W_xi.shape[0]])
# I = tf.sigmoid(tf.matmul(X, W_xi) + tf.matmul(H, W_hi) + b_i)
# F = tf.sigmoid(tf.matmul(X, W_xf) + tf.matmul(H, W_hf) + b_f)
# O = tf.sigmoid(tf.matmul(X, W_xo) + tf.matmul(H, W_ho) + b_o)
# C_tilda = tf.tanh(tf.matmul(X, W_xc) + tf.matmul(H, W_hc) + b_c)
# C = F * C + I * C_tilda
# H = O * tf.tanh(C)
# Y = tf.matmul(H, W_hq) + b_q
# outputs.append(Y)
# return tf.concat(outputs, axis=0), (H, C)
TensorFlow高阶API实现
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import SimpleRNN, GRU, LSTM
from tensorflow.keras.optimizers import Adam
embedding_size = 8
model = Sequential()
# Embedding层,将word由one-hot转变为稠密的嵌入向量
model.add(
Embedding(
input_dim=num_words,
output_dim=embedding_size,
input_length=max_tokens,
name='layer_embedding'
)
)
# 这里用三个GRU单元堆叠在一起,实现了一个深度循环神经网络
# return_sequences=True 指返回整个输出序列
model.add(GRU(units=16, return_sequences=True))
model.add(GRU(units=8, return_sequences=True))
model.add(GRU(units=4))
model.add(Dense(1, activation='sigmoid'))
optimizer = Adam(learning_rate=1e-3)
model.compile(
loss='binary_crossentropy',
optimizer=optimizer,
metrics=['accuracy']
)
参考链接
本文出现的代码与图片均来自以下链接: