前言
关于循环神经网络的理论推导和证明,推荐直接阅读相关论文,也可以参考以下资料:
- https://colah.github.io/posts/2015-08-Understanding-LSTMs/
- https://r2rt.com/styles-of-truncated-backpropagation.html
本章主要介绍循环神经网络(RNN)的搭建,包括基本的RNN、双向LSTM和动态LSTM。
前两个例子使用MNIST数据集做分类;动态RNN的例子使用随机生成的变长序列数据做二分类。重点是练习RNN的搭建。
Simple RNN
- 输入为[batch_size,n_step,n_input].
- 用tf.unstack沿时间维(axis=1)拆分为长度为n_step的列表,列表中每个元素的形状为[batch,n_input]
- 网络架构h_t输出的维度为512
- 网络经过CELL输出为(?,512)
- 然后进行wx+b
- 损失函数为softmax交叉熵:对wx+b得到的输出(logits)做softmax之后再求loss
# -*- coding: UTF-8 -*
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data set; labels are one-hot encoded.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training hyper-parameters.
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

# Network parameters: inputs have shape [batch, n_steps, n_input].
n_input = 28
n_steps = 28
n_hidden = 512  # dimension of the LSTM output h_t
n_classes = 10
# Backward-compatible alias for the original, misspelled name.
n_clssses = n_classes

# tf graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Weight and bias of the output projection (h_t -> class logits).
W = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
b = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, W, b):
    """Build a single-layer LSTM classifier and return its logits.

    Args:
        x: input tensor; it is unstacked along axis 1 into ``n_steps``
            per-time-step tensors.
        W: dict whose ``'out'`` entry is the output projection matrix.
        b: dict whose ``'out'`` entry is the output bias.

    Returns:
        The LSTM output of the last time step multiplied by ``W['out']``
        plus ``b['out']``.
    """
    # static_rnn(cell, inputs, initial_state=None, dtype=None,
    #            sequence_length=None, scope=None)
    # expects `inputs` as a length-T list, each element a tensor of shape
    # [batch_size, input_size], hence the unstack along the time axis.
    step_inputs = tf.unstack(x, n_steps, 1)

    # forget_bias sets the bias of the forget gate.
    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

    step_outputs, _final_state = rnn.static_rnn(
        cell, step_inputs, dtype=tf.float32)

    # Only the output of the last time step feeds the classifier.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, W['out']) + b['out']
# Build the graph: logits, loss, optimizer and accuracy metric.
pred = RNN(x, W, b)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate the model: fraction of samples whose arg-max prediction matches
# the arg-max of the one-hot label.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until the number of processed samples reaches
    # training_iters.
    while step * batch_size < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        feed = {x: batch_x, y: batch_y}
        sess.run(optimizer, feed_dict=feed)
        if step % display_step == 0:
            # Fetch both metrics in one run instead of two forward passes.
            acc, loss = sess.run([accuracy, cost], feed_dict=feed)
            # print() with a single string argument behaves the same on
            # Python 2 and Python 3.
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    test_acc = sess.run(accuracy, feed_dict={x: test_data, y: test_label})
    print("Testing Accuracy: " + str(test_acc))
双向LSTM bidirectional rnn
基本架构与Simple RNN相同,不同之处在于:
- cell 分为fw、bw,即前向和后向两个cell
- 两个cell的输出拼接(concat)后作为h_t,输出维度为2*n_hidden
- 因此W的维度为[2*n_hidden,n_classes]
# -*- coding: UTF-8 -*
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training parameters
learning_rate = 0.001
training_iter = 100000
batch_size = 128
display_step = 10

# Network parameters
n_input = 28
n_steps = 28
n_hidden = 512
n_classes = 10

# Placeholders for the input sequences and their one-hot labels.
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Output-layer weights and biases.
# The projection takes 2 * n_hidden features because of the forward +
# backward cells.
weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
def BiRNN(x, weights, biases):
    """Build a bidirectional LSTM classifier and return its logits.

    Args:
        x: input tensor; it is unstacked along axis 1 into ``n_steps``
            per-time-step tensors.
        weights: dict whose ``'out'`` entry is the output projection matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        The bidirectional output of the last time step multiplied by
        ``weights['out']`` plus ``biases['out']``.
    """
    step_inputs = tf.unstack(x, n_steps, 1)

    # One LSTM cell per direction.
    forward_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    backward_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # The final forward/backward states are not needed for classification.
    step_outputs, _fw_state, _bw_state = rnn.static_bidirectional_rnn(
        forward_cell, backward_cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
# Build the graph: logits, loss, optimizer and accuracy metric.
pred = BiRNN(x, weights, biases)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate the model: fraction of samples whose arg-max prediction matches
# the arg-max of the one-hot label.
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(pred, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until the number of processed samples reaches
    # training_iter.
    while step * batch_size < training_iter:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape the flat images to [batch_size, n_steps, n_input].
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        feed = {x: batch_x, y: batch_y}
        sess.run(optimizer, feed_dict=feed)
        if step % display_step == 0:
            # Fetch both metrics in one run instead of two forward passes.
            acc, loss = sess.run([accuracy, cost], feed_dict=feed)
            # print() with a single string argument behaves the same on
            # Python 2 and Python 3.
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy on the first test_len test images.
    test_len = 128
    test_X = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_y = mnist.test.labels[:test_len]
    test_acc = sess.run(accuracy, feed_dict={x: test_X, y: test_y})
    print("Testing Accuracy: " + str(test_acc))
dynamic rnn
from __future__ import print_function
import tensorflow as tf
import random
# ====================
# TOY DATA GENERATOR
# ====================
class ToySequenceData(object):
    """Generate sequences of data with dynamic length.

    This class generates samples for training:
    - Class 0: linear sequences (i.e. [0, 1, 2, 3,...]), label [1., 0.]
    - Class 1: random sequences (i.e. [1, 3, 10, 7,...]), label [0., 1.]

    Every value is divided by ``max_value`` and wrapped in a one-element
    list, so each sequence is a list of ``[value]`` items.

    NOTICE:
    Each sequence is padded with ``[0.]`` up to ``max_seq_len`` for
    TensorFlow consistency (a numpy array with inconsistent dimensions
    cannot be fed). The dynamic calculation is then performed thanks to the
    ``seqlen`` attribute, which records every actual sequence length.

    Attributes:
        data: list of padded sequences.
        labels: list of one-hot labels, aligned with ``data``.
        seqlen: list of the unpadded sequence lengths, aligned with ``data``.
        batch_id: index of the first sample of the next batch.
    """

    def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3,
                 max_value=1000):
        """Generate ``n_samples`` random samples.

        Args:
            n_samples: number of sequences to generate.
            max_seq_len: largest sequence length; also the padded length.
            min_seq_len: smallest sequence length.
            max_value: upper bound of the raw integer values, and the
                divisor used to scale them.
        """
        self.data = []
        self.labels = []
        self.seqlen = []
        for _ in range(n_samples):
            # Random sequence length (named seq_len so the builtin len()
            # is not shadowed).
            seq_len = random.randint(min_seq_len, max_seq_len)
            # Monitor sequence length for TensorFlow dynamic calculation
            self.seqlen.append(seq_len)
            # Add a random or linear int sequence (50% prob)
            if random.random() < .5:
                # Generate a linear sequence
                rand_start = random.randint(0, max_value - seq_len)
                sequence = [[float(value) / max_value]
                            for value in range(rand_start,
                                               rand_start + seq_len)]
                label = [1., 0.]
            else:
                # Generate a random sequence
                sequence = [[float(random.randint(0, max_value)) / max_value]
                            for _ in range(seq_len)]
                label = [0., 1.]
            # Pad sequence for dimension consistency; a comprehension is
            # used so every padding item is a distinct list object.
            sequence += [[0.] for _ in range(max_seq_len - seq_len)]
            self.data.append(sequence)
            self.labels.append(label)
        self.batch_id = 0

    def next(self, batch_size):
        """Return a batch of data. When dataset end is reached, start over.

        Args:
            batch_size: maximum number of samples in the batch; the last
                batch before wrapping around may be shorter.

        Returns:
            A ``(batch_data, batch_labels, batch_seqlen)`` tuple of lists.
        """
        if self.batch_id == len(self.data):
            self.batch_id = 0
        start = self.batch_id
        end = min(start + batch_size, len(self.data))
        self.batch_id = end
        return (self.data[start:end],
                self.labels[start:end],
                self.seqlen[start:end])
# ==========
# MODEL
# ==========
# Training parameters
learning_rate = 0.01
training_iters = 1000000
batch_size = 128
display_step = 10

# Network parameters
seq_max_len = 20  # maximum (padded) sequence length
n_hidden = 64  # number of features in the hidden layer
n_classes = 2  # linear sequence or not

# Toy data sets; the training set is generated before the test set.
trainset = ToySequenceData(n_samples=1000, max_seq_len=seq_max_len)
testset = ToySequenceData(n_samples=500, max_seq_len=seq_max_len)

# Graph inputs: padded sequences and their labels.
x = tf.placeholder("float", [None, seq_max_len, 1])
y = tf.placeholder("float", [None, n_classes])
# Actual (unpadded) length of every sequence in the batch.
seqlen = tf.placeholder(tf.int32, [None])

# Output-layer weights and biases.
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
def dynamicRNN(x, seqlen, weights, biases):
    """Build an LSTM classifier over variable-length sequences.

    Args:
        x: padded input tensor; it is unstacked along axis 1 into
            ``seq_max_len`` per-time-step tensors.
        seqlen: int tensor holding the actual length of every sequence.
        weights: dict whose ``'out'`` entry is the output projection matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        For every sample, the LSTM output at its last valid time step
        (index ``seqlen - 1``) multiplied by ``weights['out']`` plus
        ``biases['out']``.
    """
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    x = tf.unstack(x, seq_max_len, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)

    # Get lstm cell output, providing 'sequence_length' will perform dynamic
    # calculation.
    outputs, states = tf.contrib.rnn.static_rnn(
        lstm_cell, x, dtype=tf.float32, sequence_length=seqlen)

    # When performing dynamic calculation, we must retrieve the last
    # dynamically computed output, i.e., if a sequence length is 10, we need
    # to retrieve the 10th output. This is done by flattening the outputs
    # and gathering one row per sample.

    # 'outputs' is a list with one tensor per time step. Stack them:
    #   stack([x, y, z])         => packs along the first dimension
    #   stack([x, y, z], axis=1) => packs along the second dimension
    outputs = tf.stack(outputs)  # [n_steps, batch_size, n_hidden]
    outputs = tf.transpose(outputs, [1, 0, 2])  # [batch_size, n_steps, n_hidden]

    # Number of samples in the current batch (dynamic). Named so it does not
    # shadow the module-level `batch_size` hyper-parameter.
    n_batch = tf.shape(outputs)[0]
    # After flattening to [batch_size * n_steps, n_hidden], sample i starts at
    # row i * seq_max_len; its last valid step is seqlen[i] - 1 rows further.
    index = tf.range(0, n_batch) * seq_max_len + (seqlen - 1)
    outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)

    # Linear activation, using outputs computed above
    return tf.matmul(outputs, weights['out']) + biases['out']
# Logits of the dynamic RNN for every sequence in the batch.
pred = dynamicRNN(x, seqlen, weights, biases)

# Loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of samples whose arg-max prediction matches the label.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initializes all variables
init = tf.global_variables_initializer()

# Run the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Train until the number of processed samples reaches training_iters.
    while step * batch_size < training_iters:
        batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
        batch_feed = {x: batch_x, y: batch_y, seqlen: batch_seqlen}
        # One optimization step (backprop)
        sess.run(optimizer, feed_dict=batch_feed)
        if step % display_step == 0:
            # Accuracy and loss on the batch that was just trained on
            acc = sess.run(accuracy, feed_dict=batch_feed)
            loss = sess.run(cost, feed_dict=batch_feed)
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Accuracy on the whole test set
    test_data = testset.data
    test_label = testset.labels
    test_seqlen = testset.seqlen
    test_feed = {x: test_data, y: test_label, seqlen: test_seqlen}
    print("Testing Accuracy:", sess.run(accuracy, feed_dict=test_feed))
本文详细介绍如何使用TensorFlow实现三种类型的循环神经网络(RNN),包括简单的RNN、双向LSTM和动态RNN,并通过MNIST数据集进行训练验证。
573

被折叠的 条评论
为什么被折叠?



