Vanilla RNNs suffer from the long-term dependency problem; the LSTM was designed to address it.
For an introduction to LSTM, see http://www.jianshu.com/p/9dc9f41f0b29
1. The basic skeleton for using an LSTM in TensorFlow
#coding:utf-8
import tensorflow as tf

# One LSTM cell; lstm_hidden_size is the number of hidden units
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_hidden_size)
# Initialize the state (c, h) to all zeros
state = lstm.zero_state(batch_size, tf.float32)
loss = 0.0
# To avoid vanishing gradients, a maximum sequence length num_steps is
# imposed (truncated backpropagation through time)
for i in range(num_steps):
    # The variables inside the LSTM are created at the first time step;
    # every later time step must reuse them
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    lstm_output, state = lstm(current_input, state)
    final_output = fully_connected(lstm_output)
    loss += calc_loss(final_output, expected_output)
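The names lstm_hidden_size, batch_size, num_steps, current_input, fully_connected, calc_loss and expected_output above are placeholders supplied by the surrounding model. As a sanity check, here is a minimal self-contained sketch of the same loop with assumed sizes (hidden size 128, batch 4, 20 steps of 10-dimensional input), leaving out the output layer and the loss:
import tensorflow as tf
inputs = tf.placeholder(tf.float32, [20, 4, 10])  # [num_steps, batch_size, input_size]
lstm = tf.contrib.rnn.BasicLSTMCell(128)
state = lstm.zero_state(4, tf.float32)
outputs = []
with tf.variable_scope("rnn"):
    for i in range(20):
        if i > 0:
            tf.get_variable_scope().reuse_variables()
        lstm_output, state = lstm(inputs[i], state)
        outputs.append(lstm_output)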
2. Deep (stacked) LSTM
#coding:utf-8
import tensorflow as tf

# Deep (multi-layer) recurrent neural network.
# number_of_layers is the number of stacked LSTM layers.
# Note: each layer needs its own cell object; reusing one cell via
# [lstm] * number_of_layers makes every layer share the same object
# and raises an error in newer TensorFlow versions.
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(lstm_size) for _ in range(number_of_layers)])
state = stacked_lstm.zero_state(batch_size, tf.float32)
loss = 0.0
for i in range(num_steps):
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    stacked_lstm_output, state = stacked_lstm(current_input, state)
    final_output = fully_connected(stacked_lstm_output)
    loss += calc_loss(final_output, expected_output)
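The state of a stacked cell is a tuple with one LSTMStateTuple(c, h) per layer. A small sketch with assumed sizes (3 layers, 64 units, batch 8):
import tensorflow as tf
cells = [tf.contrib.rnn.BasicLSTMCell(64) for _ in range(3)]
stacked = tf.contrib.rnn.MultiRNNCell(cells)
state = stacked.zero_state(8, tf.float32)
print(len(state))        # 3: one LSTMStateTuple per layer
print(state[0].c.shape)  # (8, 64)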
3. Adding dropout
#coding:utf-8
# Build a deep LSTM with dropout
import tensorflow as tf

# DropoutWrapper applies dropout to each cell's input and/or output;
# here only the output is dropped, with keep probability 0.5
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(lstm_size),
                                   output_keep_prob=0.5)
     for _ in range(number_of_layers)])
state = stacked_lstm.zero_state(batch_size, tf.float32)
loss = 0.0
for i in range(num_steps):
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    output, state = stacked_lstm(current_input, state)
    final_output = fully_connected(output)
    loss += calc_loss(final_output, expected_output)
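A common refinement (a sketch, not part of the original code): feed the keep probability through a placeholder so dropout can be switched off at evaluation time by feeding 1.0:
keep_prob = tf.placeholder(tf.float32)  # feed 0.5 for training, 1.0 for evaluation
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(lstm_size),
                                   output_keep_prob=keep_prob)
     for _ in range(number_of_layers)])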
4. A simple application: predicting the sin function
#coding:utf-8
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')  # render plots to a file instead of a display
from matplotlib import pyplot as plt

learn = tf.contrib.learn

HIDDEN_SIZE = 30         # number of LSTM hidden units
NUM_LAYERS = 2           # number of stacked LSTM layers
TIMESTEPS = 10           # length of the input window
TRAINING_STEPS = 10000   # number of training steps
BATCH_SIZE = 32
TRAIN_EXAMPLES = 10000   # number of training samples
TESTING_EXAMPLES = 1000  # number of test samples
SAMPLE_GAP = 0.01        # sampling interval
def generate_data(seq):
    # Each sample: X is a window of TIMESTEPS consecutive values,
    # y is the value immediately after that window
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS - 1):
        X.append(seq[i:i + TIMESTEPS])
        y.append(seq[i + TIMESTEPS])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
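For example, with TIMESTEPS = 3 and seq = [0, 1, 2, 3, 4, 5], generate_data returns X = [[0, 1, 2], [1, 2, 3]] and y = [3, 4]: each target is the value that follows its window, so the model learns to predict the next point of the curve.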
def lstm_model(X, y):
    # One independent BasicLSTMCell per layer (do not reuse a single
    # cell object across layers)
    cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    # Give the input an explicit feature dimension, then split it along
    # the time axis into a list of TIMESTEPS tensors of shape
    # [batch, 1], which is what static_rnn expects
    X = tf.reshape(X, [-1, TIMESTEPS, 1])
    x_ = tf.unstack(X, axis=1)
    output, _ = tf.contrib.rnn.static_rnn(cell, x_, dtype=tf.float32)
    output = output[-1]  # keep only the output of the last time step
    # A linear regression layer on top of the last LSTM output
    prediction, loss = learn.models.linear_regression(output, y)
    # optimize_loss bundles optimizer setup and the global-step update
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        optimizer="Adagrad", learning_rate=0.1)
    return prediction, loss, train_op
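An equivalent sketch of the recurrent part using tf.nn.dynamic_rnn, which consumes the [batch, time, features] tensor directly and avoids the unstack/static_rnn pair inside lstm_model:
X = tf.reshape(X, [-1, TIMESTEPS, 1])             # [batch, time, features]
outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
output = outputs[:, -1, :]                        # output of the last time step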
regressor = learn.Estimator(model_fn=lstm_model)

# Training data covers [0, test_start); test data covers [test_start, test_end)
test_start = TRAIN_EXAMPLES * SAMPLE_GAP
test_end = (TRAIN_EXAMPLES + TESTING_EXAMPLES) * SAMPLE_GAP
train_X, train_y = generate_data(np.sin(np.linspace(0, test_start, TRAIN_EXAMPLES, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(test_start, test_end, TESTING_EXAMPLES, dtype=np.float32)))

# A single fit call runs the whole training loop
regressor.fit(train_X, train_y, batch_size=BATCH_SIZE, steps=TRAINING_STEPS)

predicted = np.array([pred for pred in regressor.predict(test_X)])
rmse = np.sqrt(((predicted - test_y) ** 2).mean())
print "RMSE:", rmse

fig = plt.figure()
plot_predicted, = plt.plot(predicted, label="predicted")
plot_test, = plt.plot(test_y, label="real_sin")
plt.legend([plot_predicted, plot_test], ["predicted", "real_sin"])
fig.savefig("sin.png")
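Note: in later TensorFlow 1.x releases, calling fit/predict on a bare Estimator with NumPy arrays is deprecated; wrapping it as regressor = learn.SKCompat(learn.Estimator(model_fn=lstm_model)) keeps the same scikit-learn-style fit and predict calls working.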
5. In practice, recurrent neural networks see wide use in NLP: they capture contextual information, which makes them a natural fit for language modeling.