1. Overview
Recurrent Neural Network (RNN):
An RNN is a neural network architecture designed for learning from sequential data.
It processes a sequence by iterating over its elements while maintaining a state that stores information about what has been seen so far. In other words, the output at the current time step depends not only on the current input but also on the previous time step's state (output):
H_{t} = activeFunc(W_{hx} * X_{t} + W_{hh} * H_{t-1} + b)
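Unrolling the recurrence for the first two steps (with the initial state H_{0} = 0, as in the code below) makes this dependency explicit:
H_{1} = activeFunc(W_{hx} * X_{1} + W_{hh} * H_{0} + b)
H_{2} = activeFunc(W_{hx} * X_{2} + W_{hh} * H_{1} + b)
So H_{2} depends on X_{2} directly and, through H_{1}, on X_{1} as well; information from every earlier element is carried forward in the state.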
2. Implementing an RNN with NumPy
import numpy as np
timeSteps = 100    # number of time steps in the input sequence
inFeatures = 32    # dimensionality of the input at each time step
outFeatures = 64   # dimensionality of the output (hidden state)
inX = np.random.randn(timeSteps, inFeatures)     # input sequence
Whx = np.random.randn(outFeatures, inFeatures)   # input-to-hidden weights
Whh = np.random.randn(outFeatures, outFeatures)  # hidden-to-hidden weights
b = np.random.randn(outFeatures,)                # bias
state_t = np.zeros((outFeatures,))  # initialize the state variable
activeFunc = np.tanh                # activation function
outResult = []
for X in inX:  # iterate over the time steps
    outX = activeFunc(np.dot(Whx, X) + np.dot(Whh, state_t) + b)
    outResult.append(outX)
    state_t = outX  # the current output becomes the next step's state
result = np.stack(outResult, axis=0)
print(result.shape)  # (timeSteps, outFeatures)
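If only the final state is needed (the behavior of Keras's return_sequences=False, covered in the next section), it is simply the last element produced by the loop. A minimal continuation of the script above; the name last_output is illustrative:
last_output = outResult[-1]  # identical to state_t after the loop ends
print(last_output.shape)     # (outFeatures,) -> (64,)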
3. Implementing an RNN with Keras
keras.layers.SimpleRNN:
Input shape: [batchSize, timeSteps, inputFeatures]
Full-sequence output: [batchSize, timeSteps, outputFeatures]
Last-step output (the state variable): [batchSize, outputFeatures]
Which of these two outputs is returned is controlled by the return_sequences parameter:
return_sequences=True returns [batchSize, timeSteps, outputFeatures]
return_sequences=False returns [batchSize, outputFeatures]
If multiple RNN layers are stacked, every intermediate layer must return its full output sequence, as in the stacked sketch after the two snippets below.
# return_sequences=True: the layer returns the full output sequence
from tensorflow.keras.layers import Embedding, SimpleRNN
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Embedding(10000, 32))
model.add(SimpleRNN(32, return_sequences=True))  # output shape: (None, None, 32)
model.summary()
# return_sequences=False (the default): only the last time step is returned
from tensorflow.keras.layers import Embedding, SimpleRNN
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Embedding(10000, 32))
model.add(SimpleRNN(32))  # output shape: (None, 32)
model.summary()
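A minimal sketch of the stacking pattern described above (three layers here; the depth and layer sizes are illustrative, not from the original text):
from tensorflow.keras.layers import Embedding, SimpleRNN
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Embedding(10000, 32))
model.add(SimpleRNN(32, return_sequences=True))  # intermediate layer: returns the full sequence
model.add(SimpleRNN(32, return_sequences=True))  # intermediate layer: returns the full sequence
model.add(SimpleRNN(32))                         # last layer: returns only the final state
model.summary()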
4. Example
"""将RNN用于IMDB电影评论分类"""
#1.数据预处理
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import text,sequence
max_words=10000 #词典中单词数
max_len=500 #每条句子中的最大单词数
(train_data,train_labels),(test_data,test_labels)=imdb.load_data(num_words=10000)
train_data=sequence.pad_sequences(train_data,maxlen=max_len)
test_data=sequence.pad_sequences(test_data,maxlen=max_len)
print("train Data Shape:",train_data.shape,"train label Shape:",train_labels.shape)
print("test Data Shape:",test_data.shape,"test label Shape:",test_labels.shape)
#2.构建网络
model=Sequential()
model.add(Embedding(max_words,32))
model.add(SimpleRNN(32))
model.add(Dense(1,activation="sigmoid"))
#3.配置学习过程
from tensorflow.keras import optimizers,losses
model.compile(optimizer=optimizers.RMSprop(1e-3),loss=losses.binary_crossentropy,metrics=["acc"])
#4.训练
history=model.fit(train_data,train_labels,batch_size=128,epochs=10,validation_split=0.2) #0.2比例划分为验证集
# 5. Plotting
import matplotlib.pyplot as plt
import numpy as np
"""Plot the loss curves"""
history_dict = history.history
train_loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]
epochs = np.arange(1, len(train_loss_values) + 1)
plt.figure(1)
plt.plot(epochs, train_loss_values, label="training loss")
plt.plot(epochs, val_loss_values, label="val loss")
plt.title("Training and Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
"""Plot the accuracy curves"""
train_acc = history_dict["acc"]
val_acc = history_dict["val_acc"]
plt.figure(2)
plt.plot(epochs, train_acc, label="Training Acc")
plt.plot(epochs, val_acc, label="Val Acc")
plt.title("Training and Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Acc")
plt.legend()
plt.show()
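After training, generalization can be checked on the held-out test set; a short follow-up to the script above (model.evaluate returns the loss followed by the compiled metrics):
test_loss, test_acc = model.evaluate(test_data, test_labels, batch_size=128)
print("test loss:", test_loss, "test acc:", test_acc)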
From the example above, we can see that the simple RNN does not perform particularly well. RNNs struggle with long sequences: as the sequence length grows, they tend to suffer from the vanishing-gradient problem. To address vanishing gradients, LSTM and GRU were designed.
5. LSTM
LSTM: Long Short-Term Memory
The role of the LSTM cell: it allows past information to be re-injected at a later time, thereby mitigating the vanishing-gradient problem.
For the underlying theory, see: https://www.jianshu.com/p/9dc9f41f0b29
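For reference, the standard LSTM cell equations, written in the same notation as the RNN formula above (sigmoid is the logistic function; ⊙ is element-wise multiplication):
i_{t} = sigmoid(W_{i} * X_{t} + U_{i} * H_{t-1} + b_{i})    (input gate)
f_{t} = sigmoid(W_{f} * X_{t} + U_{f} * H_{t-1} + b_{f})    (forget gate)
o_{t} = sigmoid(W_{o} * X_{t} + U_{o} * H_{t-1} + b_{o})    (output gate)
\tilde{C}_{t} = tanh(W_{c} * X_{t} + U_{c} * H_{t-1} + b_{c})    (candidate cell state)
C_{t} = f_{t} ⊙ C_{t-1} + i_{t} ⊙ \tilde{C}_{t}
H_{t} = o_{t} ⊙ tanh(C_{t})
Note that the cell state C_{t} is updated additively rather than by repeated matrix multiplication, which is what lets gradients flow across many time steps.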
Classifying the IMDB dataset with an LSTM
# 1. Data preprocessing
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
max_words = 10000  # vocabulary size
max_len = 500      # maximum number of words per review
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=max_words)
train_data = sequence.pad_sequences(train_data, maxlen=max_len)  # pad/truncate each review to max_len
test_data = sequence.pad_sequences(test_data, maxlen=max_len)
print("train Data Shape:", train_data.shape, "train label Shape:", train_labels.shape)
print("test Data Shape:", test_data.shape, "test label Shape:", test_labels.shape)
# 2. Build the network
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Embedding(max_words, 32))
model.add(LSTM(32))
model.add(Dense(1, activation="sigmoid"))  # binary classification output
# 3. Configure the learning process
from tensorflow.keras import optimizers, losses
model.compile(optimizer=optimizers.RMSprop(1e-3), loss=losses.binary_crossentropy, metrics=["acc"])
# 4. Train
history = model.fit(train_data, train_labels, batch_size=128, epochs=10, validation_split=0.2)  # hold out 20% as a validation set
# 5. Plotting
import matplotlib.pyplot as plt
import numpy as np
"""Plot the loss curves"""
history_dict = history.history
train_loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]
epochs = np.arange(1, len(train_loss_values) + 1)
plt.figure(1)
plt.plot(epochs, train_loss_values, label="training loss")
plt.plot(epochs, val_loss_values, label="val loss")
plt.title("Training and Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
"""Plot the accuracy curves"""
train_acc = history_dict["acc"]
val_acc = history_dict["val_acc"]
plt.figure(2)
plt.plot(epochs, train_acc, label="Training Acc")
plt.plot(epochs, val_acc, label="Val Acc")
plt.title("Training and Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Acc")
plt.legend()
plt.show()
Looking at the results above, the LSTM reaches over 85% accuracy on the validation set, while the simple RNN only reaches around 80%.
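GRU, mentioned above as the other gated design, is available in Keras as a drop-in replacement. A minimal sketch, assuming the same preprocessing as the scripts above (only the network definition changes):
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Embedding(max_words, 32))
model.add(GRU(32))  # swaps a GRU cell in for the LSTM
model.add(Dense(1, activation="sigmoid"))
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["acc"])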