TensorFlow 2.0 循环神经网络——情感分类实战代码
本文是使用 SimpleRNNCell 手动展开循环层实现情感分类的实战代码
import os

# Must be set BEFORE TensorFlow is imported, otherwise the C++ INFO/WARNING
# logs are emitted anyway (the original set this after `import tensorflow`,
# which has no effect).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# TF2-native replacement for the v1 Session/ConfigProto(allow_growth=True)
# idiom: creating a compat.v1 Session does not control GPU memory under
# eager execution. Grow GPU memory on demand instead of reserving it all.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Fixed seeds for reproducible runs.
tf.random.set_seed(220)
np.random.seed(22)

assert tf.__version__.startswith('2.')
# Dataset: IMDB reviews. x = a review as a sequence of word indices, y = 0/1 label.
total_words = 10000      # vocabulary size: keep only the 10k most frequent words
max_review_len = 80      # every review is padded/truncated to 80 words
embedding_len = 100      # each word is embedded as a 100-d vector
batchsz = 128

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# Pad (with leading zeros) or truncate each review so that
# x_train / x_test have shape [b, 80].
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# drop_remainder=True discards the final partial batch so every batch is
# exactly [batchsz, 80] — required because MyRNN's initial RNN states are
# built with a fixed batch size.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)

# The test split is NOT shuffled: evaluation metrics are order-independent,
# and an unshuffled pipeline keeps per-batch results reproducible.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
#构建网络结构
class MyRNN(keras.Model):
    """Two-layer SimpleRNNCell sentiment classifier for IMDB reviews.

    Pipeline: [b, 80] word indices -> Embedding -> [b, 80, 100]
              -> two stacked SimpleRNNCells unrolled over the 80 time steps
              -> Dense(1) + sigmoid -> P(review is positive), shape [b, 1].
    """

    def __init__(self, units):
        """Build the layers.

        :param units: hidden-state size of each RNN cell (h_dim).
        """
        super(MyRNN, self).__init__()
        # Initial hidden state for each cell: a list holding one
        # [batchsz, units] zero tensor (SimpleRNNCell expects a list of states).
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # Embedding: [b, 80] word indices -> [b, 80, 100] dense vectors.
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # Two stacked RNN cells; dropout mitigates overfitting.
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.2)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.2)
        # Classifier head: [b, units] -> [b, 1] logit.
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Forward pass.

        net(x, training=True) -> train mode (dropout active)
        net(x, training=False) / net(x) -> inference mode

        :param inputs: [b, 80] int tensor of word indices.
        :param training: True during fit() so the cells' dropout is applied.
        :return: [b, 1] probability that each review is positive.
        """
        # embedding: [b, 80] -> [b, 80, 100]
        x = self.embedding(inputs)
        state0 = self.state0
        state1 = self.state1
        out1 = None
        # Unroll over the time axis; each `word` slice is [b, 100].
        for word in tf.unstack(x, axis=1):
            # First layer: h1 = x @ Wxh + h0 @ Whh
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # Second layer consumes the first layer's output.
            # (Bug fix: the original created rnn_cell1/state1 but never used
            # them, so the intended second layer was silently skipped.)
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # Classify from the final time step's top-layer output:
        # [b, units] -> [b, 1]  (hoisted out of the loop — only the last
        # step's projection is ever used).
        x = self.outlayer(out1)
        # p(y is positive | x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build, train, and evaluate the RNN sentiment classifier."""
    units = 64    # hidden size of each RNN cell
    epochs = 4    # training passes over db_train

    model = MyRNN(units)
    optimizer = keras.optimizers.Adam(0.001)
    loss_fn = tf.losses.BinaryCrossentropy()
    model.compile(optimizer=optimizer,
                  loss=loss_fn,
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)

    # Train, validating against the test split after each epoch.
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # Final evaluation on the test split.
    model.evaluate(db_test)


if __name__ == '__main__':
    main()
文章最后附上霍金的一句话:
人工智能的强力崛起,可能是人类历史上最好的事情,也可能是最糟糕的事情。
——史蒂芬·霍金