处理文本、获取到X和Y
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.contrib import layers, seq2seq

tf.set_random_seed(777)  # reproducibility

# Training text, kept verbatim (including the original "immentsity" typo)
# so the character vocabulary and the learned output stay unchanged.
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immentsity of the sea.")

# NOTE(review): set() iteration order is not deterministic across runs,
# so char ids (and thus checkpoints) are not stable between executions.
char_set = list(set(sentence))                      # unique characters
char_dict = {w: i for i, w in enumerate(char_set)}  # char -> integer id

hidden_size = len(char_set)   # LSTM output size (one unit per character class)
num_classes = len(char_set)   # number of output classes
sequence_length = 20          # each training window is 20 characters long
                              # (the original comment wrongly said 10)
learning_rate = 0.01

# Slide a window of `sequence_length` characters over the sentence.
# The target sequence is the input window shifted right by one character.
dataX = []
dataY = []
for start in range(0, len(sentence) - sequence_length):
    x_str = sentence[start:start + sequence_length]
    y_str = sentence[start + 1:start + sequence_length + 1]

    # Map characters to their integer ids.
    # (The original reused `i` here, shadowing the window index — renamed to `c`.)
    dataX.append([char_dict[c] for c in x_str])
    dataY.append([char_dict[c] for c in y_str])
占位符,session会话中传值调用
# Placeholders fed at session-run time: batches of integer-encoded
# character sequences with shape (batch, sequence_length).
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])
one-hot编码
# One-hot encode the *placeholder* X, not the constant dataX.
# BUG FIX: the original called tf.one_hot(dataX, num_classes), which baked
# the entire training set into the graph as a constant and silently ignored
# whatever was fed to X through feed_dict.
X_one_hot = tf.one_hot(X, num_classes)  # (batch, sequence_length, num_classes)
RNN使用
def lstm_cell():
    """Build one LSTM cell whose output size equals the vocabulary size."""
    return rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)

# Stack two LSTM cells to get a deeper recurrent network.
multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(2)],
                               state_is_tuple=True)

# Unroll the stacked RNN over the one-hot input; only the per-step
# outputs are needed here, the final state is discarded.
outputs, _ = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)
添加一层隐藏层,增加深度,为了更准确
# Project each time step onto the vocabulary with a linear (no-activation)
# fully-connected layer: flatten (batch*time, hidden), apply the layer,
# then restore the (batch, time, classes) shape sequence_loss expects.
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = layers.fully_connected(X_for_fc, num_classes, activation_fn=None)

# Static batch size: one training example per sliding window.
batch_size = len(dataX)
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])
使用了一层全连接之后效果提升很大
下面就是很正常的优化训练
# Every time step of every example contributes equally to the loss.
weights = tf.ones([batch_size, sequence_length])

# Per-sequence softmax cross-entropy, averaged into a single scalar.
sequence_loss = seq2seq.sequence_loss(logits=outputs, targets=Y,
                                      weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)

train_op = tf.train.AdamOptimizer(learning_rate).minimize(mean_loss)
# Launch the graph and initialize all variables.
# (Deliberately not a `with` block: the session is reused for prediction below.)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Train for 500 steps; after each step decode and echo every window's
# prediction so progress is visible.
for step in range(500):
    _, loss, results = sess.run([train_op, mean_loss, outputs],
                                feed_dict={X: dataX, Y: dataY})
    for idx, result in enumerate(results):
        best = np.argmax(result, axis=1)  # most likely char id per step
        print(step, idx, ''.join(char_set[t] for t in best), loss)
最后输出预测值,输出值中间都为重复值,所以进行判断组合成语句(可以选择从头取,也可以从尾取)。
# Decode the final predictions into one sentence. Consecutive windows
# overlap by sequence_length - 1 characters, so print the whole first
# window, then only the last character of each subsequent window.
results = sess.run(outputs, feed_dict={X: dataX})
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)  # most likely char id per step
    # BUG FIX: the original used `j is 0` — identity comparison on an int,
    # which only works via CPython's small-int caching and raises a
    # SyntaxWarning on Python 3.8+. Value comparison is the correct form.
    if j == 0:
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')
预测效果挺好,基本与样本语句符合