#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
1-1. NNLM(Neural Network Language Model) - Predict Next Word
Paper - A Neural Probabilistic Language Model(2003)
'''
import tensorflow as tf
import numpy as np
tf.reset_default_graph()  # clear any graph left from a previous run (TF1 graph mode)
sentences = [ "i like dog", "i love coffee", "i hate milk"] # toy training corpus
word_list = " ".join(sentences).split() # join all sentences with spaces, then split into individual words
word_list = list(set(word_list)) # deduplicate to build the vocabulary (NOTE: set order is arbitrary between runs)
word_dict = {w: i for i, w in enumerate(word_list)} # word -> integer index
number_dict = {i: w for i, w in enumerate(word_list)} # integer index -> word
n_class = len(word_dict) # number of Vocabulary
# NNLM Parameter
n_step = 2 # # number of steps ['i like', 'i love', 'i hate'] # context length: predict the 3rd word from the previous 2
n_hidden = 2 # number of hidden units
m = 2 # m in paper # word-embedding dimension in the paper; NOTE(review): unused below — inputs stay one-hot
def make_batch(sentences, word_to_id=None, vocab_size=None):
    """Split each sentence into a one-hot context and a one-hot target.

    For every sentence, all words except the last form the input context and
    the last word is the prediction target, both one-hot encoded.

    Args:
        sentences: iterable of whitespace-separated word strings.
        word_to_id: word -> index mapping; defaults to the module-level
            ``word_dict`` for backward compatibility.
        vocab_size: vocabulary size (one-hot width); defaults to the
            module-level ``n_class``.

    Returns:
        (input_batch, target_batch) where input_batch[i] is an array of shape
        (len(words) - 1, vocab_size) and target_batch[i] has shape (vocab_size,).
    """
    if word_to_id is None:
        word_to_id = word_dict
    if vocab_size is None:
        vocab_size = n_class
    one_hot = np.eye(vocab_size)  # row i is the one-hot vector for index i
    input_batch = []
    target_batch = []
    for sen in sentences:
        words = sen.split()
        if len(words) < 2:  # need at least one context word and one target
            continue
        context = [word_to_id[w] for w in words[:-1]]  # all but the last word
        target = word_to_id[words[-1]]  # the word to predict
        input_batch.append(one_hot[context])
        target_batch.append(one_hot[target])
    return input_batch, target_batch
# Model: one hidden tanh layer over the concatenated one-hot context words,
# as in Bengio et al. (2003), minus the embedding matrix (inputs stay one-hot).
X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, number of steps, number of Vocabulary]
Y = tf.placeholder(tf.float32, [None, n_class]) # one-hot target word
# Flatten the n_step one-hot vectors into one long vector per example.
# Renamed from `input`, which shadowed the Python builtin.
input_flat = tf.reshape(X, shape=[-1, n_step * n_class]) # [batch_size, n_step * n_class]
H = tf.Variable(tf.random_normal([n_step * n_class, n_hidden])) # hidden-layer weights
d = tf.Variable(tf.random_normal([n_hidden])) # hidden-layer bias
U = tf.Variable(tf.random_normal([n_hidden, n_class])) # output-layer weights
b = tf.Variable(tf.random_normal([n_class])) # output-layer bias
tanh = tf.nn.tanh(d + tf.matmul(input_flat, H)) # hidden activations, [batch_size, n_hidden]
model = tf.matmul(tanh, U) + b # unnormalized logits, [batch_size, n_class]
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y)) # softmax applied internally
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
prediction = tf.argmax(model, 1) # index of the highest-scoring word per row
# Training: full-batch gradient descent for 5000 epochs on the 3-sentence corpus.
init = tf.global_variables_initializer()
sess = tf.Session()  # NOTE(review): left open deliberately — the prediction code below reuses it
sess.run(init)
input_batch, target_batch = make_batch(sentences)
for epoch in range(5000):
    # one optimization step; `_` discards the train op's (None) result
    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
# Predict: run the trained model on the same contexts it was trained on.
predict = sess.run([prediction], feed_dict={X: input_batch})
# Show each 2-word context alongside the model's predicted third word.
# (A dead `input = ...` assignment that shadowed the builtin and duplicated
# the expression below was removed; the list is now computed once.)
contexts = [sen.split()[:2] for sen in sentences]
print(contexts, '->', [number_dict[n] for n in predict[0]])
# 1-1. NNLM
# (Scraped page footer removed — "最新推荐文章于 2024-04-23 19:45:43 发布" was not
# Python and made the file fail to parse.)