Dynamic Memory Networks for Text Classification, with a Java Interface

The Dynamic Memory Network is an algorithm open-sourced by Facebook, designed mainly for question answering, but the task can be recast as text classification. After a day of debugging, the code now runs. The core code is based on https://github.com/yongyehuang/zhihu-text-classification (many thanks), with some small changes on top of the Dynamic Memory Networks implementation there. The code is shown below:
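To make this casting concrete, here is a minimal sketch (with a made-up toy vocabulary and sample text) of how one labelled document becomes a (story, question, answer) triple, mirroring the preprocessing in data_utils.py below: each document becomes a padded sentence-by-word matrix of ids (the story), the question is a fixed dummy token, and the class label plays the role of the answer.

# Illustrative only: toy vocabulary and sample line, not from the original data.
import numpy as np

word_to_id = {"dymanic": 1, "很": 2, "好": 3, "吃": 4}   # toy vocabulary
line = "1\t很 好 吃#好 吃"                               # one sample: label \t sent1#sent2, words space-separated

label_str, doc = line.split("\t")
story = np.zeros((10, 30), dtype=np.int32)               # story: 10 sentences x 30 word ids
for i, sent in enumerate(doc.split("#")[:10]):
    for j, w in enumerate(sent.split()[:30]):
        story[i][j] = word_to_id.get(w, 0)

question = [word_to_id.get("dymanic", 0)] + [0] * 29     # fixed dummy question of length 30
answer = int(label_str)                                  # the class id plays the role of the "answer"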

The main code consists of:

config.py

data_utils.py

model.py

run.py

 


config.py:

class DYConfig:
    num_classes = 2
    learning_rate = 0.001
    batch_size = 128
    decay_steps = 1000
    decay_rate = 0.9
    sequence_length = 30
    vocab_size = 10000
    embed_size = 100
    hidden_size = 100
    is_training = True
    story_length = 10
    dropout_keep_prob = 0.8
    isgru = True
    num_epochs = 30
    print_per_batch = 100

data_utils.py:

import os
import numpy as np
import codecs
import pickle

def _read_vocab(filename):
    """Read the vocabulary list."""
    words = list(map(lambda line: line.strip(), codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def _read_file(filename, word_to_id, num_classes=2, max_sent_in_doc=10, max_word_in_sent=30):
    data_x = []
    data_y = []
    ans = []
    with codecs.open(filename, "r", encoding='utf-8') as f:
        for line in f:
            doc = np.zeros((max_sent_in_doc, max_word_in_sent), dtype=np.int32)
            doclabel = line.split("\t")
            if len(doclabel) > 1:
                label = int(doclabel[0])
                sents = doclabel[1].split("#")
                for i, sent in enumerate(sents):
                    if i < max_sent_in_doc and sent != '':
                        for j, word in enumerate(sent.strip().split(" ")):
                            if j < max_word_in_sent and word != '':
                                doc[i][j] = word_to_id.get(word, 0)
                labels = [0] * num_classes        # one-hot label (currently unused)
                ans_single = [0] * max_word_in_sent
                labels[label - 1] = 1
                data_y.append(label)
                data_x.append(doc.tolist())
                # the "question" is a fixed dummy token, padded to max_word_in_sent
                ans_single[0] = word_to_id.get("dymanic", 0)
                ans.append(ans_single)
    # pickle.dump((data_x, data_y), open('tensorflow/business/business_data', 'wb'))
    return data_x, data_y, ans


def preocess_file(data_path, vocapath):
    """Return all of the data at once."""
    words, word_to_id = _read_vocab(vocapath)
    x_train, y_train, ans = _read_file(data_path, word_to_id)
    return x_train, y_train, words, ans


def batch_iter(data, batch_size=64, num_epochs=5):
    """Generate batches of data."""
    data = np.array(data)
    data_size = len(data)
    num_batchs_per_epchs = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        indices = np.random.permutation(np.arange(data_size))
        shuffled_data = data[indices]
        for batch_num in range(num_batchs_per_epchs):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            if end_index - start_index == batch_size:
                yield shuffled_data[start_index:end_index]
            else:
                # the graph is built with a fixed batch_size, so resample the
                # last partial batch up to a full batch
                data_choice = shuffled_data[start_index:end_index]
                size = np.shape(data_choice)[0]
                resample_idx = np.random.choice(list(range(size)), batch_size)
                yield data_choice[resample_idx, ]


if __name__ == '__main__':
    path = "data/class/vocab.txt"
    words, word_to_id = _read_vocab(path)
    print(words[-1])
    print(len(word_to_id))
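Because the graph is built with a fixed batch_size, batch_iter resamples the last partial batch up to a full batch instead of yielding a smaller one. A quick illustrative check on toy data (not part of the original files):

# Every batch yielded by batch_iter has exactly batch_size rows,
# including the resampled last partial batch.
toy = list(zip(range(10), range(10), range(10)))   # 10 fake (x, y, ans) samples
for batch in batch_iter(toy, batch_size=4, num_epochs=1):
    x, y, ans = zip(*batch)
    print(len(x))   # prints 4 three times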

 

model.py:

import tensorflow as tf
import numpy as np
import tensorflow.contrib as tf_contrib
from tensorflow.contrib import rnn

class DynamicMemoryNetwork:
    def __init__(self, config, num_pass=2, use_gated_gru=True, decode_with_sequences=False, multi_label_flag=False,
                 initializer=tf.random_normal_initializer(stddev=0.1), clip_gradients=5.0, l2_lambda=0.0001):
        """init all hyperparameters here"""
        # set hyperparameters
        self.config = config
        self.num_classes = self.config.num_classes

        self.sequence_length = self.config.sequence_length
        self.vocab_size = self.config.vocab_size
        self.embed_size = self.config.embed_size
        self.is_training = self.config.is_training
        self.learning_rate = tf.Variable(self.config.learning_rate, trainable=False, name="learning_rate")
        self.learning_rate_decay_half_op = tf.assign(self.learning_rate, self.learning_rate * 0.5)
        self.initializer = initializer
        self.multi_label_flag = multi_label_flag
        self.hidden_size = self.config.hidden_size
        self.clip_gradients = clip_gradients
        self.story_length = self.config.story_length
        self.num_pass = num_pass  # number of passes of the episodic memory module, e.g. num_pass=2
        self.use_gated_gru = use_gated_gru  # if True, use a gated GRU as the 'Memory Update Mechanism'
        self.decode_with_sequences = decode_with_sequences
        self.l2_lambda = l2_lambda

        # add placeholders (X, label)
        self.batch_size = self.config.batch_size
        self.story = tf.placeholder(tf.int32, [None, self.story_length, self.sequence_length], name="story")
        self.query = tf.placeholder(tf.int32, [None, self.sequence_length], name="question")

        self.answer_single = tf.placeholder(tf.int32, [None, ], name="input_y")  # y:[None,]
        self.answer_multilabel = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y_multilabel")  # y:[None,num_classes]. for multi-label classification only.
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
        self.epoch_step = tf.Variable(0, trainable=False, name="Epoch_Step")
        self.epoch_increment = tf.assign(self.epoch_step, tf.add(self.epoch_step, tf.constant(1)))
        self.decay_steps, self.decay_rate = self.config.decay_steps, self.config.decay_rate

        self.instantiate_weights()
        self.logits = self.inference()  # [None, num_classes]. main computation graph is here.

        self.predictions = tf.argmax(self.logits, 1, name="predictions")  # shape:[None,]

        if not self.multi_label_flag:
            correct_prediction = tf.equal(tf.cast(self.predictions, tf.int32), self.answer_single)  # [batch_size]
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="Accuracy")  # shape=()
        else:
            self.accuracy = tf.constant(0.5)  # fake accuracy (for multi-label, compute accuracy outside the graph)

        if not self.config.is_training:
            return
        if multi_label_flag:
            print("going to use multi label loss.")
            self.loss_val = self.loss_multilabel()
        else:
            print("going to use single label loss.")
            self.loss_val = self.loss()
        self.train_op = self.train()

    def inference(self):
        """main computation graph: a. Input Module, b. Question Module, c. Episodic Memory Module, d. Answer Module"""
        # 1. Input Module
        self.input_module()  # [batch_size,story_length,hidden_size]
        # 2. Question Module
        self.question_module()  # [batch_size,hidden_size]
        # 3. Episodic Memory Module
        self.episodic_memory_module()  # [batch_size,hidden_size]
        # 4. Answer Module
        logits = self.answer_module()  # [batch_size,num_classes]
        return logits

    def input_module(self):
        """encode raw texts into vector representation"""
        story_embedding = tf.nn.embedding_lookup(self.Embedding, self.story)  # [batch_size,story_length,sequence_length,embed_size]
        print("story_embedding size is :", story_embedding.shape)
        # treat each sentence as one "fact": concatenate its word embeddings into a single vector
        story_embedding = tf.reshape(story_embedding, (self.batch_size, self.story_length, self.sequence_length * self.embed_size))
        print("story_embedding size is :", story_embedding.shape)
        cell = rnn.GRUCell(self.hidden_size)
        self.story_embedding, hidden_state = tf.nn.dynamic_rnn(cell, story_embedding, dtype=tf.float32, scope="input_module")  # story_embedding:[batch_size,story_length,hidden_size]

    def question_module(self):
        """
        input: tokens of the query:[batch_size,sequence_length]
        :return: representation of the question:[batch_size,hidden_size]
        """
        query_embedding = tf.nn.embedding_lookup(self.Embedding, self.query)  # [batch_size,sequence_length,embed_size]
        cell = rnn.GRUCell(self.hidden_size)
        _, self.query_embedding = tf.nn.dynamic_rnn(cell, query_embedding, dtype=tf.float32, scope="question_module")  # query_embedding:[batch_size,hidden_size]

    def episodic_memory_module(self):  # input(story):[batch_size,story_length,hidden_size]
        """
        episodic memory module
        1. attention mechanism using a gate function: take fact representation c, question q, previous memory m_previous
        2. use a gated GRU to update the hidden state
        3. take the last hidden state as the episode result
        4. use a GRU to update the final memory with the episode result

        input: story (from input module):[batch_size,story_length,hidden_size]
        output: last hidden state:[batch_size,hidden_size]
        """
        candidate_inputs = tf.split(self.story_embedding, self.story_length, axis=1)  # a list of length story_length; each element is [batch_size,1,hidden_size]
        candidate_list = [tf.squeeze(x, axis=1) for x in candidate_inputs]  # a list of length story_length; each element is [batch_size,hidden_size]
        m_current = self.query_embedding
        h_current = tf.zeros((self.batch_size, self.hidden_size))
        for pass_number in range(self.num_pass):  # on each pass, loop over the candidate sentences
            # 1. attention mechanism: take fact representation c, question q, previous memory m_previous
            g = self.attention_mechanism_parallel(self.story_embedding, m_current, self.query_embedding, pass_number)  # [batch_size,story_length]
            # 2. memory update mechanism
            if self.use_gated_gru:  # use a gated GRU to update the episode; this is the default
                g = tf.split(g, self.story_length, axis=1)  # a list of length story_length; each element is [batch_size,1]
                # 2.1 use the gated GRU to update the hidden state
                for i, c_current in enumerate(candidate_list):
                    g_current = g[i]  # [batch_size,1]. score for the current candidate sentence c_current
                    h_current = self.gated_gru(c_current, h_current, g_current)  # h_current:[batch_size,hidden_size]
                # 2.2 the last hidden state becomes the episode e
                e_i = h_current  # [batch_size,hidden_size]
            else:  # use a weighted sum to get the episode (e.g. in question answering)
                p_gate = tf.nn.softmax(g, dim=1)  # [batch_size,story_length]. attention weights
                p_gate = tf.expand_dims(p_gate, axis=2)  # [batch_size,story_length,1]
                e_i = tf.multiply(p_gate, self.story_embedding)  # [batch_size,story_length,hidden_size]
                e_i = tf.reduce_sum(e_i, axis=1)  # [batch_size,hidden_size]
            # 3. use a GRU to update the episodic memory m_i
            m_current = self.gru_cell(e_i, m_current, "gru_episodic_memory")  # [batch_size,hidden_size]
        self.m_T = m_current  # [batch_size,hidden_size]

    def answer_module(self):
        """Answer Module: generate an answer from the final memory vector.
        Input:
            hidden state from the episodic memory module:[batch_size,hidden_size]
            question:[batch_size,hidden_size]
        """
        with tf.name_scope("score"):
            steps = self.sequence_length if self.decode_with_sequences else 1  # decode a sequence of tokens, e.g. "x1 x2 x3 x4...", only if needed
            a = self.m_T  # init hidden state
            y_pred = tf.zeros((self.batch_size, self.hidden_size))  # TODO usually initialized with the embedding of a special '<GO>' token passed in from outside
            logits_list = []
            logits_return = None
            for i in range(steps):
                cell = rnn.GRUCell(self.hidden_size)
                y_previous_q = tf.concat([y_pred, self.query_embedding], axis=1)  # [batch_size,hidden_size*2]
                _, a = cell(y_previous_q, a)
                logits = tf.layers.dense(a, units=self.num_classes, name="denseoutput")  # [batch_size,num_classes]
                logits_list.append(logits)
            if self.decode_with_sequences:  # need the whole sequence
                logits_return = tf.stack(logits_list, axis=1)  # [batch_size,sequence_length,num_classes]
            else:  # only need a single answer, not a sequence
                logits_return = logits_list[0]  # [batch_size,num_classes]

            self.pred_y = tf.nn.softmax(logits, name="pred_y")
            tf.add_to_collection("pred_y", self.pred_y)

        return logits_return

    def gated_gru(self, c_current, h_previous, g_current):
        """
        gated GRU to get the updated hidden state
        :param c_current: [batch_size,hidden_size]
        :param h_previous: [batch_size,hidden_size]
        :param g_current: [batch_size,1]
        :return h_current: [batch_size,hidden_size]
        """
        # 1. compute the candidate hidden state using the GRU
        h_candidate = self.gru_cell(c_current, h_previous, "gru_candidate_sentence")  # [batch_size,hidden_size]
        # 2. combine the candidate and previous hidden states with the gate to get the updated hidden state
        h_current = tf.multiply(g_current, h_candidate) + tf.multiply(1 - g_current, h_previous)  # [batch_size,hidden_size]
        return h_current

    def attention_mechanism_parallel(self, c_full, m, q, i):
        """parallel implementation of the gate function given the candidate sentences, a query, and the previous memory.
        Input:
            c_full: candidate facts. shape:[batch_size,story_length,hidden_size]
            m: previous memory. shape:[batch_size,hidden_size]
            q: question. shape:[batch_size,hidden_size]
        Output: a score per sentence (batched). shape:[batch_size,story_length]
        """
        q = tf.expand_dims(q, axis=1)  # [batch_size,1,hidden_size]
        m = tf.expand_dims(m, axis=1)  # [batch_size,1,hidden_size]

        # 1. build a feature vector that captures similarities between input, memory and question: z(c,m,q)
        c_q_elementwise = tf.multiply(c_full, q)  # [batch_size,story_length,hidden_size]
        c_m_elementwise = tf.multiply(c_full, m)  # [batch_size,story_length,hidden_size]
        c_q_minus = tf.abs(tf.subtract(c_full, q))  # [batch_size,story_length,hidden_size]
        c_m_minus = tf.abs(tf.subtract(c_full, m))  # [batch_size,story_length,hidden_size]
        # c^T W q
        c_w_q = self.x1Wx2_parallel(c_full, q, "c_w_q" + str(i))  # [batch_size,story_length,hidden_size]
        # c^T W m
        c_w_m = self.x1Wx2_parallel(c_full, m, "c_w_m" + str(i))  # [batch_size,story_length,hidden_size]
        q_tile = tf.tile(q, [1, self.story_length, 1])  # [batch_size,story_length,hidden_size]
        m_tile = tf.tile(m, [1, self.story_length, 1])  # [batch_size,story_length,hidden_size]
        z = tf.concat([c_full, m_tile, q_tile, c_q_elementwise, c_m_elementwise, c_q_minus, c_m_minus, c_w_q, c_w_m], 2)  # [batch_size,story_length,hidden_size*9]
        # 2. two-layer feed forward
        g = tf.layers.dense(z, self.hidden_size * 3, activation=tf.nn.tanh)  # [batch_size,story_length,hidden_size*3]
        g = tf.layers.dense(g, 1, activation=tf.nn.sigmoid)  # [batch_size,story_length,1]
        g = tf.squeeze(g, axis=2)  # [batch_size,story_length]
        return g

    def x1Wx2_parallel(self, x1, x2, scope):
        """
        :param x1: [batch_size,story_length,hidden_size]
        :param x2: [batch_size,1,hidden_size]
        :param scope: a string
        :return: [batch_size,story_length,hidden_size]
        """
        with tf.variable_scope(scope):
            x1 = tf.reshape(x1, shape=(self.batch_size, -1))  # [batch_size,story_length*hidden_size]
            x1_w = tf.layers.dense(x1, self.story_length * self.hidden_size, use_bias=False)  # [batch_size,story_length*hidden_size]
            x1_w_expand = tf.expand_dims(x1_w, axis=2)  # [batch_size,story_length*hidden_size,1]
            x1_w_x2 = tf.matmul(x1_w_expand, x2)  # [batch_size,story_length*hidden_size,hidden_size]
            x1_w_x2 = tf.reshape(x1_w_x2, shape=(self.batch_size, self.story_length, self.hidden_size, self.hidden_size))
            x1_w_x2 = tf.reduce_sum(x1_w_x2, axis=3)  # [batch_size,story_length,hidden_size]
            return x1_w_x2

    def gru_cell(self, Xt, h_t_minus_1, variable_scope):
        """
        single step of a GRU
        :param Xt: [batch_size,hidden_size]
        :param h_t_minus_1: [batch_size,hidden_size]
        :return: [batch_size,hidden_size]
        """
        with tf.variable_scope(variable_scope):
            # 1. update gate: decides how much past information is kept and how much new information is added
            z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z) + tf.matmul(h_t_minus_1, self.U_z) + self.b_z)  # z_t:[batch_size,hidden_size]
            # 2. reset gate: controls how much the past state contributes to the candidate state
            r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r) + tf.matmul(h_t_minus_1, self.U_r) + self.b_r)  # r_t:[batch_size,hidden_size]
            # 3. compute the candidate state h_t~
            h_t_candidate = tf.nn.tanh(tf.matmul(Xt, self.W_h) + r_t * (tf.matmul(h_t_minus_1, self.U_h)) + self.b_h)  # [batch_size,hidden_size]
            # 4. new state: a linear combination of the previous hidden state and the candidate state h_t~
            h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candidate  # h_t:[batch_size,hidden_size]
        return h_t

    def loss(self, l2_lambda=0.0001):
        with tf.name_scope("loss"):
            # input: `logits`:[batch_size, num_classes] and `labels`:[batch_size]
            # output: a 1-D `Tensor` of length `batch_size` with the softmax cross-entropy loss
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.answer_single, logits=self.logits)
            loss = tf.reduce_mean(losses)  # shape=()
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if ('bias' not in v.name) and ('alpha' not in v.name)]) * l2_lambda
            loss = loss + l2_losses
        return loss

    def loss_multilabel(self, l2_lambda=0.0001):  # this loss function is for multi-label classification
        with tf.name_scope("loss"):
            # input_y:[None,num_classes]; logits:[None,num_classes]
            # let `x = logits`, `z = labels`. The logistic loss is: z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
            losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.answer_multilabel, logits=self.logits)  # [None,num_classes]
            losses = tf.reduce_sum(losses, axis=1)  # shape=(?,). loss for each example in the batch
            loss = tf.reduce_mean(losses)  # shape=(). average loss over the batch
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if ('bias' not in v.name) and ('alpha' not in v.name)]) * l2_lambda
            loss = loss + l2_losses
        return loss

    def smoothing_cross_entropy(self, logits, labels, vocab_size, confidence=0.9):  # confidence = 1.0 - label_smoothing, with label_smoothing=0.1. from http://github.com/tensorflow/tensor2tensor
        """Cross entropy with label smoothing to limit over-confidence."""
        with tf.name_scope("smoothing_cross_entropy", [logits, labels]):
            # Low confidence is given to all non-true labels, uniformly.
            low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
            # Normalizing constant is the best cross-entropy value with soft targets.
            # We subtract it just for readability; it makes no difference to learning.
            normalizing = -(confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) * low_confidence * tf.log(low_confidence + 1e-20))
            # Soft targets.
            soft_targets = tf.one_hot(
                tf.cast(labels, tf.int32),
                depth=vocab_size,
                on_value=confidence,
                off_value=low_confidence)
            xentropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=soft_targets)
        return xentropy - normalizing

    def train(self):
        """based on the loss, use the optimizer to update the parameters"""
        learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step, self.decay_steps,
                                                   self.decay_rate, staircase=True)
        self.learning_rate_ = learning_rate
        train_op = tf_contrib.layers.optimize_loss(self.loss_val, global_step=self.global_step, learning_rate=learning_rate,
                                                   optimizer="Adam", clip_gradients=self.clip_gradients)
        return train_op

    def instantiate_weights(self):
        """define all weights here"""
        with tf.variable_scope("gru_cell"):
            # GRU parameters: update gate related. Xt passed to gru_cell is always
            # hidden_size-dimensional, so the input weights are [hidden_size, hidden_size].
            self.W_z = tf.get_variable("W_z", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.U_z = tf.get_variable("U_z", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.b_z = tf.get_variable("b_z", shape=[self.hidden_size])
            # GRU parameters: reset gate related
            self.W_r = tf.get_variable("W_r", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.U_r = tf.get_variable("U_r", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.b_r = tf.get_variable("b_r", shape=[self.hidden_size])
            # GRU parameters: candidate state related
            self.W_h = tf.get_variable("W_h", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.U_h = tf.get_variable("U_h", shape=[self.hidden_size, self.hidden_size], initializer=self.initializer)
            self.b_h = tf.get_variable("b_h", shape=[self.hidden_size])

        with tf.variable_scope("embedding_projection"):  # embedding matrix
            self.Embedding = tf.get_variable("Embedding", shape=[self.vocab_size, self.embed_size], initializer=self.initializer)
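Before training on real data it can be useful to smoke-test the graph. The sketch below is not part of the original files; it assumes the class above is saved as model.py next to config.py, and feeds random ids just to check that the tensors have the expected shapes:

# Minimal smoke test (illustrative): build the graph and run one random batch.
import numpy as np
import tensorflow as tf
from config import DYConfig
from model import DynamicMemoryNetwork

config = DYConfig()
model = DynamicMemoryNetwork(config)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.story: np.random.randint(0, config.vocab_size,
                                       (config.batch_size, config.story_length, config.sequence_length)).astype(np.int32),
        model.query: np.random.randint(0, config.vocab_size,
                                       (config.batch_size, config.sequence_length)).astype(np.int32),
        model.answer_single: np.random.randint(0, config.num_classes, (config.batch_size,)).astype(np.int32),
        model.dropout_keep_prob: 1.0,
    }
    logits, acc = sess.run([model.logits, model.accuracy], feed_dict=feed)
    print(logits.shape)   # (batch_size, num_classes), e.g. (128, 2)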

 

 

run.py:

from config import DYConfig
from model import DynamicMemoryNetwork
from data_utils import preocess_file, batch_iter
import time
import tensorflow as tf
import os
import numpy as np
from datetime import timedelta

# basepath="/Users/shuubiasahi/Documents/python"
# noexperience
# business
# together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify-ha/tensorflow/business/han.txt"
vocapath = basepath + "/credit-tftextclassify-ha/tensorflow/business/vocabha.txt"
modelpath = basepath + "/credit-tftextclassify-ha/tensorflow/business/"

print(modelpath, "attention-based model training starts")


def run_epoch(rnn=False):
    # load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words, ans = preocess_file(data_path, vocapath)
    config = DYConfig()
    if config.isgru:
        print('Using dynamicmm gru model...')
    else:
        print('Using dynamicmm lstm model...')
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = DynamicMemoryNetwork(config)
    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # configure tensorboard
    tf.summary.scalar("loss", model.loss_val)
    tf.summary.scalar("accuracy", model.accuracy)

    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train, ans)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Prepare the data to feed into the model."""
        x_batch, y_batch, ans_batch = zip(*batch)
        feed_dict = {
            model.story: x_batch,
            model.answer_single: y_batch,
            model.query: ans_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_, ans_):
        """
        Model evaluation.
        Running all data at once would OOM, so evaluate in batches and aggregate.
        """
        batch_eval = batch_iter(list(zip(x_, y_, ans_)), 128, 1)

        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.dropout_keep_prob] = 1.0
            loss, acc = session.run([model.loss_val, model.accuracy],
                                    feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len

        return total_loss / cnt, total_acc / cnt

    # training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.dropout_keep_prob] = config.dropout_keep_prob
        if i % 5 == 0:  # write training summaries to tensorboard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # report training-set performance every print_per_batch batches
            loss_train, acc_train = session.run([model.loss_val, model.accuracy],
                                                feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val, ans_val)  # validation set not needed for now

            # timing
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))

            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        if i % 500 == 0 and i > 0:
            # freeze the graph and write it out so it can be loaded from Java
            graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def,
                                                                 ["keep_prob", "story", "question", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "dy.model",
                                 as_text=False)
            print("Dynamic memory network model saved at step {0}".format(i))
        session.run(model.train_op, feed_dict=feed_dict)  # run the optimizer

    session.close()


if __name__ == '__main__':

    run_epoch(rnn=False)
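Once run.py has written the frozen graph, it is worth sanity-checking it from Python before wiring up the Java side. A minimal sketch (not from the original post), assuming the same modelpath as above and the node names exported there ("story", "question", "keep_prob", "score/pred_y"):

# Load the frozen dy.model graph and run one batch of inference; this mirrors
# what the Java code below does.
import numpy as np
import tensorflow as tf

model_file = modelpath + "dy.model"   # same modelpath as in run.py above

with tf.gfile.GFile(model_file, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name="")
    story = g.get_tensor_by_name("story:0")
    question = g.get_tensor_by_name("question:0")
    keep_prob = g.get_tensor_by_name("keep_prob:0")
    pred_y = g.get_tensor_by_name("score/pred_y:0")
    with tf.Session(graph=g) as sess:
        feed = {
            story: np.zeros((128, 10, 30), dtype=np.int32),    # a batch of encoded documents
            question: np.zeros((128, 30), dtype=np.int32),     # the fixed dummy question
            keep_prob: 1.0,
        }
        probs = sess.run(pred_y, feed_dict=feed)
        print(probs[0])   # class probabilities for the first document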


To use the trained model from Java, two methods need to be written. Let's look at them:

/**
 * Dynamic Memory Networks model: convert the text into index form
 *
 * @param text
 *            input text
 * @param map
 *            feature (vocabulary) map
 */
public static int[][][] gettexttoidByCutDMN(String text, Map<String, Integer> map) {
    int[][][] story = new int[128][10][30];

    if (StringUtils.isBlank(text)) {
        return story;
    }
    String docword = WordUtilHAN.getSegmentHANModelStr(text);
    if (StringUtils.isBlank(docword)) {
        return story;
    }

    String[] sents = docword.split("#");
    for (int i = 0; i < sents.length && i < 10; i++) {
        if (StringUtils.isNotBlank(sents[i])) {
            String[] words = sents[i].trim().split(" ");
            for (int j = 0; j < words.length && j < 30; j++) {
                if (StringUtils.isNotBlank(words[j])) {
                    if (map.containsKey(words[j])) {
                        story[0][i][j] = map.get(words[j]);
                    }
                }
            }
        }
    }
    return story;
}

/**
 * Get the classification probability of the "random typing" model (this helper
 * is the convolutional-neural-network variant)
 *
 * @param text
 *            input text
 * @param sess
 *            the TensorFlow session
 * @param map
 *            feature (vocabulary) map
 * @param keep_prob
 *            dropout keep probability
 */
public static double getClassifyTogether(String text, Session sess, Map<String, Integer> map, Tensor keep_prob) {
    if (StringUtils.isBlank(text)) {
        return 0.0;
    }
    /*
     * If the text is English, do nothing and return 0 directly.
     */
    if (text.split("\\s+").length >= 4 && WordUtil.getEnglistSentenceRate(text) > 0.7) {
        return 0.0;
    }
    int[][] arr = gettexttoidTogetherByCut(text, map);
    Tensor input = Tensor.create(arr);
    Tensor result = sess.runner().feed("input_x", input).feed("keep_prob", keep_prob).fetch("score/pred_y").run()
            .get(0);

    long[] rshape = result.shape();
    int nlabels = (int) rshape[1];
    int batchSize = (int) rshape[0];

    float[][] logits = result.copyTo(new float[batchSize][nlabels]);

    if (nlabels > 1 && batchSize > 0) {
        return logits[0][1];
    }

    return 0.0;
}

Calling the interface:

package com.dianping.text.classify.base;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;

import com.dianping.text.classify.util.TersorflowUtils;
import com.dianping.text.classifybydl.api.service.Category;

public class DyMeNetworks implements Category {
    private static final Logger logger = LoggerFactory.getLogger(DyMeNetworks.class);
    private Graph g;
    private Session sess;
    private Tensor keep_prob;
    private Tensor question;
    private Map<String, Integer> map;

    private void init() {
        g = new Graph();
        keep_prob = Tensor.create(1.0f);
        question = getquestion();
        try {
            updataMap();
            byte[] graphDef = TersorflowUtils
                    .readAllBytesOrExit(Paths.get(this.getClass().getResource("/").getPath(), "modeldynamic/dy.model"));
            g.importGraphDef(graphDef);
            sess = new Session(g);

        } catch (Exception e) {
            logger.error(" Dynamic Memory Networks model load:", e);
        }
    }

    public Tensor getquestion() {
        // the fixed dummy question: a [128][30] id matrix (all zeros)
        int[][] arr = new int[128][30];
        Tensor input = Tensor.create(arr);
        return input;
    }

    public void updataMap() {
        map = new HashMap<>();
        int i = 0;
        try {
            String path = this.getClass().getResource("/").getPath() + "modeldynamic/vocab.txt";
            BufferedReader buffer = new BufferedReader(new InputStreamReader(new FileInputStream(path)));

            String line = buffer.readLine();
            while (line != null) {
                map.put(line.trim(), i++);
                line = buffer.readLine();
            }
            buffer.close();

        } catch (Exception e) {
            logger.error("Dynamic Memory Networks vocab load:", e);
        }
        System.out.println("Dynamic Memory Networks map.size is:" + map.size());
    }

    @Override
    public double getClassify(String text) {
        return TersorflowUtils.getClassifyByDyMeNetworks(text, sess, map, question, keep_prob);
    }

    public static void main(String[] args) {
        DyMeNetworks dyMeNetworks = new DyMeNetworks();
        dyMeNetworks.init();
        System.out.println(dyMeNetworks.getClassify("欢迎大家来品尝.......!物美价廉很好吃"));
    }
}

 

Results:

2018-01-09 20:15:01,145 main INFO Log4j appears to be running in a Servlet environment, but there's no log4j-web module available. If you want better web container support, please add the log4j-web JAR to your web archive or server lib directory.

2018-01-09 20:15:01,166 main INFO create XMDFileAppender [name=appAppender fullFileName=/data/applogs//credit-textclassifybydl-service//logs/app.log appkey=credit-textclassifybydl-service fullFilePattern=/data/applogs//credit-textclassifybydl-service//logs/app.log-%d{yyyy-MM-dd}-%i.log]

2018-01-09 20:15:01,170 main INFO create XMDFileAppender [name=XMDFileAppender fullFileName=/data/applogs/credit-textclassifybydl-service/request.log appkey=credit-textclassifybydl-service fullFilePattern=/data/applogs/credit-textclassifybydl-service/request.log-%d{yyyy-MM-dd}-%i.log]

2018-01-09 20:15:01,173 main INFO create XMDFileAppender [name=ERROR-LOG fullFileName=/data/applogs/credit-textclassifybydl-service/error.log appkey=credit-textclassifybydl-service fullFilePattern=/data/applogs/credit-textclassifybydl-service/error.log-%d{yyyy-MM-dd}-%i.log]

2018-01-09 20:15:01,180 main INFO create XMDFileAppender [name=INFO-LOG fullFileName=/data/applogs/credit-textclassifybydl-service/info.log appkey=credit-textclassifybydl-service fullFilePattern=/data/applogs/credit-textclassifybydl-service/info.log-%d{yyyy-MM-dd}-%i.log]

2018-01-09 20:15:01,187 main INFO create XMDFileAppender [name=WARN-LOG fullFileName=/data/applogs/credit-textclassifybydl-service/warn.log appkey=credit-textclassifybydl-service fullFilePattern=/data/applogs/credit-textclassifybydl-service/warn.log-%d{yyyy-MM-dd}-%i.log]

2018-01-09 20:15:01,189 main INFO Log4j appears to be running in a Servlet environment, but there's no log4j-web module available. If you want better web container support, please add the log4j-web JAR to your web archive or server lib directory.

2018-01-09 20:15:01.590196: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.

2018-01-09 20:15:01.590243: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.

2018-01-09 20:15:01.590248: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.

2018-01-09 20:15:01.590251: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.

Dynamic Memory Networks map.size is:23144

log4j:WARN No appenders could be found for logger (org.ansj.util.MyStaticValue).

log4j:WARN Please initialize the log4j system properly.

log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.

0.9977214932441711
