EntityNetwork - Text Classification

Before getting to the code, a few words about where I am. This will probably be my last post on text classification: I have now tried just about every deep learning algorithm, and for junk-text detection they all end up performing roughly the same -- EntityNetwork, Hierarchical Attention Network for Document Classification, dynamic memory network, bi-lstm, lstm-attention, cnn, dcnn, seq2seq with attention, fasttext. These models do beat conventional machine learning algorithms such as lr, xgboost and lightgbm, but only by a small margin. Doing all of this work is also my way of closing out 2017, which has been a sad year. The core algorithm code again follows the reference given in the previous dynamic memory network post. Now straight to the code. The project is split into the following modules:

config.py

datautils.py

model.py

run.py

config.py:

class EntiyNetConfig:
    """Hyperparameters for the EntityNetwork text classifier."""
    num_classes = 2            # number of target classes
    learning_rate = 0.001
    batch_size = 128
    decay_steps = 1000         # learning-rate decay schedule
    decay_rate = 0.9
    sequence_length = 30       # max words per sentence
    vocab_size = 10000         # overwritten with the real vocabulary size at runtime
    embed_size = 100
    hidden_size = 100
    is_training = True
    story_length = 10          # max sentences per document
    dropout_keep_prob = 0.8
    use_bi_lstm = False        # bag-of-words input encoder by default
    isgru = True
    num_epochs = 30
    print_per_batch = 100
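
For orientation, these settings mean every document is fed to the network as a story_length x sequence_length grid of word ids (10 sentences of at most 30 words each) plus a one-sentence query, with labels as plain integers. A minimal shape sketch under the defaults above (illustrative only, not part of the project code):

import numpy as np

# placeholder batch shaped the way model.py's placeholders expect it
story_batch = np.zeros((128, 10, 30), dtype=np.int32)   # [batch_size, story_length, sequence_length]
query_batch = np.zeros((128, 30), dtype=np.int32)       # [batch_size, sequence_length]
label_batch = np.zeros((128,), dtype=np.int32)          # [batch_size]
print(story_batch.shape, query_batch.shape, label_batch.shape)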

datautils.py:

 

import os
import numpy as np
import codecs
import pickle

def _read_vocab(filename):
    """Read the vocabulary list; return the word list and a word -> id mapping."""
    words = list(map(lambda line: line.strip(),
                     codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def _read_file(filename, word_to_id, num_classes=2, max_sent_in_doc=10, max_word_in_sent=30):
    """Read a training file. Each line is "label<TAB>sent1 words#sent2 words#...";
    every document becomes a [max_sent_in_doc, max_word_in_sent] matrix of word ids."""
    data_x = []
    data_y = []
    ans = []
    with open(filename, "r") as f:
        for line in f:
            doc = np.zeros((max_sent_in_doc, max_word_in_sent), dtype=np.int32)
            doclabel = line.split("\t")
            if len(doclabel) > 1:
                label = int(doclabel[0])
                sents = doclabel[1].split("#")
                for i, sent in enumerate(sents):
                    if i < max_sent_in_doc and sent != '':
                        for j, word in enumerate(sent.strip().split(" ")):
                            if j < max_word_in_sent and word != '':
                                doc[i][j] = word_to_id.get(word, 0)
                ans_single = [0] * max_word_in_sent
                ans_single[0] = word_to_id.get("entity", 0)  # the fixed "query" fed to the network
                data_y.append(label)
                data_x.append(doc.tolist())
                ans.append(ans_single)
    # pickle.dump((data_x, data_y), open('tensorflow/business/business_data', 'wb'))
    return data_x, data_y, ans


def preocess_file(data_path, vocapath):
    """Load everything at once: training matrices, labels, vocabulary and queries."""
    words, word_to_id = _read_vocab(vocapath)
    x_train, y_train, ans = _read_file(data_path, word_to_id)
    return x_train, y_train, words, ans

def batch_iter(data, batch_size=64, num_epochs=5):
    """Generate shuffled batches; the last, smaller batch of each epoch is
    resampled (with replacement) up to batch_size so every batch has a fixed size."""
    data = np.array(data)
    data_size = len(data)
    num_batchs_per_epchs = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        indices = np.random.permutation(np.arange(data_size))
        shufflfed_data = data[indices]
        for batch_num in range(num_batchs_per_epchs):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            if end_index - start_index == batch_size:
                yield shufflfed_data[start_index:end_index]
            else:
                # tail of the epoch: sample with replacement to pad the batch
                data_choic = shufflfed_data[start_index:end_index]
                size = np.shape(data_choic)[0]
                batch_s = list(range(size))
                aa_milne_arr = np.random.choice(batch_s, batch_size)
                yield data_choic[aa_milne_arr, ]

if __name__ == '__main__':
    path = "data/vocab.txt"
    words, word_to_id = _read_vocab(path)
    print(words[-1])
    print(len(word_to_id))
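
To make the expected on-disk format concrete: _read_file assumes each line carries an integer label, a tab, then '#'-separated sentences whose words are already space-tokenised, and the query fed to the network is simply the single token "entity". A small, self-contained illustration (the vocabulary and sentence below are made up):

import numpy as np

# toy vocabulary; index 0 doubles as the out-of-vocabulary id, as in _read_file
words = ["<PAD>", "entity", "loan", "offer", "fake", "click", "link"]
word_to_id = dict(zip(words, range(len(words))))

# one training line: "label<TAB>sentence1#sentence2", words separated by spaces
line = "1\tloan offer fake#click link"
label, text = line.strip().split("\t")
doc = [[word_to_id.get(w, 0) for w in sent.strip().split(" ")] for sent in text.split("#")]
print(int(label), doc)  # 1 [[2, 3, 4], [5, 6]]

# in training, the helpers above are used roughly like this (paths are placeholders):
# x_train, y_train, words, ans = preocess_file("data/han.txt", "data/vocab.txt")
# batches = batch_iter(list(zip(x_train, y_train, ans)), batch_size=128, num_epochs=1)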


model.py:

import tensorflow as tf
import numpy as np
import tensorflow.contrib as tf_contrib
from tensorflow.contrib import rnn
#from a07_Transformer.a2_multi_head_attention import MultiHeadAttention

class EntityNetwork:
    def __init__(self, config, multi_label_flag=False, block_size=20,
                 initializer=tf.random_normal_initializer(stddev=0.1), clip_gradients=5.0,
                 use_bi_lstm=False, use_additive_attention=False):
        """Init all hyperparameters here."""
        # set hyperparameters
        self.config = config
        self.num_classes = self.config.num_classes
        self.batch_size = self.config.batch_size
        self.sequence_length = self.config.sequence_length
        self.vocab_size = self.config.vocab_size
        self.embed_size = self.config.embed_size
        self.is_training = self.config.is_training
        self.learning_rate = tf.Variable(self.config.learning_rate, trainable=False, name="learning_rate")
        self.learning_rate_decay_half_op = tf.assign(self.learning_rate, self.learning_rate * 0.5)
        self.initializer = initializer
        self.multi_label_flag = multi_label_flag
        self.hidden_size = self.config.hidden_size
        self.clip_gradients = clip_gradients
        self.story_length = self.config.story_length
        self.block_size = block_size
        self.use_bi_lstm = use_bi_lstm
        # if bi-lstm is used, each sentence representation is 2*hidden_size wide
        self.dimension = self.hidden_size * 2 if self.use_bi_lstm else self.hidden_size
        self.use_additive_attention = use_additive_attention

        # placeholders: story (document), query, labels and dropout
        self.story = tf.placeholder(tf.int32, [None, self.story_length, self.sequence_length], name="story")
        self.query = tf.placeholder(tf.int32, [None, self.sequence_length], name="question")
        self.answer_single = tf.placeholder(tf.int32, [None, ], name="input_y")  # y: [None,]
        self.answer_multilabel = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y_multilabel")  # for multi-label classification only
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
        self.epoch_step = tf.Variable(0, trainable=False, name="Epoch_Step")
        self.epoch_increment = tf.assign(self.epoch_step, tf.add(self.epoch_step, tf.constant(1)))
        self.decay_steps, self.decay_rate = self.config.decay_steps, self.config.decay_rate

        self.instantiate_weights()
        self.logits = self.inference()  # [None, self.num_classes]. main computation graph is here.

        self.predictions = tf.argmax(self.logits, 1, name="predictions")  # shape: [None,]
        if not self.multi_label_flag:
            correct_prediction = tf.equal(tf.cast(self.predictions, tf.int32), self.answer_single)
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="Accuracy")  # shape=()
        else:
            self.accuracy = tf.constant(0.5)  # fake accuracy; compute real multi-label accuracy outside the graph

        if not self.config.is_training:
            return
        if multi_label_flag:
            print("going to use multi label loss.")
            self.loss_val = self.loss_multilabel()
        else:
            print("going to use single label loss.")
            self.loss_val = self.loss()
        self.train_op = self.train()

    def inference(self):
        """Main computation graph: 1. input encoder 2. dynamic memory 3. output layer."""
        # 1. input encoder
        self.embedding_with_mask()
        if self.use_bi_lstm:
            self.input_encoder_bi_lstm()
        else:
            self.input_encoder_bow()
        # 2. dynamic memory
        self.hidden_state = self.rnn_story()  # [batch_size,block_size,hidden_size]. hidden state after processing the story
        # 3. output layer
        logits = self.output_module()  # [batch_size,num_classes]
        return logits

    def embedding_with_mask(self):
        # 1.1 embedding for story and query
        story_embedding = tf.nn.embedding_lookup(self.Embedding, self.story)  # [batch_size,story_length,sequence_length,embed_size]
        query_embedding = tf.nn.embedding_lookup(self.Embedding, self.query)  # [batch_size,sequence_length,embed_size]
        # 1.2 learned positional masks for story and query
        story_mask = tf.get_variable("story_mask", [self.sequence_length, 1], initializer=tf.constant_initializer(1.0))
        query_mask = tf.get_variable("query_mask", [self.sequence_length, 1], initializer=tf.constant_initializer(1.0))
        # 1.3 multiply embedding and mask for story and query
        self.story_embedding = tf.multiply(story_embedding, story_mask)  # [batch_size,story_length,sequence_length,embed_size]
        self.query_embedding = tf.multiply(query_embedding, query_mask)  # [batch_size,sequence_length,embed_size]

    def input_encoder_bow(self):
        # 1.4 bag-of-words encoding for story and query: sum the word vectors
        self.story_embedding = tf.reduce_sum(self.story_embedding, axis=2)  # [batch_size,story_length,embed_size]
        self.query_embedding = tf.reduce_sum(self.query_embedding, axis=1)  # [batch_size,embed_size]

    def input_encoder_bi_lstm(self):
        """Use a bi-directional lstm to encode query_embedding: [batch_size,sequence_length,embed_size]
        and story_embedding: [batch_size,story_length,sequence_length,embed_size].
        output: query_embedding: [batch_size,hidden_size*2]; story_embedding: [batch_size,story_length,hidden_size*2]
        """
        # 1. encode query: bi-lstm layer
        lstm_fw_cell = rnn.BasicLSTMCell(self.hidden_size)  # forward direction cell
        lstm_bw_cell = rnn.BasicLSTMCell(self.hidden_size)  # backward direction cell
        if self.dropout_keep_prob is not None:
            lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
            lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)
        query_hidden_output, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, self.query_embedding,
                                                                 dtype=tf.float32, scope="query_rnn")  # two tensors of [batch_size,sequence_length,hidden_size]
        query_hidden_output = tf.concat(query_hidden_output, axis=2)  # [batch_size,sequence_length,hidden_size*2]
        self.query_embedding = tf.reduce_sum(query_hidden_output, axis=1)  # [batch_size,hidden_size*2]
        print("input_encoder_bi_lstm.self.query_embedding:", self.query_embedding)

        # 2. encode story
        # self.story_embedding: [batch_size,story_length,sequence_length,embed_size]
        self.story_embedding = tf.reshape(self.story_embedding,
                                          shape=(-1, self.story_length * self.sequence_length, self.embed_size))
        lstm_fw_cell_story = rnn.BasicLSTMCell(self.hidden_size)  # forward direction cell
        lstm_bw_cell_story = rnn.BasicLSTMCell(self.hidden_size)  # backward direction cell
        if self.dropout_keep_prob is not None:
            lstm_fw_cell_story = rnn.DropoutWrapper(lstm_fw_cell_story, output_keep_prob=self.dropout_keep_prob)
            lstm_bw_cell_story = rnn.DropoutWrapper(lstm_bw_cell_story, output_keep_prob=self.dropout_keep_prob)
        story_hidden_output, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell_story, lstm_bw_cell_story,
                                                                 self.story_embedding, dtype=tf.float32, scope="story_rnn")
        story_hidden_output = tf.concat(story_hidden_output, axis=2)  # [batch_size,story_length*sequence_length,hidden_size*2]
        story_hidden_output = tf.reshape(story_hidden_output,
                                         shape=(-1, self.story_length, self.sequence_length, self.hidden_size * 2))
        self.story_embedding = tf.reduce_sum(story_hidden_output, axis=2)  # [batch_size,story_length,hidden_size*2]

    def activation(self, features, scope=None):
        """PReLU activation with a learned per-feature slope for the negative part."""
        with tf.variable_scope(scope, 'PReLU', initializer=self.initializer):
            alpha = tf.get_variable('alpha', features.get_shape().as_list()[1:])
            pos = tf.nn.relu(features)
            neg = alpha * (features - tf.abs(features)) * 0.5
            return pos + neg

    def output_module(self):
        """
        1. attention between the query and the memory's hidden states to get a weighted sum of hidden states.
        2. non-linearity of query and hidden state to get the label.
        input: query_embedding: [batch_size,embed_size]; hidden state: [batch_size,block_size,hidden_size] of the memory
        :return: y, the predicted logits: [batch_size,num_classes]
        """
        with tf.name_scope("score"):
            # 1.1 probability distribution (similarity between query and each memory block)
            p = tf.nn.softmax(tf.multiply(tf.expand_dims(self.query_embedding, axis=1), self.hidden_state))  # [batch_size,block_size,hidden_size]
            # 1.2 weighted sum of hidden states
            u = tf.reduce_sum(tf.multiply(p, self.hidden_state), axis=1)  # [batch_size,hidden_size]

            # 2. non-linearity of query and hidden state to get the label
            H_u_matmul = tf.matmul(u, self.H) + self.h_u_bias  # [batch_size,hidden_size]
            activation = self.activation(self.query_embedding + H_u_matmul, scope="query_add_hidden")  # [batch_size,hidden_size]
            activation = tf.nn.dropout(activation, keep_prob=self.dropout_keep_prob)  # [batch_size,hidden_size]
            y = tf.matmul(activation, self.R) + self.y_bias  # [batch_size,num_classes]
            self.pred_y = tf.nn.softmax(y, name="pred_y")
            tf.add_to_collection("pred_y", self.pred_y)
        return y  # [batch_size,num_classes]

    def rnn_story(self):
        """
        Run the memory "rnn" over the story sentences.
        input: story_embedding: [batch_size,story_length,embed_size]
        :return: hidden states of all memory blocks: [batch_size,block_size,hidden_size]
        """
        # 1. split the story into a list of sentence vectors
        input_split = tf.split(self.story_embedding, self.story_length, axis=1)  # list of story_length tensors, each [batch_size,1,embed_size]
        input_list = [tf.squeeze(x, axis=1) for x in input_split]  # list of story_length tensors, each [batch_size,embed_size]
        # 2. init keys (w_all) and values (h_all) of the memory
        h_all = tf.get_variable("hidden_states", shape=[self.block_size, self.dimension], initializer=self.initializer)  # [block_size,hidden_size]
        w_all = tf.get_variable("keys", shape=[self.block_size, self.dimension], initializer=self.initializer)  # [block_size,hidden_size]
        # 3. tile keys and values across the batch
        w_all_expand = tf.tile(tf.expand_dims(w_all, axis=0), [self.batch_size, 1, 1])  # [batch_size,block_size,hidden_size]
        h_all_expand = tf.tile(tf.expand_dims(h_all, axis=0), [self.batch_size, 1, 1])  # [batch_size,block_size,hidden_size]
        # 4. run the memory cell over the sentence sequence
        for i, input in enumerate(input_list):
            h_all_expand = self.cell(input, h_all_expand, w_all_expand, i)  # [batch_size,block_size,hidden_size]
        return h_all_expand  # [batch_size,block_size,hidden_size]

    def cell(self, s_t, h_all, w_all, i):
        """
        Parallel implementation of a single time step of the memory update for all blocks.
        :param s_t: [batch_size,hidden_size]. vector of the current input (a sentence). note: hidden_size == embed_size here
        :param w_all: keys, [batch_size,block_size,hidden_size]
        :param h_all: values (hidden states), [batch_size,block_size,hidden_size]
        :return: new hidden states: [batch_size,block_size,hidden_size]
        """
        # 1. gate: how strongly the sentence interacts with each memory block
        s_t_expand = tf.expand_dims(s_t, axis=1)  # [batch_size,1,hidden_size]
        g = tf.nn.sigmoid(tf.multiply(s_t_expand, h_all) + tf.multiply(s_t_expand, w_all))  # [batch_size,block_size,hidden_size]

        # 2. candidate hidden state
        # below: shape [batch_size*block_size,hidden_size]
        h_candidate_part1 = tf.matmul(tf.reshape(h_all, shape=(-1, self.dimension)), self.U) + \
                            tf.matmul(tf.reshape(w_all, shape=(-1, self.dimension)), self.V) + self.h_bias
        print("======>h_candidate_part1:", h_candidate_part1)
        h_candidate_part1 = tf.reshape(h_candidate_part1, shape=(self.batch_size, self.block_size, self.dimension))  # [batch_size,block_size,hidden_size]
        h_candidate_part2 = tf.expand_dims(tf.matmul(s_t, self.W) + self.h2_bias, axis=1)  # [batch_size,1,hidden_size]
        h_candidate = self.activation(h_candidate_part1 + h_candidate_part2, scope="h_candidate" + str(i))  # [batch_size,block_size,hidden_size]

        # 3. update hidden state
        h_all = h_all + tf.multiply(g, h_candidate)  # [batch_size,block_size,hidden_size]

        # 4. normalize hidden state
        h_all = tf.nn.l2_normalize(h_all, -1)  # [batch_size,block_size,hidden_size]
        return h_all  # [batch_size,block_size,hidden_size]

    def loss(self, l2_lambda=0.0001):
        with tf.name_scope("loss"):
            # input: `logits`: [batch_size,num_classes] and `labels`: [batch_size]
            # output: a 1-D tensor of length batch_size with the softmax cross entropy loss
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.answer_single, logits=self.logits)
            loss = tf.reduce_mean(losses)  # shape=()
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                                  if ('bias' not in v.name) and ('alpha' not in v.name)]) * l2_lambda
            loss = loss + l2_losses
        return loss

    def loss_multilabel(self, l2_lambda=0.0001):
        """Loss function for multi-label classification."""
        with tf.name_scope("loss"):
            # input_y: shape=(?, num_classes); logits: shape=(?, num_classes)
            # let `x = logits`, `z = labels`. the logistic loss is: z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
            losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.answer_multilabel, logits=self.logits)  # [None,num_classes]
            losses = tf.reduce_sum(losses, axis=1)  # shape=(?,). loss per example
            loss = tf.reduce_mean(losses)  # shape=(). average loss over the batch
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                                  if ('bias' not in v.name) and ('alpha' not in v.name)]) * l2_lambda
            loss = loss + l2_losses
        return loss

    def smoothing_cross_entropy(self, logits, labels, vocab_size, confidence=0.9):
        """Cross entropy with label smoothing to limit over-confidence.
        confidence = 1.0 - label_smoothing, here label_smoothing = 0.1. adapted from http://github.com/tensorflow/tensor2tensor"""
        with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
            # low confidence is given to all non-true labels, uniformly
            low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
            # normalizing constant is the best cross-entropy value with soft targets;
            # subtracting it is just for readability, it makes no difference to learning
            normalizing = -(confidence * tf.log(confidence) +
                            tf.to_float(vocab_size - 1) * low_confidence * tf.log(low_confidence + 1e-20))
            # soft targets
            soft_targets = tf.one_hot(
                tf.cast(labels, tf.int32),
                depth=vocab_size,
                on_value=confidence,
                off_value=low_confidence)
            xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=soft_targets)
            return xentropy - normalizing

    def train(self):
        """Based on the loss, use Adam with exponential learning-rate decay and gradient clipping to update parameters."""
        learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step, self.decay_steps,
                                                   self.decay_rate, staircase=True)
        self.learning_rate_ = learning_rate
        train_op = tf_contrib.layers.optimize_loss(self.loss_val, global_step=self.global_step,
                                                   learning_rate=learning_rate, optimizer="Adam",
                                                   clip_gradients=self.clip_gradients)
        return train_op

    def instantiate_weights(self):
        """Define all weights here."""
        with tf.variable_scope("output_module"):
            self.H = tf.get_variable("H", shape=[self.dimension, self.dimension], initializer=self.initializer)
            self.R = tf.get_variable("R", shape=[self.dimension, self.num_classes], initializer=self.initializer)
            self.y_bias = tf.get_variable("y_bias", shape=[self.num_classes])
            self.b_projected = tf.get_variable("b_projection", shape=[self.num_classes])
            self.h_u_bias = tf.get_variable("h_u_bias", shape=[self.dimension])

        with tf.variable_scope("dynamic_memory"):
            self.U = tf.get_variable("U", shape=[self.dimension, self.dimension], initializer=self.initializer)
            self.V = tf.get_variable("V", shape=[self.dimension, self.dimension], initializer=self.initializer)
            self.W = tf.get_variable("W", shape=[self.dimension, self.dimension], initializer=self.initializer)
            self.h_bias = tf.get_variable("h_bias", shape=[self.dimension])
            self.h2_bias = tf.get_variable("h2_bias", shape=[self.dimension])

        with tf.variable_scope("embedding_projection"):  # embedding matrix
            self.Embedding = tf.get_variable("Embedding", shape=[self.vocab_size, self.embed_size], initializer=self.initializer)
            self.Embedding_label = tf.get_variable("Embedding_label", shape=[self.num_classes, self.embed_size], dtype=tf.float32)

        with tf.variable_scope("attention"):
            self.W_w_attention_word = tf.get_variable("W_w_attention_word", shape=[self.hidden_size * 2, self.hidden_size * 2], initializer=self.initializer)
            self.W_b_attention_word = tf.get_variable("W_b_attention_word", shape=[self.hidden_size * 2])
            self.context_vecotor_word = tf.get_variable("what_is_the_informative_word", shape=[self.hidden_size * 2], initializer=self.initializer)
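
Stripped of the TensorFlow plumbing, the dynamic memory in rnn_story/cell does the following for every sentence s_t and every memory block (key w, value h): gate g = sigmoid(s*h + s*w) element-wise, candidate h_cand = PReLU(U h + V w + W s + bias), then h = l2_normalize(h + g * h_cand). A hedged NumPy sketch of one such step, with plain ReLU standing in for PReLU (all names below are illustrative, not part of the project):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def entity_cell_step(s_t, h_all, w_all, U, V, W, h_bias, h2_bias):
    """One memory update, mirroring EntityNetwork.cell with ReLU in place of PReLU."""
    # gate: element-wise interaction of the sentence with each block's value and key
    g = sigmoid(s_t[:, None, :] * h_all + s_t[:, None, :] * w_all)            # [batch,block,hidden]
    # candidate state from current values, keys and the sentence
    h_cand = np.maximum(0.0, h_all @ U + w_all @ V + h_bias + (s_t @ W + h2_bias)[:, None, :])
    h_new = h_all + g * h_cand                                                # gated update
    return h_new / np.linalg.norm(h_new, axis=-1, keepdims=True)              # L2-normalize each block

batch, block, hidden = 2, 20, 100
rng = np.random.RandomState(0)
s_t = rng.randn(batch, hidden)
h_all = rng.randn(batch, block, hidden)
w_all = rng.randn(batch, block, hidden)
U, V, W = (rng.randn(hidden, hidden) * 0.1 for _ in range(3))
h_bias = np.zeros(hidden)
h2_bias = np.zeros(hidden)
print(entity_cell_step(s_t, h_all, w_all, U, V, W, h_bias, h2_bias).shape)    # (2, 20, 100)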

 

run.py:

from config import EntiyNetConfig
from model import EntityNetwork
from datautils import preocess_file, batch_iter
import time
import tensorflow as tf
import os
import numpy as np
from datetime import timedelta

#basepath="/Users/shuubiasahi/Documents/python"
#noexperience
#business
#together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify-ha/tensorflow/business/han.txt"
vocapath = basepath + "/credit-tftextclassify-ha/tensorflow/business/vocabha.txt"
modelpath = basepath + "/credit-tftextclassify-ha/tensorflow/business/"

print(modelpath, "starting to train the EntityNetwork model")
def run_epoch(rnn=False):
    # load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words, ans = preocess_file(data_path, vocapath)
    config = EntiyNetConfig()
    if config.isgru:
        print('Using EntityNetwork gru model...')
    else:
        print('Using EntityNetwork lstm model...')
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = EntityNetwork(config)
    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # configure tensorboard
    tf.summary.scalar("loss", model.loss_val)
    tf.summary.scalar("accuracy", model.accuracy)

    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train, ans)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Build the feed_dict for one batch."""
        x_batch, y_batch, ans_batch = zip(*batch)
        feed_dict = {
            model.story: x_batch,
            model.answer_single: y_batch,
            model.query: ans_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_, ans_):
        """
        Evaluate the model.
        Running all the data at once would OOM, so evaluate in batches and aggregate.
        """
        batch_eval = batch_iter(list(zip(x_, y_, ans_)), 128, 1)

        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.dropout_keep_prob] = 1.0
            loss, acc = session.run([model.loss_val, model.accuracy],
                                    feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len

        return total_loss / cnt, total_acc / cnt

    # training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.dropout_keep_prob] = config.dropout_keep_prob
        if i % 5 == 0:  # write training summaries to tensorboard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # report training loss/accuracy every print_per_batch batches
            loss_train, acc_train = session.run([model.loss_val, model.accuracy],
                                                feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val, ans_val)  # no validation set for now

            # elapsed time
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))

            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        if i % 500 == 0 and i > 0:
            # freeze the graph and export it for serving
            graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def,
                                                                 ["keep_prob", "story", "question", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "entry.model",
                                 as_text=False)
            print("entity network model saved at step {0}".format(i))
        session.run(model.train_op, feed_dict=feed_dict)  # run the optimization step

    session.close()

if __name__ == '__main__':
    run_epoch(rnn=False)
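
run.py freezes the trained variables into constants and writes the graph to entry.model, so inference only needs the exported node names ("story", "question", "keep_prob", "score/pred_y"). As a rough, hedged sketch of how that frozen graph could be loaded back in Python (the path is a placeholder; note that batch_size is baked into the graph by the tf.tile in rnn_story, so the fed batch must match it):

import numpy as np
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile("entry.model", "rb") as f:            # placeholder path to the frozen graph
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    story = graph.get_tensor_by_name("story:0")           # [batch, story_length, sequence_length]
    question = graph.get_tensor_by_name("question:0")     # [batch, sequence_length]
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    pred_y = graph.get_tensor_by_name("score/pred_y:0")   # class probabilities

    with tf.Session(graph=graph) as sess:
        # dummy inputs; batch size must equal the training batch_size (128)
        dummy_story = np.zeros((128, 10, 30), dtype=np.int32)
        dummy_query = np.zeros((128, 30), dtype=np.int32)
        probs = sess.run(pred_y, feed_dict={story: dummy_story, question: dummy_query, keep_prob: 1.0})
        print(probs.shape)  # (128, num_classes)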


Training log:

Iter: 1000, Train Loss: 0.018, Train Acc: 100.00%, Time: 0:01:01
Converted 25 variables to const ops.
entity network model saved at step 1000
Iter: 1100, Train Loss: 0.023, Train Acc: 100.00%, Time: 0:01:08
Iter: 1200, Train Loss: 0.014, Train Acc: 100.00%, Time: 0:01:14
Iter: 1300, Train Loss: 0.03, Train Acc: 99.22%, Time: 0:01:20
Iter: 1400, Train Loss: 0.015, Train Acc: 100.00%, Time: 0:01:26
Iter: 1500, Train Loss: 0.016, Train Acc: 100.00%, Time: 0:01:32
Converted 25 variables to const ops.
entity network model saved at step 1500
Iter: 1600, Train Loss: 0.016, Train Acc: 100.00%, Time: 0:01:38


 

Finally, how to deploy the model on the Java side was covered in the previous post, so I won't repeat it here. That wraps up text classification. Over.
