Debugging the Code for Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification

  Hi everyone, I'm 爱编程的喵喵. I hold a master's degree, with both my undergraduate and graduate studies at 985 universities, and I currently work as a full-stack engineer, applying data-driven thinking to both work and life. My work covers machine learning and the related front-end and back-end development, and I have placed in the top ranks multiple times in competitions such as those held by Alibaba Cloud, iFLYTEK, and CCF. I am currently a CSDN Blog Expert and a recognized quality creator in the AI field. I like summarizing what I learn through blog posts, which both deepens my own understanding and helps newcomers get started quickly.

  This post walks through debugging the code for Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification. I hope it is helpful.


1. Data Preprocessing

  The main difficulty during preprocessing is deeply nested lists, sometimes up to five levels. Multi-dimensional data like this is easy to get wrong, especially when iterating with for loops or converting to an array. The nesting arises because each entity pair may have several labels, and each (entity pair, label) combination may in turn correspond to several sentences. A tiny hand-built example of this nesting is shown below.
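
  The following snippet, with made-up ids purely for illustration, shows the shape of the train_sen / train_ans dictionaries that the preprocessing code below builds.

# Illustration only: hypothetical ids showing the nesting of train_sen / train_ans.
# train_sen[(en1, en2)] -> list of label groups -> list of sentences -> list of [word, pos1, pos2].
train_sen_example = {
    ('实体1', '实体2'): [                    # level 1: entity pair
        [                                    # level 2: sentences that share label 0
            [[12, 61, 58], [7, 62, 59]],     # levels 3-5: one sentence as [word, rel_e1, rel_e2] triples
        ],
    ],
}
train_ans_example = {('实体1', '实体2'): [[1, 0, 0]]}  # one one-hot label per label group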

  Three features are constructed: (1) the word (character) id, (2) the relative position of each character to entity 1, and (3) the relative position to entity 2. Since sentences have different lengths (numbers of characters), a fixed length must be chosen: characters beyond that length are dropped, and shorter sentences are padded at the end with the special character BLANK, whose vector is randomly initialized. A minimal sketch of this truncate/pad rule follows.
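
  This self-contained sketch (with a toy word2id, not the real vocabulary) illustrates the truncate/pad rule; the actual code below does the same thing while also attaching the two position features.

# Minimal sketch of the truncate/pad rule (toy vocabulary, for illustration only)
fixlen = 70
word2id = {'BLANK': 0, 'UNK': 1, '我': 2, '爱': 3}
sentence = '我爱编程'
ids = [word2id.get(ch, word2id['UNK']) for ch in sentence[:fixlen]]  # truncate if longer than fixlen
ids += [word2id['BLANK']] * (fixlen - len(ids))                      # pad with BLANK if shorter
assert len(ids) == fixlen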

   The full code is shown below:

import numpy as np

# map the signed distance between a character and an entity to a position id in [0, 122]
def pos_embed(x):
   if x < -60:
       return 0
   if -60 <= x <= 60:
       return x + 61
   if x > 60:
       return 122
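
# Quick sanity checks of pos_embed (these values follow directly from the thresholds above):
assert pos_embed(-100) == 0    # far to the left of the entity
assert pos_embed(-60) == 1
assert pos_embed(0) == 61      # the character overlaps the entity
assert pos_embed(60) == 121
assert pos_embed(100) == 122   # far to the right of the entity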

# return the index of x in y, or -1 if x is not found
def find_index(x, y):
   flag = -1
   for i in range(len(y)):
       if x != y[i]:
           continue
       else:
           return i
   return flag

# build vec & word2id from the pre-trained word vectors
vec = [] # the list of word vector
word2id = {} # word and its index
f = open('./origin_data/vec.txt', encoding='utf-8')
content = f.readline()
content = content.strip().split()
dim = int(content[1]) # the dimension of word vector
while True:
   content = f.readline()
   if content == '':
       break
   content = content.strip().split()
   word2id[content[0]] = len(word2id)  # the index of the current word(index from 0) 
   content = content[1:]
   content = [(float)(i) for i in content] # the list of word vector
   vec.append(content) 
f.close()

# further processing of word2id & vec
word2id['UNK'] = len(word2id) # add unknown
word2id['BLANK'] = len(word2id) # add blank
vec.append(np.random.normal(size=dim, loc=0, scale=0.05)) # random vector for UNK
vec.append(np.random.normal(size=dim, loc=0, scale=0.05)) # random vector for BLANK

vec = np.array(vec, dtype=np.float32) # convert vec to a numpy array

# build relation2id
relation2id = {}
f = open('./origin_data/relation2id.txt', 'r', encoding='utf-8')
while True:
   content = f.readline()
   if content == '':
       break
   content = content.strip().split()
   relation2id[content[0]] = int(content[1])
f.close()

# build train_sen & train_ans
# case 1: one entity pair may have several labels, i.e., several samples (sentences) share the same entity pair
# case 2: several samples may share both the entity pair and the label, i.e., one (entity pair, label) maps to several sentences

fixlen = 70 # every sentence is fixed to 70 characters
maxlen = 60 # maximum relative distance to an entity (matches the clipping range in pos_embed)

train_sen = {} # case 2: for each entity pair, the value is a list of sentence groups (one group per label); each sentence is an `output` list
train_ans = {}  # case 1: {entity pair: [[label1], [label2], ...]} where each label is a one-hot vector

print('reading train data...')
f = open('./origin_data/train.txt', 'r', encoding='utf-8')

while True:
   content = f.readline() # process one line at a time
   if content == '':
       break

   content = content.strip().split()
   # get entity name
   en1 = content[0] # get entity 1
   en2 = content[1] # get entity 2
   relation = 0 # initialize the relation to 0
   if content[2] not in relation2id: # unknown relations fall back to 'NA'
       relation = relation2id['NA']
   else:
       relation = relation2id[content[2]] # the relation of this sentence, as an index
   # put the same entity pair sentences into a dict
   tup = (en1, en2) # the entity pair of this sentence, as a tuple
   label_tag = 0 # index of this sentence's label inside train_ans[tup] (a list of one-hot labels)
   if tup not in train_sen: # the entity pair has not been seen yet
       train_sen[tup] = []
       train_sen[tup].append([])
       y_id = relation
       label_tag = 0
       label = [0 for i in range(len(relation2id))]
       label[y_id] = 1
       train_ans[tup] = []
       train_ans[tup].append(label)
   else: # the entity pair already exists in train_sen
       y_id = relation
       label_tag = 0
       label = [0 for i in range(len(relation2id))]
       label[y_id] = 1

       temp = find_index(label, train_ans[tup]) # the index of label in train_ans[tup]
       if temp == -1: # the entity pair exists, but this label has not been seen for it
           train_ans[tup].append(label)
           label_tag = len(train_ans[tup]) - 1 # when not found, use the new last index in train_ans[tup]
           train_sen[tup].append([])
       else: # the entity pair exists and this label has already been seen for it
           label_tag = temp  # when found, reuse that index

   sentence = content[3] # the sentence text

   en1pos = 0
   en2pos = 0

   #For Chinese
   en1pos = sentence.find(en1) # index of entity 1 in the sentence
   if en1pos == -1:
       en1pos = 0
   en2pos = sentence.find(en2)
   if en2pos == -1:
       en2pos = 0

   output = []

   # Embedding the position
   for i in range(fixlen):
       word = word2id['BLANK']
       rel_e1 = pos_embed(i - en1pos) # relative position to entity 1: 1-60 to the left, 61 when overlapping, 0 when more than 60 to the left
       rel_e2 = pos_embed(i - en2pos) # relative position to entity 2: 62-121 to the right, 122 when more than 60 to the right
       output.append([word, rel_e1, rel_e2])  # initializer: a list of [word, rel_e1, rel_e2] triples, one per position

   for i in range(min(fixlen, len(sentence))):
       word = 0
       if sentence[i] not in word2id:
           word = word2id['UNK']
       else:
           word = word2id[sentence[i]]  # the index of the word

       output[i][0] = word  # refresh: word is the index of each character of the sentence in word2id

   train_sen[tup][label_tag].append(output)

# get test_sen & test_ans
test_sen = {}  # unlike train_sen, one level less: each entity pair maps directly to its list of sentences
test_ans = {}  # unlike train_ans, the label is an N-hot vector, so each entity pair maps to a single N-hot list
f = open('./origin_data/test.txt', 'r', encoding='utf-8')

while True:
   content = f.readline()
   if content == '':
       break

   content = content.strip().split()
   en1 = content[0]
   en2 = content[1]
   relation = 0
   if content[2] not in relation2id:
       relation = relation2id['NA']
   else:
       relation = relation2id[content[2]]
   tup = (en1, en2)

   if tup not in test_sen:
       test_sen[tup] = []
       y_id = relation
       label_tag = 0
       label = [0 for i in range(len(relation2id))]
       label[y_id] = 1
       test_ans[tup] = label
   else:
       y_id = relation
       test_ans[tup][y_id] = 1

   sentence = content[3]

   en1pos = 0
   en2pos = 0

   #For Chinese
   en1pos = sentence.find(en1)
   if en1pos == -1:
       en1pos = 0
   en2pos = sentence.find(en2)
   if en2pos == -1:
       en2pos = 0

   output = []

   for i in range(fixlen):
       word = word2id['BLANK']
       rel_e1 = pos_embed(i - en1pos)
       rel_e2 = pos_embed(i - en2pos)
       output.append([word, rel_e1, rel_e2]) 

   for i in range(min(fixlen, len(sentence))):
       word = 0
       if sentence[i] not in word2id:
           word = word2id['UNK']
       else:
           word = word2id[sentence[i]] 

       output[i][0] = word  
   test_sen[tup].append(output)

train_x = []
train_y = []
test_x = []
test_y = []

f = open('./data/train_q&a.txt', 'w', encoding='utf-8')
temp = 0 # line counter
for i in train_sen: # i is an entity pair
   if len(train_ans[i]) != len(train_sen[i]):
       print('ERROR')
   length = len(train_ans[i]) # number of labels of this entity pair
   for j in range(length): # j indexes the labels of this entity pair
       train_x.append(train_sen[i][j]) # sentences of the j-th label of entity pair i (a 3-D nested list)
       train_y.append(train_ans[i][j]) # the j-th one-hot label of entity pair i
       f.write(str(temp) + '\t' + i[0] + '\t' + i[1] + '\t' + str(np.argmax(train_ans[i][j])) + '\n')
       temp += 1
f.close()

f = open('./data/test_q&a.txt', 'w', encoding='utf-8')
temp = 0
for i in test_sen:
   test_x.append(test_sen[i])
   test_y.append(test_ans[i])
   tempstr = ''
   for j in range(len(test_ans[i])):
       if test_ans[i][j] != 0:
           tempstr = tempstr + str(j) + '\t'
   f.write(str(temp) + '\t' + i[0] + '\t' + i[1] + '\t' + tempstr + '\n')
   temp += 1
f.close()
print(train_x[0:2])

train_x = np.array(train_x)
train_y = np.array(train_y)
test_x = np.array(test_x)
test_y = np.array(test_y)

np.save('./data/vec.npy', vec)
np.save('./data/train_x.npy', train_x)
np.save('./data/train_y.npy', train_y)
np.save('./data/testall_x.npy', test_x)
np.save('./data/testall_y.npy', test_y)

x_train = np.load('./data/train_x.npy',allow_pickle=True)

train_word = []
train_pos1 = []
train_pos2 = []                                    

print('separating train data')
for i in range(len(x_train)): # each tuple
   word = []
   pos1 = []
   pos2 = []
   for j in x_train[i]: # each sentence
       temp_word = []
       temp_pos1 = []
       temp_pos2 = []
       for k in j: # each element in sentence(output)
           temp_word.append(k[0])
           temp_pos1.append(k[1])
           temp_pos2.append(k[2])
       word.append(temp_word)
       pos1.append(temp_pos1)
       pos2.append(temp_pos2)
   train_word.append(word) # the index of all words 
   train_pos1.append(pos1) # the pos1 of all words
   train_pos2.append(pos2) # the pos2 of all words

train_word = np.array(train_word) # indices of all words
train_pos1 = np.array(train_pos1) # all rel_pos1 values
train_pos2 = np.array(train_pos2) # all rel_pos2 values
print(train_word.shape)
print(train_pos1.shape)
print(train_pos2.shape)
print(train_word[0:2])
np.save('./data/train_word.npy', train_word)
np.save('./data/train_pos1.npy', train_pos1)
np.save('./data/train_pos2.npy', train_pos2)

print('separating test all data')
x_test = np.load('./data/testall_x.npy')
test_word = []
test_pos1 = []
test_pos2 = []

for i in range(len(x_test)):
   word = []
   pos1 = []
   pos2 = []
   for j in x_test[i]:
       temp_word = []
       temp_pos1 = []
       temp_pos2 = []
       for k in j:
           temp_word.append(k[0])
           temp_pos1.append(k[1])
           temp_pos2.append(k[2])
       word.append(temp_word)
       pos1.append(temp_pos1)
       pos2.append(temp_pos2)
   test_word.append(word)
   test_pos1.append(pos1)
   test_pos2.append(pos2)

test_word = np.array(test_word)
test_pos1 = np.array(test_pos1)
test_pos2 = np.array(test_pos2)
print(test_word[0:2])
print(test_word.shape)

np.save('./data/testall_word.npy', test_word)
np.save('./data/testall_pos1.npy', test_pos1)
np.save('./data/testall_pos2.npy', test_pos2)

test_y = np.load('./data/testall_y.npy')
eval_y = []
for i in test_y:
   eval_y.append(i[1:]) # drop the first (NA/unknown) class: [label1, label2, ...]
allans = np.reshape(eval_y, (-1)) # flatten to 1-D, still excluding the unknown class
print(allans.shape)
print(allans[0:10])
np.save('./data/allans.npy', allans)

fwrite = open('./data/metadata.tsv', 'w', encoding='utf-8')
f = open('./origin_data/vec.txt', encoding='utf-8')
f.readline()
while True:
   content = f.readline().strip()
   if content == '':
       break
   name = content.split()[0] # the word itself (first column of each line)
   fwrite.write(name + '\n') 
f.close()
fwrite.close()

2. Defining the Network Structure

  The network in this paper has two main parts. The first is a bidirectional LSTM, so the preprocessed data must first be split into batches and reshaped into the input shape the LSTM expects. The LSTM outputs then go through the second part, attention, which simply assigns different weights to different time steps so that the model focuses on the most informative parts of the sentence. Defining the network itself is straightforward, mostly a matter of calling TensorFlow ops and writing out the formulas; a small numpy sketch of the word-level attention is given below.
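
  As a reference for the word-level attention used in the model class later on, here is a minimal numpy sketch of the computation. Shapes follow the paper (H is the matrix of BiLSTM hidden states, w the attention vector); the function and variable names here are illustrative, not the ones used in the TensorFlow code.

import numpy as np

def word_attention(H, w):
    """Word-level attention: alpha = softmax(w^T tanh(H)), r = H^T alpha, h* = tanh(r)."""
    M = np.tanh(H)                         # [num_steps, lstm_size]
    scores = M @ w                         # one score per time step
    e = np.exp(scores - scores.max())
    alpha = e / e.sum()                    # attention weights over time steps
    r = H.T @ alpha                        # weighted sum of hidden states, [lstm_size]
    return np.tanh(r)                      # sentence representation

H = np.random.randn(70, 230)               # 70 time steps, 230 hidden units
w = np.random.randn(230)
print(word_attention(H, w).shape)          # (230,)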

3. Data Splitting and Network Training

  Although Section 1 preprocessed the data, it is still not in the form the LSTM expects. Neural network training also usually splits the data into batches; this code uses a batch size of 50 entity pairs, so the data still has to be converted and split before training. The network is optimized with Adam and evaluated with accuracy. Because each entity pair (bag) contains a variable number of sentences, the training loop flattens every batch into one array of sentences and records the bag boundaries in total_shape, as sketched below.
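
  The following self-contained sketch (with toy zero arrays standing in for real sentences) shows how the training loop below packs a batch of variable-size bags into one flat array and builds the total_shape offset vector.

import numpy as np

# toy bags: entity pair 1 has 2 sentences, pair 2 has 1, pair 3 has 3 (each sentence is 70 word ids)
bags = [np.zeros((2, 70), dtype=np.int32),
        np.zeros((1, 70), dtype=np.int32),
        np.zeros((3, 70), dtype=np.int32)]

total_shape, total_num = [], 0
for bag in bags:
    total_shape.append(total_num)          # start offset of this bag in the flat array
    total_num += len(bag)
total_shape.append(total_num)              # final entry = total number of sentences in the batch

total_word = np.concatenate(bags, axis=0)  # shape (6, 70): all sentences of the batch
print(total_shape)                         # [0, 2, 3, 6]; bag i spans total_shape[i]:total_shape[i+1]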

4. Code

  The remaining main code is shown below:

import tensorflow as tf
import numpy as np
import time
import datetime
from tensorflow.contrib.tensorboard.plugins import projector

save_path = './model/'
#print('reading wordembedding') 
wordembedding = np.load('./data/vec.npy') # the vec array saved earlier
#print(wordembedding.shape)
len(wordembedding)

print('reading training data')
train_y = np.load('./data/train_y.npy',allow_pickle=True)
train_word = np.load('./data/train_word.npy',allow_pickle=True)
train_pos1 = np.load('./data/train_pos1.npy',allow_pickle=True)
train_pos2 = np.load('./data/train_pos2.npy',allow_pickle=True)

vocab_size = 16691 # total number of words in the vocabulary
num_steps = 70  # number of characters per sentence
num_epochs = 10 # number of training epochs
num_classes = 12 # number of relation classes
lstm_size = 230 # number of hidden units per LSTM cell
keep_prob = 0.5 # dropout keep probability
num_layers = 1  # number of LSTM layers
pos_size = 5 # dimension of each position embedding
pos_num = 123 # number of distinct position ids (0-122)
big_num = 50 # batch size (number of entity pairs per batch)

# define the network
class lstm:
    def __init__(self, is_training, word_embeddings): # is_training=True, word_embeddings=wordembedding

        self.num_steps = num_steps
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.lstm_size = lstm_size
        self.big_num = big_num 

        self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_word') # placeholders for the inputs
        self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos2')
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='input_y')
        self.total_shape = tf.placeholder(dtype=tf.int32, shape=[big_num + 1], name='total_shape') # 1-D array of bag offsets
        total_num = self.total_shape[-1] # the last element: total number of sentences in this batch

        word_embedding = tf.get_variable(initializer=word_embeddings, name='word_embedding') # vec, shape (16691, 100)

        pos1_embedding = tf.get_variable('pos1_embedding', [pos_num, pos_size]) # shape (123, 5)
        pos2_embedding = tf.get_variable('pos2_embedding', [pos_num, pos_size]) # shape (123, 5)

        attention_w = tf.get_variable('attention_omega', [lstm_size, 1]) # word-level attention weights, shape (230, 1)
        sen_a = tf.get_variable('attention_A', [lstm_size]) # shape (230,)
        sen_r = tf.get_variable('query_r', [lstm_size, 1]) # shape (230, 1)
        relation_embedding = tf.get_variable('relation_embedding', [self.num_classes, lstm_size]) # shape (12, 230)
        sen_d = tf.get_variable('bias_d', [self.num_classes]) # shape (12,)

        lstm_cell_forward = tf.contrib.rnn.LSTMCell(lstm_size) # forward LSTM cell
        lstm_cell_backward = tf.contrib.rnn.LSTMCell(lstm_size) # backward LSTM cell

        if is_training and keep_prob < 1:
            lstm_cell_forward = tf.contrib.rnn.DropoutWrapper(lstm_cell_forward, output_keep_prob=keep_prob) # apply dropout
            lstm_cell_backward = tf.contrib.rnn.DropoutWrapper(lstm_cell_backward, output_keep_prob=keep_prob)

        cell_forward = tf.contrib.rnn.MultiRNNCell([lstm_cell_forward] * num_layers) # forward LSTM
        cell_backward = tf.contrib.rnn.MultiRNNCell([lstm_cell_backward] * num_layers) # backward LSTM

        sen_repre = []
        sen_alpha = []
        sen_s = []
        sen_out = []
        self.prob = []
        self.predictions = []
        self.loss = []
        self.accuracy = []
        self.total_loss = 0.0 

        self._initial_state_forward = cell_forward.zero_state(total_num, tf.float32) # initial state
        self._initial_state_backward = cell_backward.zero_state(total_num, tf.float32) # initial state

        # embedding layer
        inputs_forward = tf.concat(axis=2, values=[tf.nn.embedding_lookup(word_embedding, self.input_word),
                                                   tf.nn.embedding_lookup(pos1_embedding, self.input_pos1),
                                                   tf.nn.embedding_lookup(pos2_embedding, self.input_pos2)])
        inputs_backward = tf.concat(axis=2,
                                    values=[tf.nn.embedding_lookup(word_embedding, tf.reverse(self.input_word, [1])),
                                            tf.nn.embedding_lookup(pos1_embedding, tf.reverse(self.input_pos1, [1])),
                                            tf.nn.embedding_lookup(pos2_embedding,
                                                                   tf.reverse(self.input_pos2, [1]))])

        outputs_forward = []

        state_forward = self._initial_state_forward

        # Bi-lstm layer
        with tf.variable_scope('lstm_FORWARD') as scope:
            for step in range(num_steps):
                if step > 0:
                    scope.reuse_variables()
                (cell_output_forward, state_forward) = cell_forward(inputs_forward[:, step, :], state_forward)
                outputs_forward.append(cell_output_forward)

        outputs_backward = []

        state_backward = self._initial_state_backward
        with tf.variable_scope('lstm_BACKWARD') as scope:
            for step in range(num_steps):
                if step > 0:
                    scope.reuse_variables()
                (cell_output_backward, state_backward) = cell_backward(inputs_backward[:, step, :], state_backward)
                outputs_backward.append(cell_output_backward)
        # (number of sentences across the 50 entity pairs, 70, 230)
        output_forward = tf.reshape(tf.concat(axis=1, values=outputs_forward), [total_num, num_steps, lstm_size])
        
        output_backward = tf.reverse(
            tf.reshape(tf.concat(axis=1, values=outputs_backward), [total_num, num_steps, lstm_size]),
            [1])

        # word-level attention layer
        output_h = tf.add(output_forward, output_backward) # 将正向和反向的结果相加shape(50个实体对的sentence个数, 70, 230)
        attention_r = tf.reshape(tf.matmul(tf.reshape(tf.nn.softmax(
            tf.reshape(tf.matmul(tf.reshape(tf.tanh(output_h), [total_num * num_steps, lstm_size]), attention_w),
                       [total_num, num_steps])), [total_num, 1, num_steps]), output_h), [total_num, lstm_size])
        # attention_r:shape(50个实体对的sentence个数,230)
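
        # NOTE: the sentence-level attention loop is missing from this excerpt. The sketch
        # below is a hedged reconstruction based on the variables defined above (sen_a, sen_r,
        # relation_embedding, sen_d, self.total_shape) and the usual selective-attention
        # formulation; treat it as an approximation rather than the author's exact code.
        # The accuracy block that follows belongs inside this loop.
        for i in range(big_num):
            # hidden states of the i-th entity pair's sentences
            sen_repre.append(tf.tanh(attention_r[self.total_shape[i]:self.total_shape[i + 1]]))
            batch_size = self.total_shape[i + 1] - self.total_shape[i]

            # sentence-level attention weights over this bag's sentences
            sen_alpha.append(
                tf.reshape(tf.nn.softmax(
                    tf.reshape(tf.matmul(tf.multiply(sen_repre[i], sen_a), sen_r), [batch_size])),
                    [1, batch_size]))

            sen_s.append(tf.reshape(tf.matmul(sen_alpha[i], sen_repre[i]), [lstm_size, 1]))  # bag representation
            sen_out.append(tf.add(tf.reshape(tf.matmul(relation_embedding, sen_s[i]), [self.num_classes]), sen_d))

            self.prob.append(tf.nn.softmax(sen_out[i]))

            with tf.name_scope("output"):
                self.predictions.append(tf.argmax(self.prob[i], 0, name="predictions"))

            with tf.name_scope("loss"):
                self.loss.append(tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(logits=sen_out[i], labels=self.input_y[i])))
                self.total_loss = self.loss[i] if i == 0 else self.total_loss + self.loss[i]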



            # tf.summary.scalar('loss',self.total_loss)
            # tf.scalar_summary(['loss'],[self.total_loss])
            with tf.name_scope("accuracy"):
                self.accuracy.append(
                    tf.reduce_mean(tf.cast(tf.equal(self.predictions[i], tf.argmax(self.input_y[i], 0)), "float"),
                                   name="accuracy"))

        # tf.summary.scalar('loss',self.total_loss)
        tf.summary.scalar('loss', self.total_loss)
        # regularization
        self.l2_loss = tf.contrib.layers.apply_regularization(regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                                                              weights_list=tf.trainable_variables())
        self.final_loss = self.total_loss + self.l2_loss
        tf.summary.scalar('l2_loss', self.l2_loss)
        tf.summary.scalar('final_loss', self.final_loss)

with tf.Graph().as_default():

    sess = tf.Session()
    with sess.as_default():

        initializer = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = lstm(is_training=True, word_embeddings=wordembedding) # build the network defined above
        # the loss is defined inside the lstm class and accessed as m.final_loss
        global_step = tf.Variable(0, name="global_step", trainable=False) # step counter passed to optimizer.minimize
        optimizer = tf.train.AdamOptimizer(0.0005) # Adam optimizer

        train_op = optimizer.minimize(m.final_loss, global_step=global_step) # minimize the regularized loss
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None) # for saving the model

        merged_summary = tf.summary.merge_all() # for TensorBoard
        summary_writer = tf.summary.FileWriter(save_path + 'train_loss', sess.graph) # FLAGS.summary_dir is not defined in this script, so write summaries under save_path

        for one_epoch in range(num_epochs):
            temp_order = list(range(len(train_word))) # indices of the entity pairs (967 of them in this training set)
            np.random.shuffle(temp_order) # shuffle the entity-pair order
            for i in range(int(len(temp_order) / float(big_num))):  # 967 // 50 iterations per epoch
                ###################### build a random batch of big_num (50) temporary training examples ######################
                temp_word = []
                temp_pos1 = []
                temp_pos2 = []
                temp_y = []

                temp_input = temp_order[i * big_num:(i + 1) * big_num] # indices of the 50 entity pairs in this batch
                # gather the 50 randomly selected training examples
                for k in temp_input: # for each of the 50 entity pairs
                    temp_word.append(train_word[k]) # 50 random entries of train_word after the loop
                    temp_pos1.append(train_pos1[k]) # 50 random entries of train_pos1
                    temp_pos2.append(train_pos2[k]) # 50 random entries of train_pos2
                    temp_y.append(train_y[k])       # 50 random entries of train_y
                num = 0
                for single_word in temp_word: # count how many label-sentences the 50 entity pairs have in total
                    num += len(single_word) # len(single_word) is the number of label-sentences of one entity pair

                if num > 1500: 
                    print('out of range')
                    continue

                temp_word = np.array(temp_word) # convert to arrays
                temp_pos1 = np.array(temp_pos1)
                temp_pos2 = np.array(temp_pos2)
                temp_y = np.array(temp_y)
                ################################### end of building the 50 temporary training examples ###################################

                ###################### build the flattened feed data for the big_num (50) entity pairs ######################
                feed_dict = {}
                total_shape = [] # element 0 is 0; element k is the cumulative number of label-sentences of the first k entity pairs (51 numbers in total)
                total_num = 0 # running total of label-sentences seen so far
                total_word = [] # one row per label-sentence of the 50 entity pairs; each row holds the word indices of that sentence
                total_pos1 = [] # same layout, holding each word's rel_pos1
                total_pos2 = [] # same layout, holding each word's rel_pos2
                for i in range(len(temp_word)): # loop over the 50 entity pairs, filling the lists above one pair at a time
                    total_shape.append(total_num) # start offset of this entity pair
                    total_num += len(temp_word[i]) # number of label-sentences of the i-th entity pair
                    for word in temp_word[i]: 
                        total_word.append(word) 
                    for pos1 in temp_pos1[i]: 
                        total_pos1.append(pos1)
                    for pos2 in temp_pos2[i]:
                        total_pos2.append(pos2)
                total_shape.append(total_num)
                total_shape = np.array(total_shape)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)

                feed_dict[m.total_shape] = total_shape
                feed_dict[m.input_word] = total_word
                feed_dict[m.input_pos1] = total_pos1
                feed_dict[m.input_pos2] = total_pos2
                feed_dict[m.input_y] = temp_y
                ################################### end of building the feed data for the 50 entity pairs ###################################
                
                
                ################################### run the network ###################################
                temp, step, loss, accuracy, summary, l2_loss, final_loss = sess.run(
                    [train_op, global_step, m.total_loss, m.accuracy, merged_summary, m.l2_loss, m.final_loss],
                    feed_dict) 
                time_str = datetime.datetime.now().isoformat() # timestamp
                accuracy = np.reshape(np.array(accuracy), (big_num)) # per-entity-pair accuracies defined in the network
                acc = np.mean(accuracy) # mean accuracy over the batch
                summary_writer.add_summary(summary, step) # write summaries for TensorBoard

                if step % 50 == 0:
                    tempstr = "{}: step {}, softmax_loss {:g}, acc {:g}".format(time_str, step, loss, acc)
                    print(tempstr)               
                
                

                current_step = tf.train.global_step(sess, global_step)
                if current_step > 8000 and current_step % 100 == 0:
                    print('saving model')
                    path = saver.save(sess, save_path + 'ATT_lstm_model', global_step=current_step)
                    tempstr = 'have saved model to ' + path
                    print(tempstr)
