转载:tf 利用双向LSTM实现分词

数据预处理:

# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from tqdm import tqdm
 
 
# 以字符串的形式读入所有数据
with open('raw_data/msr_train.txt', 'rb') as inp:
    texts = inp.read().decode('gbk')
sentences = texts.split('\r\n')  # 根据换行切分
 
 
# 将不规范的内容(如每行的开头)去掉
def clean(s):
    if u'“/s' not in s:  # 句子中间的引号不应去掉
        return s.replace(u' ”/s', '')
    elif u'”/s' not in s:
        return s.replace(u'“/s ', '')
    elif u'‘/s' not in s:
        return s.replace(u' ’/s', '')
    elif u'’/s' not in s:
        return s.replace(u'‘/s ', '')
    else:
        return s
 
texts = u''.join(map(clean, sentences))  # 把所有的词拼接起来
#print 'Length of texts is %d' % len(texts)
#print 'Example of texts: \n', texts[:300]
 
# 重新以标点来划分
sentences = re.split(u'[,。!?、‘’“”]/[bems]', texts)
#print 'Sentences number:', len(sentences)
#print 'Sentence Example:\n', sentences[1]
 
def get_Xy(sentence):
    """将 sentence 处理成 [word1, w2, ..wn], [tag1, t2, ...tn]"""
    words_tags = re.findall('(.)/(.)', sentence)
    if words_tags:
        words_tags = np.asarray(words_tags)
        words = words_tags[:, 0]
        tags = words_tags[:, 1]
        return words, tags # 所有的字和tag分别存为 data / label
    return None
 
datas = list()
labels = list()
print 'Start creating words and tags data ...'
for sentence in tqdm(iter(sentences)):
    result = get_Xy(sentence)
    if result:
        datas.append(result[0])
        labels.append(result[1])
 
print 'Length of datas is %d' % len(datas)
print 'Example of datas: ', datas[0]
print 'Example of labels:', labels[0]
 
df_data = pd.DataFrame({'words': datas, 'tags': labels}, index=range(len(datas)))
# 句子长度
df_data['sentence_len'] = df_data['words'].apply(lambda words: len(words))
 
# 句子长度的分布
import matplotlib.pyplot as plt
#df_data['sentence_len'].hist(bins=100)
#plt.xlim(0, 100)
#plt.xlabel('sentence_length')
#plt.ylabel('sentence_num')
#plt.title('Distribution of the Length of Sentence')
#plt.show()
 
# 1.用 chain(*lists) 函数把多个list拼接起来
from itertools import chain
all_words = list(chain(*df_data['words'].values))
# 2.统计所有 word
sr_allwords = pd.Series(all_words)
sr_allwords = sr_allwords.value_counts()
set_words = sr_allwords.index
set_ids = range(1, len(set_words)+1) # 注意从1开始,因为我们准备把0作为填充值
tags = [ 'x', 's', 'b', 'm', 'e']
tag_ids = range(len(tags))
# 3. 构建 words 和 tags 都转为数值 id 的映射(使用 Series 比 dict 更加方便)
word2id = pd.Series(set_ids, index=set_words)
id2word = pd.Series(set_words, index=set_ids)
tag2id = pd.Series(tag_ids, index=tags)
id2tag = pd.Series(tags, index=tag_ids)
vocab_size = len(set_words)
print 'vocab_size={}'.format(vocab_size)
 
max_len = 32
def X_padding(words):
    """把 words 转为 id 形式,并自动补全位 max_len 长度。"""
    ids = list(word2id[words])
    if len(ids) >= max_len:  # 长则弃掉
        return ids[:max_len]
    ids.extend([0]*(max_len-len(ids))) # 短则补全
    return ids
 
def y_padding(tags):
    """把 tags 转为 id 形式, 并自动补全位 max_len 长度。"""
    ids = list(tag2id[tags])
    if len(ids) >= max_len:  # 长则弃掉
        return ids[:max_len]
    ids.extend([0]*(max_len-len(ids))) # 短则补全
    return ids
 
df_data['X'] = df_data['words'].apply(X_padding)
df_data['y'] = df_data['tags'].apply(y_padding)
 
# 最后得到了所有的数据
X = np.asarray(list(df_data['X'].values))
y = np.asarray(list(df_data['y'].values))
print 'X.shape={}, y.shape={}'.format(X.shape, y.shape)
print 'Example of words: ', df_data['words'].values[0]
print 'Example of X: ', X[0]
print 'Example of tags: ', df_data['tags'].values[0]
print 'Example of y: ', y[0]
 
# 保存数据
import pickle
import os
 
if not os.path.exists('data/'):
    os.makedirs('data/')
 
with open('data/data.pkl', 'wb') as outp:
    pickle.dump(X, outp)
    pickle.dump(y, outp)
    pickle.dump(word2id, outp)
    pickle.dump(id2word, outp)
    pickle.dump(tag2id, outp)
    pickle.dump(id2tag, outp)
print '** Finished saving the data.'
训练和测试:

# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from tqdm import tqdm
import time
from sklearn.model_selection import train_test_split
import tensorflow as tf
#config = tf.ConfigProto()
#config.gpu_options.allow_growth = True
sess = tf.Session()
from tensorflow.contrib import rnn
 
# 以字符串的形式读入所有数据
with open('raw_data/msr_train.txt', 'rb') as inp:
    texts = inp.read().decode('gbk')
sentences = texts.split('\r\n')  # 根据换行切分
 
 
# 将不规范的内容(如每行的开头)去掉
def clean(s):
    if u'“/s' not in s:  # 句子中间的引号不应去掉
        return s.replace(u' ”/s', '')
    elif u'”/s' not in s:
        return s.replace(u'“/s ', '')
    elif u'‘/s' not in s:
        return s.replace(u' ’/s', '')
    elif u'’/s' not in s:
        return s.replace(u'‘/s ', '')
    else:
        return s
 
texts = u''.join(map(clean, sentences))  # 把所有的词拼接起来
#print 'Length of texts is %d' % len(texts)
#print 'Example of texts: \n', texts[:300]
 
# 重新以标点来划分
sentences = re.split(u'[,。!?、‘’“”]/[bems]', texts)
#print 'Sentences number:', len(sentences)
#print 'Sentence Example:\n', sentences[1]
 
def get_Xy(sentence):
    """将 sentence 处理成 [word1, w2, ..wn], [tag1, t2, ...tn]"""
    words_tags = re.findall('(.)/(.)', sentence)
    if words_tags:
        words_tags = np.asarray(words_tags)
        words = words_tags[:, 0]
        tags = words_tags[:, 1]
        return words, tags # 所有的字和tag分别存为 data / label
    return None
 
datas = list()
labels = list()
print 'Start creating words and tags data ...'
for sentence in tqdm(iter(sentences)):
    result = get_Xy(sentence)
    if result:
        datas.append(result[0])
        labels.append(result[1])
 
def trainLstm(train_op,data_train,data_valid,data_test):
    def test_epoch(dataset):
        """Testing or valid."""
        _batch_size = 500
        fetches = [accuracy, cost]
        _y = dataset.y
        data_size = _y.shape[0]
        batch_num = int(data_size / _batch_size)
        start_time = time.time()
        _costs = 0.0
        _accs = 0.0
        for i in xrange(batch_num):
            X_batch, y_batch = dataset.next_batch(_batch_size)
            feed_dict = {X_inputs: X_batch, y_inputs: y_batch, lr: 1e-5, batch_size: _batch_size, keep_prob: 1.0}
            _acc, _cost = sess.run(fetches, feed_dict)
            _accs += _acc
            _costs += _cost
        mean_acc = _accs / batch_num
        mean_cost = _costs / batch_num
        return mean_acc, mean_cost
 
    sess.run(tf.global_variables_initializer())
    tr_batch_size = 128
    max_max_epoch = 6
    display_num = 5  # 每个 epoch 显示是个结果
    tr_batch_num = int(data_train.y.shape[0] / tr_batch_size)  # 每个 epoch 中包含的 batch 数
    display_batch = int(tr_batch_num / display_num)  # 每训练 display_batch 之后输出一次
    saver = tf.train.Saver(max_to_keep=10)  # 最多保存的模型数量
    for epoch in xrange(max_max_epoch):
        _lr = 1e-4
        if epoch > max_epoch:
            _lr = _lr * ((decay) ** (epoch - max_epoch))
        print 'EPOCH %d, lr=%g' % (epoch + 1, _lr)
        start_time = time.time()
        _costs = 0.0
        _accs = 0.0
        show_accs = 0.0
        show_costs = 0.0
        for batch in xrange(tr_batch_num):
            fetches = [accuracy, cost, train_op]
            X_batch, y_batch = data_train.next_batch(tr_batch_size)
            feed_dict = {X_inputs: X_batch, y_inputs: y_batch, lr: _lr, batch_size: tr_batch_size, keep_prob: 0.5}
            _acc, _cost, _ = sess.run(fetches, feed_dict)  # the cost is the mean cost of one batch
            _accs += _acc
            _costs += _cost
            show_accs += _acc
            show_costs += _cost
            if (batch + 1) % display_batch == 0:
                valid_acc, valid_cost = test_epoch(data_valid)  # valid
                print '\ttraining acc=%g, cost=%g;  valid acc= %g, cost=%g ' % (show_accs / display_batch,
                                                                                show_costs / display_batch, valid_acc,
                                                                                valid_cost)
                show_accs = 0.0
                show_costs = 0.0
        mean_acc = _accs / tr_batch_num
        mean_cost = _costs / tr_batch_num
        if (epoch + 1) % 3 == 0:  # 每 3 个 epoch 保存一次模型
            save_path = saver.save(sess, model_save_path, global_step=(epoch + 1))
            print 'the save path is ', save_path
        print '\ttraining %d, acc=%g, cost=%g ' % (data_train.y.shape[0], mean_acc, mean_cost)
        print 'Epoch training %d, acc=%g, cost=%g, speed=%g s/epoch' % (
        data_train.y.shape[0], mean_acc, mean_cost, time.time() - start_time)
    # testing
    print '**TEST RESULT:'
    test_acc, test_cost = test_epoch(data_test)
    print '**Test %d, acc=%g, cost=%g' % (data_test.y.shape[0], test_acc, test_cost)
 
def testLstm(X_inputs,y_pred,sentence):
    # ** 导入模型
    saver = tf.train.Saver()
    best_model_path = 'ckpt/bi-lstm.ckpt-6'
    saver.restore(sess, best_model_path)
    # 利用 labels(即状态序列)来统计转移概率
    # 因为状态数比较少,这里用 dict={'I_tI_{t+1}':p} 来实现
    # A统计状态转移的频数
    A = {
        'sb': 0,
        'ss': 0,
        'be': 0,
        'bm': 0,
        'me': 0,
        'mm': 0,
        'eb': 0,
        'es': 0
    }
 
    # zy 表示转移概率矩阵
    zy = dict()
    for label in labels:
        for t in xrange(len(label) - 1):
            key = label[t] + label[t + 1]
            A[key] += 1.0
 
    zy['sb'] = A['sb'] / (A['sb'] + A['ss'])
    zy['ss'] = 1.0 - zy['sb']
    zy['be'] = A['be'] / (A['be'] + A['bm'])
    zy['bm'] = 1.0 - zy['be']
    zy['me'] = A['me'] / (A['me'] + A['mm'])
    zy['mm'] = 1.0 - zy['me']
    zy['eb'] = A['eb'] / (A['eb'] + A['es'])
    zy['es'] = 1.0 - zy['eb']
    keys = sorted(zy.keys())
    print 'the transition probability: '
    for key in keys:
        print key, zy[key]
 
    zy = {i: np.log(zy[i]) for i in zy.keys()}
 
    def viterbi(nodes):
        """
        维特比译码:除了第一层以外,每一层有4个节点。
        计算当前层(第一层不需要计算)四个节点的最短路径:
           对于本层的每一个节点,计算出路径来自上一层的各个节点的新的路径长度(概率)。保留最大值(最短路径)。
           上一层每个节点的路径保存在 paths 中。计算本层的时候,先用paths_ 暂存,然后把本层的最大路径保存到 paths 中。
           paths 采用字典的形式保存(路径:路径长度)。
           一直计算到最后一层,得到四条路径,将长度最短(概率值最大的路径返回)
        """
        paths = {'b': nodes[0]['b'], 's': nodes[0]['s']}  # 第一层,只有两个节点
        for layer in xrange(1, len(nodes)):  # 后面的每一层
            paths_ = paths.copy()  # 先保存上一层的路径
            # node_now 为本层节点, node_last 为上层节点
            paths = {}  # 清空 path
            for node_now in nodes[layer].keys():
                # 对于本层的每个节点,找出最短路径
                sub_paths = {}
                # 上一层的每个节点到本层节点的连接
                for path_last in paths_.keys():
                    if path_last[-1] + node_now in zy.keys():  # 若转移概率不为 0
                        sub_paths[path_last + node_now] = paths_[path_last] + nodes[layer][node_now] + zy[
                            path_last[-1] + node_now]
                # 最短路径,即概率最大的那个
                sr_subpaths = pd.Series(sub_paths)
                sr_subpaths = sr_subpaths.sort_values()  # 升序排序
                node_subpath = sr_subpaths.index[-1]  # 最短路径
                node_value = sr_subpaths[-1]  # 最短路径对应的值
                # 把 node_now 的最短路径添加到 paths 中
                paths[node_subpath] = node_value
        # 所有层求完后,找出最后一层中各个节点的路径最短的路径
        sr_paths = pd.Series(paths)
        sr_paths = sr_paths.sort_values()  # 按照升序排序
        return sr_paths.index[-1]  # 返回最短路径(概率值最大的路径)
 
    def text2ids(text):
        """把字片段text转为 ids."""
        words = list(text)
        ids = list(word2id[words])
        if len(ids) >= max_len:  # 长则弃掉
            print u'输出片段超过%d部分无法处理' % (max_len)
            return ids[:max_len]
        ids.extend([0] * (max_len - len(ids)))  # 短则补全
        ids = np.asarray(ids).reshape([-1, max_len])
        return ids
 
    def simple_cut(text):
        """对一个片段text(标点符号把句子划分为多个片段)进行预测。"""
        if text:
            text_len = len(text)
            X_batch = text2ids(text)  # 这里每个 batch 是一个样本
            fetches = [y_pred]
            feed_dict = {X_inputs: X_batch, lr: 1.0, batch_size: 1, keep_prob: 1.0}
            _y_pred = sess.run(fetches, feed_dict)[0][:text_len]  # padding填充的部分直接丢弃
            nodes = [dict(zip(['s', 'b', 'm', 'e'], each[1:])) for each in _y_pred]
            tags = viterbi(nodes)
            words = []
            for i in range(len(text)):
                if tags[i] in ['s', 'b']:
                    words.append(text[i])
                else:
                    words[-1] += text[i]
            return words
        else:
            return []
 
 
    """首先将一个sentence根据标点和英文符号/字符串划分成多个片段text,然后对每一个片段分词。"""
    not_cuts = re.compile(u'([0-9\da-zA-Z ]+)|[。,、?!.\.\?,!]')
    result = []
    start = 0
    for seg_sign in not_cuts.finditer(sentence):
        result.extend(simple_cut(sentence[start:seg_sign.start()]))
        result.append(sentence[seg_sign.start():seg_sign.end()])
        start = seg_sign.end()
    result.extend(simple_cut(sentence[start:]))
    return result
 
if __name__=="__main__":
    # 导入数据
    import pickle
 
    with open('data/data.pkl', 'rb') as inp:
        X = pickle.load(inp)
        y = pickle.load(inp)
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
 
    # 划分测试集/训练集/验证集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    print 'X_train.shape={}, y_train.shape={}; \nX_valid.shape={}, y_valid.shape={};\nX_test.shape={}, y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_valid.shape, y_valid.shape, X_test.shape, y_test.shape)
 
 
    # ** 3.build the data generator
    class BatchGenerator(object):
        """ Construct a Data generator. The input X, y should be ndarray or list like type.
        Example:
            Data_train = BatchGenerator(X=X_train_all, y=y_train_all, shuffle=False)
            Data_test = BatchGenerator(X=X_test_all, y=y_test_all, shuffle=False)
            X = Data_train.X
            y = Data_train.y
            or:
            X_batch, y_batch = Data_train.next_batch(batch_size)
         """
 
        def __init__(self, X, y, shuffle=False):
            if type(X) != np.ndarray:
                X = np.asarray(X)
            if type(y) != np.ndarray:
                y = np.asarray(y)
            self._X = X
            self._y = y
            self._epochs_completed = 0
            self._index_in_epoch = 0
            self._number_examples = self._X.shape[0]
            self._shuffle = shuffle
            if self._shuffle:
                new_index = np.random.permutation(self._number_examples)
                self._X = self._X[new_index]
                self._y = self._y[new_index]
 
        @property
        def X(self):
            return self._X
 
        @property
        def y(self):
            return self._y
 
        @property
        def num_examples(self):
            return self._number_examples
 
        @property
        def epochs_completed(self):
            return self._epochs_completed
 
        def next_batch(self, batch_size):
            """ Return the next 'batch_size' examples from this data set."""
            start = self._index_in_epoch
            self._index_in_epoch += batch_size
            if self._index_in_epoch > self._number_examples:
                # finished epoch
                self._epochs_completed += 1
                # Shuffle the data
                if self._shuffle:
                    new_index = np.random.permutation(self._number_examples)
                    self._X = self._X[new_index]
                    self._y = self._y[new_index]
                start = 0
                self._index_in_epoch = batch_size
                assert batch_size <= self._number_examples
            end = self._index_in_epoch
            return self._X[start:end], self._y[start:end]
 
 
    print 'Creating the data generator ...'
    data_train = BatchGenerator(X_train, y_train, shuffle=True)
    data_valid = BatchGenerator(X_valid, y_valid, shuffle=False)
    data_test = BatchGenerator(X_test, y_test, shuffle=False)
    print 'Finished creating the data generator.'
 
    '''
    BiLSTM Model
    For Chinese word segmentation.
    '''
    # ##################### config ######################
    decay = 0.85
    max_epoch = 5
    max_max_epoch = 10
    timestep_size = max_len = 32  # 句子长度
    vocab_size = 5159  # 样本中不同字的个数+1(padding 0),根据处理数据的时候得到
    input_size = embedding_size = 64  # 字向量长度
    class_num = 5
    hidden_size = 128  # 隐含层节点数
    layer_num = 2  # bi-lstm 层数
    max_grad_norm = 5.0  # 最大梯度(超过此值的梯度将被裁剪)
 
    lr = tf.placeholder(tf.float32, [])
    keep_prob = tf.placeholder(tf.float32, [])
    batch_size = tf.placeholder(tf.int32, [])  # 注意类型必须为 tf.int32
    model_save_path = 'ckpt/bi-lstm.ckpt'  # 模型保存位置
 
    with tf.variable_scope('embedding'):
        embedding = tf.get_variable("embedding", [vocab_size, embedding_size], dtype=tf.float32)
 
 
    def weight_variable(shape):
        """Create a weight variable with appropriate initialization."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
 
 
    def bias_variable(shape):
        """Create a bias variable with appropriate initialization."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
 
 
    def lstm_cell():
        cell = rnn.LSTMCell(hidden_size, reuse=tf.get_variable_scope().reuse)
        return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
 
 
    def bi_lstm(X_inputs):
        """build the bi-LSTMs network. Return the y_pred"""
        # X_inputs.shape = [batchsize, timestep_size]  ->  inputs.shape = [batchsize, timestep_size, embedding_size]
        inputs = tf.nn.embedding_lookup(embedding, X_inputs)
 
        # ** 1.构建前向后向多层 LSTM
        cell_fw = rnn.MultiRNNCell([lstm_cell() for _ in range(layer_num)], state_is_tuple=True)
        cell_bw = rnn.MultiRNNCell([lstm_cell() for _ in range(layer_num)], state_is_tuple=True)
 
        # ** 2.初始状态
        initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)
        initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)
 
        # 下面两部分是等价的
        # **************************************************************
        # ** 把 inputs 处理成 rnn.static_bidirectional_rnn 的要求形式
        # ** 文档说明
        # inputs: A length T list of inputs, each a tensor of shape
        # [batch_size, input_size], or a nested tuple of such elements.
        # *************************************************************
        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        # inputs.shape = [batchsize, timestep_size, embedding_size]  ->  timestep_size tensor, each_tensor.shape = [batchsize, embedding_size]
        # inputs = tf.unstack(inputs, timestep_size, 1)
        # ** 3.bi-lstm 计算(tf封装)  一般采用下面 static_bidirectional_rnn 函数调用。
        #   但是为了理解计算的细节,所以把后面的这段代码进行展开自己实现了一遍。
        #     try:
        #         outputs, _, _ = rnn.static_bidirectional_rnn(cell_fw, cell_bw, inputs,
        #                         initial_state_fw = initial_state_fw, initial_state_bw = initial_state_bw, dtype=tf.float32)
        #     except Exception: # Old TensorFlow version only returns outputs not states
        #         outputs = rnn.static_bidirectional_rnn(cell_fw, cell_bw, inputs,
        #                         initial_state_fw = initial_state_fw, initial_state_bw = initial_state_bw, dtype=tf.float32)
        #     output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_size * 2])
        # ***********************************************************
 
        # ***********************************************************
        # ** 3. bi-lstm 计算(展开)
        with tf.variable_scope('bidirectional_rnn'):
            # *** 下面,两个网络是分别计算 output 和 state
            # Forward direction
            outputs_fw = list()
            state_fw = initial_state_fw
            with tf.variable_scope('fw'):
                for timestep in range(timestep_size):
                    if timestep > 0:
                        tf.get_variable_scope().reuse_variables()
                    (output_fw, state_fw) = cell_fw(inputs[:, timestep, :], state_fw)
                    outputs_fw.append(output_fw)
 
            # backward direction
            outputs_bw = list()
            state_bw = initial_state_bw
            with tf.variable_scope('bw') as bw_scope:
                inputs = tf.reverse(inputs, [1])
                for timestep in range(timestep_size):
                    if timestep > 0:
                        tf.get_variable_scope().reuse_variables()
                    (output_bw, state_bw) = cell_bw(inputs[:, timestep, :], state_bw)
                    outputs_bw.append(output_bw)
            # *** 然后把 output_bw 在 timestep 维度进行翻转
            # outputs_bw.shape = [timestep_size, batch_size, hidden_size]
            outputs_bw = tf.reverse(outputs_bw, [0])
            # 把两个oupputs 拼成 [timestep_size, batch_size, hidden_size*2]
            output = tf.concat([outputs_fw, outputs_bw], 2)
            output = tf.transpose(output, perm=[1, 0, 2])
            output = tf.reshape(output, [-1, hidden_size * 2])
        # ***********************************************************
        return output  # [-1, hidden_size*2]
 
 
    with tf.variable_scope('Inputs'):
        X_inputs = tf.placeholder(tf.int32, [None, timestep_size], name='X_input')
        y_inputs = tf.placeholder(tf.int32, [None, timestep_size], name='y_input')
 
    bilstm_output = bi_lstm(X_inputs)
 
    with tf.variable_scope('outputs'):
        softmax_w = weight_variable([hidden_size * 2, class_num])
        softmax_b = bias_variable([class_num])
        y_pred = tf.matmul(bilstm_output, softmax_w) + softmax_b
 
    # adding extra statistics to monitor
    # y_inputs.shape = [batch_size, timestep_size]
    correct_prediction = tf.equal(tf.cast(tf.argmax(y_pred, 1), tf.int32), tf.reshape(y_inputs, [-1]))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_inputs, [-1]), logits=y_pred))
 
    # ***** 优化求解 *******
    tvars = tf.trainable_variables()  # 获取模型的所有参数
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)  # 获取损失函数对于每个参数的梯度
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)  # 优化器
 
    # 梯度下降计算
    train_op = optimizer.apply_gradients(zip(grads, tvars),
                                         global_step=tf.contrib.framework.get_or_create_global_step())
    print 'Finished creating the bi-lstm model.'
 
    '''
    if you want to train 
    '''
    #trainLstm(train_op, data_train, data_valid, data_test)
 
    '''
    test model
    '''
    # 例一
    sentence = u'人们思考问题往往不是从零开始的。就好像你现在阅读这篇文章一样,你对每个词的理解都会依赖于你前面看到的一些词,\
          而不是把你前面看的内容全部抛弃了,忘记了,再去理解这个单词。也就是说,人们的思维总是会有延续性的。'
    result = testLstm(X_inputs,y_pred,sentence)
    rss = ''
    for each in result:
        rss = rss + each + ' / '
    print rss
————————————————
版权声明:本文为CSDN博主「斯大分」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_27655147/article/details/79403894

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值