# Transition-based dependency parsing with tf.estimator.Estimator
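# A greedy transition-based parser trained with the tf.estimator API: each parser
# configuration is encoded as WORD_NUM words plus WORD_NUM POS tags, and a
# feed-forward classifier predicts the next transition (label) to apply.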
import numpy as np
import math
import tensorflow as tf
import cjdpy  # helper library used here for loading list and CSV files
WORD_NUM = 6
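# features["x"] packs 2*WORD_NUM ids per example:
# columns [0, WORD_NUM) are word ids, columns [WORD_NUM, 2*WORD_NUM) are POS-tag ids.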
def model_fn(features, labels, mode):
    # Randomly initialized alternatives to the pretrained embeddings:
    # word_lookup = tf.truncated_normal(shape=[14871, 100], mean=0.0, stddev=1.0, dtype=tf.float32)
    # pos_lookup = tf.truncated_normal(shape=[79, 100], mean=0.0, stddev=1.0, dtype=tf.float32)
    # POS embeddings are fine-tuned; the pretrained word embeddings stay frozen.
    pos_lookup = tf.Variable(np.asarray(posEmbeddingMatrix, dtype=np.float32), trainable=True)
    word_lookup = tf.Variable(np.asarray(wordEmbeddingMatrix, dtype=np.float32), trainable=False)
print("feature x.shape: ", features["x"].shape)
out1 = tf.nn.embedding_lookup(word_lookup, features["x"][:, 0:WORD_NUM])
out2 = tf.nn.embedding_lookup(pos_lookup, features["x"][:, WORD_NUM:2*WORD_NUM])
print(out1)
print(out2)
    # Concatenate all features into a single vector per example
    out3 = tf.concat([tf.layers.flatten(out1), tf.layers.flatten(out2)], axis=1)
    out4 = tf.layers.dense(out3, 500, activation=tf.nn.relu)
    logits = tf.layers.dense(out4, LABEL_SIZE)
    # Variant: LSTM over the concatenated word/POS embeddings
    # out3 = tf.concat([out1, out2], axis=2)
    # out4 = tf.keras.layers.LSTM(256)(out3)
    # out5 = tf.layers.dense(tf.layers.flatten(out4), 500, activation=tf.nn.relu)
    # logits = tf.layers.dense(out5, LABEL_SIZE)
    # Variant: POS features only
    # out3 = tf.layers.dense(tf.layers.flatten(out2), 200, activation=tf.nn.relu)
    # logits = tf.layers.dense(out3, LABEL_SIZE)
    # Variant: POS features only + LSTM
    # out3 = tf.keras.layers.LSTM(64)(out2)
    # out4 = tf.layers.dense(tf.layers.flatten(out3), 200, activation=tf.nn.relu)
    # logits = tf.layers.dense(out4, LABEL_SIZE)
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        loss_ori = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        # Weighted loss (uses the per-example class weights computed below):
        # loss = tf.reduce_mean(tf.multiply(features["weight"], loss_ori))
        loss = tf.reduce_mean(loss_ori)
        # Fetch the global training step
        global_step = tf.train.get_global_step()
        # Plain gradient descent with a learning rate of 0.01
        optimizer = tf.train.GradientDescentOptimizer(0.01)
        # Group the weight update and the global-step increment into a single op,
        # so both always run together, like a small transaction
        train = tf.group(optimizer.minimize(loss), tf.assign_add(global_step, 1))
        # Wrap everything into the EstimatorSpec that tf.estimator.Estimator expects
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train)
if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {  # used by predict() and by the exported SavedModel
"y": tf.argmax(logits, axis=1)
}
print("tf.argmax(logits, axis=1): ", tf.argmax(logits, axis=1))
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions)
def serving_input_fn():
x = tf.placeholder(tf.int64, [None, 2*WORD_NUM], name='x')
input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
'x': x,
})()
return input_fn
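# serving_input_fn is only needed by the (commented-out) export_savedmodel call at the end.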
wordVocab = []
posVocab = []
label = []
x_train_pre = []
y_train_pre = []
train_weight = []
eval_weight = []
# vocab
# wordVocab = cjdpy.load_list("data/wordVocab.txt")
data = cjdpy.load_csv("data/train.txt")
posVocab = cjdpy.load_list("data/posVocab.txt")
label = list(set([item[-1] for item in data]))
LABEL_SIZE = len(label)
label.sort()
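# Sorting makes the label -> id mapping deterministic across runs.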
# print("word vocab size: ", len(wordVocab))
print("pos vocab size: ", len(posVocab))
print("label size: ", len(label))
# w2id = {wordVocab[i]: i for i in range(len(wordVocab))}
p2id = {posVocab[i]: i for i in range(len(posVocab))}
label2id = {label[i]: i for i in range(len(label))}
def create_class_weight(labels_dict, mu=0.15):
    total = sum(labels_dict.values())
    print(total)
    class_weight = dict()
    for key in labels_dict:
        score = math.log(mu * total / float(labels_dict[key]))
        # Rare labels keep their (higher) score; frequent labels are clipped to 1.0
        class_weight[key] = score if score > 1.0 else 1.0
    return class_weight
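# Worked example with hypothetical counts: labels_dict = {"shift": 900, "left-arc": 50, "right-arc": 50}
# gives total = 1000; "shift": log(0.15 * 1000 / 900) ≈ -1.79, clipped to 1.0;
# "left-arc"/"right-arc": log(0.15 * 1000 / 50) = log(3) ≈ 1.10, so rare labels are up-weighted.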
labels_dict = {}
for item in data:
    labels_dict[item[-1]] = labels_dict.get(item[-1], 0) + 1
class_weight = create_class_weight(labels_dict)
print("class weight: ", class_weight)
# load embedding
posWord2vec = cjdpy.load_csv("posWord2vec.txt", ' ')
posWord2vec_dict = {}
for item in posWord2vec:
posWord2vec_dict[item[0]] = [float(x) for x in item[1:]]
posEmbeddingMatrix = []
for pos in posVocab:
if pos in posWord2vec_dict:
posEmbeddingMatrix.append(posWord2vec_dict[pos])
    else:
        # POS tags missing from the word2vec file fall back to a zero vector
        posEmbeddingMatrix.append([0.0] * 100)
wordEmbedding = cjdpy.load_csv("word_embedding", ' ')
wordVocab = ['UNK', 'PAD']
wordEmbeddingMatrix = []
wordEmbeddingMatrix.append([0.0] * 300)  # UNK
wordEmbeddingMatrix.append([0.0] * 300)  # PAD
for i, item in enumerate(wordEmbedding):
    if i == 0: continue  # skip the first line (presumably the word2vec "count dim" header)
    wordEmbeddingMatrix.append([float(x) for x in item[1:]])
    wordVocab.append(item[0])
print("word vocab size: ", len(wordVocab))
w2id = {wordVocab[i]: i for i in range(len(wordVocab))}
# make trainX, trainY, evalX, evalY
for item in data:
    tmp = []
    for i in range(WORD_NUM):
        tmp.append(w2id.get(item[i], w2id['UNK']))
    for i in range(WORD_NUM, 2 * WORD_NUM):
        tmp.append(p2id.get(item[i], p2id['UNK']))
    x_train_pre.append(tmp)
    y_train_pre.append(label2id[item[-1]])
    train_weight.append(class_weight[item[-1]])
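# Note: train_weight/eval_weight are only consumed if the weighted loss in model_fn is re-enabled.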
x_train = np.array(x_train_pre)
y_train = np.array(y_train_pre)
print("trainX examples", x_train_pre[:5])
print("trainY examples ", y_train_pre[:5])
print("trainX shape: ", x_train.shape)
print("trainY shape: ", y_train.shape)
data = cjdpy.load_csv("data/dev.txt")
x_dev_pre = []
y_dev_pre = []
for item in data:
    tmp = []
    for i in range(WORD_NUM):
        tmp.append(w2id.get(item[i], w2id['UNK']))
    for i in range(WORD_NUM, 2 * WORD_NUM):
        tmp.append(p2id.get(item[i], p2id['UNK']))
    x_dev_pre.append(tmp)
    y_dev_pre.append(label2id[item[-1]])
    eval_weight.append(class_weight[item[-1]])
x_eval = np.array(x_dev_pre)
y_eval = np.array(y_dev_pre)
tf.logging.set_verbosity(tf.logging.INFO)
# input_fn
train_input_fn = tf.estimator.inputs.numpy_input_fn({"x": x_train, "weight": np.array(train_weight, np.float32)}, y_train, batch_size=32, num_epochs=None, shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn({"x": x_eval, "weight": np.array(eval_weight, np.float32)}, y_eval, batch_size=32, shuffle=False)
# Evaluation only runs when a checkpoint is saved, so save_checkpoints_steps sets the eval cadence
run_config = tf.estimator.RunConfig(save_checkpoints_steps=2000, keep_checkpoint_max=5, log_step_count_steps=4000)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir="output", config=run_config)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=30000)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=None, start_delay_secs=0, throttle_secs=0)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
res = list(estimator.predict(input_fn=eval_input_fn))
out = [label[item['y']] for item in res]
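# Accuracy is measured over non-"shift" predictions: "shift" is typically by far the
# most frequent transition, so counting it would inflate the score.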
cnt, total = 0, 0
for i, item in enumerate(res):
# if y_dev_pre[i] == label2id["shift"]:
# continue
if item['y'] == label2id["shift"]:
continue
if y_dev_pre[i] == item['y']:
cnt += 1
total += 1
print(cnt, total)
print("acc: ", 1.0*cnt/total)
# Print the first 100 predicted transition labels
for i, o in enumerate(out[:100]):
    print(i, o)
# estimator.export_savedmodel("output/model_fn", serving_input_fn)
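# A minimal export-and-serve sketch (an assumption, not from the original run;
# tf.contrib.predictor is TF 1.x only):
# export_dir = estimator.export_savedmodel("output/model_fn", serving_input_fn)
# from tensorflow.contrib import predictor
# predict_fn = predictor.from_saved_model(export_dir)
# print(predict_fn({"x": x_eval[:1]}))  # -> {"y": array([...])}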