花了一点时间把之前的文本分类模型改成了现在的 QA 匹配模型。实际上思路很简单:把原来的输入 x 换成 x_q 和 x_a 两个输入,label 不变。
匹配层我用的是 difference + absolute value 的操作:把 q、a 两个句子的表示相减后取绝对值,即 |q − a|,用它来衡量两个句子的相似程度,非常简单。
代码一步步给。
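定义以下超参数:
max_pair:模型所需的数据个数,即 (q, a) 的对数
MAX_FEATURES:模型考虑的词汇表大小(只保留出现频率最高的 k 个词)
MAX_SENTENCE_LENGTH:每个句子(q 和 a)的固定长度
embedding_size:词向量维度
batch_size:批次大小
lr:学习率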
# Hyperparameters for the QA-matching model.

# --- data ---
max_pair = 500000            # maximum number of (q, a) dialogue pairs to use
MAX_FEATURES = 450           # vocabulary size: keep the top-k most frequent words
MAX_SENTENCE_LENGTH = 30     # fixed length applied to both q and a

# --- model / optimisation ---
embedding_size = 100         # word-vector dimensionality
batch_size = 300             # number of examples per batch
lr = 0.001                   # learning rate
然后把原来的 input x 改成 input q 和 input a 两个输入,embedding weight 不变,其他的 CNN 操作都要从对 x 进行改为分别对 x_q、x_a 进行。
import tensorflow as tf
import numpy as np
class TextCNN:
def __init__(self, filter_sizes, num_filters, num_classes, learning_rate, batch_size, decay_steps, decay_rate,
             sequence_length, vocab_size, embed_size, is_training,
             initializer=tf.random_normal_initializer(stddev=0.1), multi_label_flag=False,
             clip_gradients=5.0, decay_rate_big=0.50):
    """Store the hyperparameters and build the TF1 graph for the QA-matching TextCNN.

    Args:
        filter_sizes: list of int convolution window sizes, e.g. [3, 4, 5].
        num_filters: number of filters per window size; the total is
            ``num_filters * len(filter_sizes)``.
        num_classes: number of output classes (width of the multi-label
            placeholder and of the projection layer).
        learning_rate: initial value of the non-trainable ``learning_rate`` variable.
        batch_size: stored on the instance; not used directly in this method.
        decay_steps: stored on the instance (presumably consumed by ``train()``
            -- TODO confirm).
        decay_rate: stored on the instance (presumably consumed by ``train()``
            -- TODO confirm).
        sequence_length: fixed token length shared by the question and answer inputs.
        vocab_size: number of rows of the embedding matrix.
        embed_size: word-vector dimensionality.
        is_training: when False only the forward graph (logits and predictions)
            is built; loss, train op and accuracy are skipped.
        initializer: weight initializer stored as ``self.initializer``.
        multi_label_flag: if True use ``loss_multilabel()`` and a constant
            placeholder accuracy, otherwise ``loss()`` and a real accuracy op.
        clip_gradients: stored on the instance (presumably the gradient-clipping
            threshold used by ``train()`` -- TODO confirm).
        decay_rate_big: factor applied to the learning rate each time
            ``learning_rate_decay_half_op`` is run.
    """
    # set hyperparameters
    self.num_classes = num_classes
    self.batch_size = batch_size
    self.sequence_length = sequence_length
    self.vocab_size = vocab_size
    self.embed_size = embed_size
    self.is_training = is_training
    self.learning_rate = tf.Variable(learning_rate, trainable=False, name="learning_rate")
    # Running this op multiplies the current learning rate by decay_rate_big.
    self.learning_rate_decay_half_op = tf.assign(self.learning_rate, self.learning_rate * decay_rate_big)
    self.filter_sizes = filter_sizes  # list of int, e.g. [3, 4, 5]
    self.num_filters = num_filters
    self.initializer = initializer
    self.num_filters_total = self.num_filters * len(filter_sizes)  # total number of filters
    self.multi_label_flag = multi_label_flag
    self.clip_gradients = clip_gradients

    # add placeholders (question, answer, label)
    # Each input gets its own graph name. Previously both were created as
    # "input_x", so TensorFlow silently renamed the second one "input_x_1".
    self.input_x_q = tf.placeholder(tf.int32, [None, self.sequence_length], name="input_x_q")  # question token ids
    self.input_x_a = tf.placeholder(tf.int32, [None, self.sequence_length], name="input_x_a")  # answer token ids
    self.input_y = tf.placeholder(tf.int32, [None, ], name="input_y")  # y: [None], one class id per example
    # y: [None, num_classes]; used for multi-label classification only.
    self.input_y_multilabel = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y_multilabel")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
    self.epoch_step = tf.Variable(0, trainable=False, name="Epoch_Step")
    self.epoch_increment = tf.assign(self.epoch_step, tf.add(self.epoch_step, tf.constant(1)))
    self.decay_steps, self.decay_rate = decay_steps, decay_rate

    self.instantiate_weights()
    self.logits = self.inference()  # main computation graph is here.
    # Built before the early return so that inference-mode models also expose
    # predictions; it depends only on the logits.
    self.predictions = tf.argmax(self.logits, 1, name="predictions")  # shape: [None,]
    if not is_training:
        return

    if multi_label_flag:
        print("going to use multi label loss.")
        self.loss_val = self.loss_multilabel()
    else:
        print("going to use single label loss.")
        self.loss_val = self.loss()
    self.train_op = self.train()

    if not self.multi_label_flag:
        correct_prediction = tf.equal(tf.cast(self.predictions, tf.int32), self.input_y)  # shape: [batch_size]
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="Accuracy")  # shape=()
    else:
        # fake accuracy (you can calculate accuracy outside of the graph using
        # method calculate_accuracy(...) in train.py)
        self.accuracy = tf.constant(0.5)
def instantiate_weights(self):
"""define all weights here"""
with tf.name_scope("embedding"): # embedding matrix
self.Embedding_q = tf.get_variable("Embedding", shape=[self.vocab_size, self.embed_size], initializer=self.initializer) #[vocab_size,embed_size] tf.random_uniform([self.vocab_size, self.embed_size],-1.0,1.0)
#self.Embedding_a = tf.get_variable("Embedding", shape=[self.vocab_size, self.embed_size], initializer=self.initializer) #[vocab_size,embed_size] tf.random_uniform([self.vocab_size, self.embed_size],-1.0,1.0)
self.W_projection = tf.get_variable("W_projection",shape=[self.num_filters_total, self.num_classes],initializer=self.initializer) #[embed_size,label_size]
self.b_projection = tf.get_variable("b_projection