此代码并非学习视频的配套代码,而是我在学习中文邮件分类之后,自己动手实践编写的代码。
主要内容:实现中文邮件分类--也可扩展至多分类
主要方法:RNN + Attention
RNN部分代码如下:
class RNNAttention(object):
    """TensorFlow 1.x graph for a (Bi-)RNN + attention text classifier.

    Building an instance adds the whole model to the current default graph
    and exposes the tensors needed for training and inference as attributes.

    Placeholders (feed these):
        input_x: float32, [batch, text_length, embedding_size]. The inputs
            are used directly as the embedded sequence; no embedding lookup
            is performed here.
        input_y: float32, [batch, nclasses] label rows (the accuracy uses
            their argmax, so presumably one-hot).
        seq_len_ph: int32, [batch], passed to the RNN as `sequence_length`.
        dropout_keep_prob: float32 scalar keep probability for dropout.

    Outputs:
        rnn_outputs: first element returned by `bi_rnn`.
        attention_output, alphas: result of `attention(...)`.
        fin_out: class logits, [batch, nclasses].
        predictions / pred: argmax of the logits along axis 1.
        loss: mean softmax cross-entropy over the batch.
        accuracy: fraction of rows whose prediction matches the label argmax.

    Args:
        text_length: number of time steps in every input sequence.
        nclasses: number of target classes.
        embedding_size: size of each input vector.
        hidden_size: number of units of each GRU cell.
        attention_size: size of the attention projection.

    NOTE(review): the name-scope nesting below was reconstructed from a
    listing whose indentation was lost -- confirm that `predictions` really
    belongs under `Fully_connected_layer`.
    """

    def __init__(self, text_length, nclasses, embedding_size, hidden_size, attention_size):
        # ---- Placeholders supplied through feed_dict ----
        self.input_x = tf.placeholder(
            tf.float32, [None, text_length, embedding_size], name="input_x")
        self.input_y = tf.placeholder(
            tf.float32, [None, nclasses], name="input_y")
        self.seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
        self.dropout_keep_prob = tf.placeholder(
            tf.float32, name="dropout_keep_prob")

        # ---- Embedding layer: inputs arrive already embedded ----
        with tf.name_scope('embedding'):
            self.embedded_chars = self.input_x

        # ---- (Bi-)RNN layer ----
        # Two independent GRU cells; presumably the forward and backward
        # cells of a bidirectional RNN -- confirm against the `bi_rnn` import.
        first_cell = GRUCell(hidden_size)
        second_cell = GRUCell(hidden_size)
        self.rnn_outputs, _ = bi_rnn(
            first_cell,
            second_cell,
            inputs=self.embedded_chars,
            sequence_length=self.seq_len_ph,
            dtype=tf.float32,
        )

        # ---- Attention layer ----
        with tf.name_scope('Attention_layer'):
            self.attention_output, self.alphas = attention(
                self.rnn_outputs, attention_size, return_alphas=True)

        # ---- Dropout on the attention summary ----
        self.dropout = tf.nn.dropout(self.attention_output, self.dropout_keep_prob)

        # ---- Fully connected output layer ----
        with tf.name_scope('Fully_connected_layer'):
            # Input width is 2 * hidden_size: the two RNN directions concatenated.
            fc_weights = tf.Variable(
                tf.truncated_normal([hidden_size * 2, nclasses], stddev=0.1))
            fc_bias = tf.Variable(tf.constant(0., shape=[nclasses]))
            self.fin_out = tf.nn.xw_plus_b(
                self.dropout, fc_weights, fc_bias, name="pred")
            self.predictions = tf.argmax(self.fin_out, 1, name="predictions")

        # Alias for the predicted class indices.
        with tf.name_scope("pred"):
            self.pred = self.predictions

        # ---- Loss ----
        with tf.name_scope("loss"):
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.fin_out)
            self.loss = tf.reduce_mean(per_example_loss)

        # ---- Accuracy ----
        with tf.name_scope("accuracy"):
            true_classes = tf.argmax(self.input_y, 1)
            hits = tf.equal(self.predictions, true_classes)
            self.accuracy = tf.reduce_mean(tf.cast(hits, "float"), name="accuracy")
Attention部分代码如下:
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Reduce a sequence of RNN outputs to one vector with additive attention.

    Each time step is projected through a tanh layer of width
    `attention_size`, scored against a trainable context vector, and the
    scores are softmax-normalised over the time axis. The result is the
    attention-weighted sum of the inputs over time.

    Args:
        inputs: RNN output tensor of shape (B,T,D), or (T,B,D) when
            `time_major` is True. A tuple of tensors (e.g. the two outputs
            of a bidirectional RNN) is concatenated along axis 2 first.
        attention_size: width A of the hidden attention projection.
        time_major: set to True when `inputs` is laid out as (T,B,D).
        return_alphas: when True, also return the attention weights.

    Returns:
        The (B,D) attention-weighted sum, or a tuple `(output, alphas)` with
        `alphas` of shape (B,T) when `return_alphas` is True.

    Note:
        The softmax runs over all T time steps; no sequence-length mask is
        applied in this function.
    """
    if isinstance(inputs, tuple):
        # Bi-RNN case: join the per-direction outputs on the feature axis.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        # Use the public tf.transpose; `tf.array_ops` is an internal module
        # that is not exported from the `tf` namespace in every release.
        inputs = tf.transpose(inputs, [1, 0, 2])

    # D value - hidden size of the RNN layer (must be statically known).
    hidden_size = inputs.shape[2].value

    # Trainable parameters
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Per-time-step projection: (B,T,D) x (D,A) => (B,T,A)
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # Score every time step against the context vector: (B,T,A) x (A) => (B,T)
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')  # (B,T) shape

    # Weighted sum over the time axis: (B,T,D) * (B,T,1) summed on T => (B,D)
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output