我们之前用 indicator 的方式将数据 multi-hot 化，输出结果的 shape 为 [batchsize, 50]。我们能不能对维度再次压缩呢？
答案是可以的。这里我们将会运用 embedding 的方法，大体写法均没有改动。
import tensorflow as tf
def _parse_function(record):
    """Parse one serialized Example into a (features, label) pair.

    Args:
      record: A serialized tf.Example proto read from a TFRecord file.

    Returns:
      A tuple `(features, label)`:
        features: dict holding the variable-length string tensor of review
          terms under the key 'terms'.
        label: float32 tensor of shape [1] containing the 0/1 label.
    """
    feature_spec = {
        # Terms are strings of varying lengths, hence a sparse feature.
        "terms": tf.VarLenFeature(dtype=tf.string),
        # Single float32 label, 0 or 1.
        "labels": tf.FixedLenFeature(shape=[1], dtype=tf.float32),
    }
    parsed = tf.parse_single_example(record, feature_spec)
    return {'terms': parsed['terms'].values}, parsed['labels']
def my_input_fn(input_filenames, num_epochs=None, shuffle=True):
    """Build a padded, batched (features, labels) pipeline from TFRecords.

    Args:
      input_filenames: Path(s) to TFRecord file(s).
      num_epochs: Number of passes over the data; None repeats forever.
      shuffle: Whether to shuffle examples with a 10000-element buffer.

    Returns:
      A `(features, labels)` pair of tensors for the next batch of 25.
    """
    dataset = tf.data.TFRecordDataset(input_filenames)
    dataset = dataset.map(_parse_function)
    if shuffle:
        dataset = dataset.shuffle(10000)
    # The 'terms' field is variable-length, so pad every field of each
    # batch element out to the longest element in that batch.
    dataset = dataset.padded_batch(25, dataset.output_shapes)
    dataset = dataset.repeat(num_epochs)
    # Hand back the tensors for the next batch.
    return dataset.make_one_shot_iterator().get_next()
def add_layer(inputs, input_size, output_size, activation_function=None):
    """Add a fully connected layer and return (weights, biases, outputs).

    Args:
      inputs: 2-D tensor of shape [batch, input_size].
      input_size: Width of the incoming tensor.
      output_size: Number of units in this layer.
      activation_function: Optional activation applied to Wx+b; identity
        when None.

    Returns:
      The layer's weight variable, bias variable, and output tensor.
    """
    weights = tf.Variable(tf.random_normal([input_size, output_size], stddev=.1))
    biases = tf.Variable(tf.zeros([output_size]) + .1)
    pre_activation = tf.matmul(inputs, weights) + biases
    outputs = pre_activation if activation_function is None else activation_function(pre_activation)
    return weights, biases, outputs
def _loss(pred, ys, epsilon=1e-7):
    """Mean binary cross-entropy (log) loss.

    Args:
      pred: Predicted probabilities in [0, 1] (e.g. sigmoid outputs).
      ys: Ground-truth 0/1 labels, broadcastable against `pred`.
      epsilon: Small constant clipping `pred` away from 0 and 1.  Without
        it, tf.log(0) yields -inf and the loss becomes NaN as soon as the
        model saturates; the default preserves prior behavior everywhere
        except at those degenerate endpoints.

    Returns:
      Scalar tensor: the mean log loss over the batch.
      (Try swapping in MSE vs. cross-entropy and compare the AUC impact.)
    """
    pred = tf.clip_by_value(pred, epsilon, 1.0 - epsilon)
    return tf.reduce_mean(-ys * tf.log(pred) - (1 - ys) * tf.log(1 - pred))
def train_step(learning_rate, loss):
    """Create an Adam op that minimizes `loss` at the given learning rate.

    NOTE(review): relies on a module-level `global_step` variable that is
    not defined in this chunk — presumably created elsewhere in the file;
    confirm before running in isolation.

    Args:
      learning_rate: Learning rate for the Adam optimizer (dynamic lr).
      loss: Scalar loss tensor to minimize.

    Returns:
      The training op.
    """
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss, global_step=global_step)
# NOTE(review): despite the name, this holds the (features, labels) batch
# tensors returned by my_input_fn (features['terms'] has shape [25, None]),
# not a file path — consider renaming.
train_path = my_input_fn('./nlp/train.tfrecord') # shape [25, None]
informative_terms = ("bad", "great", "best", "worst", "fun", "beautiful",
"excellent", "poor", "boring", "awful", "terrible",
"definitely", "perfect", "liked", "worse", "waste",
"entertaining", "loved", "unfortunately", "amazing",
"enjoyed", "favorite", &#