以下是一个使用TensorFlow 1.x(代码依赖 `tf.placeholder`、`tf.Session`、`tf.contrib` 等仅在 1.x 中提供的API)实现CNN进行文本二分类的Python代码示例,其中包括了一个简单的数据示例:
```python
import tensorflow as tf
# Minimal TextCNN (parallel convolutions of several widths + max-over-time
# pooling + dropout + softmax layer) for binary classification, written
# against the TensorFlow 1.x graph API.

# Hyperparameters
sequence_length = 10  # number of positions (feature values) per sample
num_classes = 2  # binary classification with one-hot labels
# Each position of the toy data below carries a single scalar, so the
# per-position vector has width 1. For real text, set this to the word
# embedding dimension (e.g. 50) and feed embedded tokens instead.
embedding_size = 1
filter_sizes = [3, 4, 5]
num_filters = 128
train_keep_prob = 0.5  # keep probability fed to the dropout placeholder
l2_reg_lambda = 0.0
learning_rate = 1e-3
num_steps = 1000

# Inputs
input_x = tf.placeholder(
    tf.float32, [None, sequence_length, embedding_size], name="input_x")
input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
# Fed at run time so the same graph can be run with a different keep
# probability (e.g. 1.0) outside of training.
dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
# conv2d needs a 4-D [batch, height, width, channels] tensor, so append a
# channel axis of size 1.
input_x_expanded = tf.expand_dims(input_x, -1)

# One convolution + max-pool branch per filter size
pooled_outputs = []
for filter_size in filter_sizes:
    with tf.name_scope("conv-maxpool-%s" % filter_size):
        # Convolution spanning `filter_size` positions and the full width
        filter_shape = [filter_size, embedding_size, 1, num_filters]
        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
        conv = tf.nn.conv2d(
            input_x_expanded,
            W,
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="conv")
        # Non-linearity
        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
        # With VALID padding the conv output has
        # sequence_length - filter_size + 1 rows; pooling over all of them
        # leaves one value per filter.
        pooled = tf.nn.max_pool(
            h,
            ksize=[1, sequence_length - filter_size + 1, 1, 1],
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="pool")
        pooled_outputs.append(pooled)

# Concatenate the pooled features of all branches along the channel axis
# and flatten to [batch, num_filters_total].
num_filters_total = num_filters * len(filter_sizes)
h_pool = tf.concat(pooled_outputs, 3)
h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

# Dropout
with tf.name_scope("dropout"):
    h_drop = tf.nn.dropout(h_pool_flat, keep_prob=dropout_keep_prob)

# Fully connected output layer
with tf.name_scope("output"):
    W = tf.get_variable(
        "W",
        shape=[num_filters_total, num_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
    predictions = tf.argmax(scores, 1, name="predictions")

# Loss: mean cross-entropy plus the L2 penalty over all trainable variables
with tf.name_scope("loss"):
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=input_y)
    l2_loss = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(l2_reg_lambda),
        tf.trainable_variables())
    loss = tf.reduce_mean(losses) + l2_loss

# Optimizer
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars)

# Toy data: two samples of `sequence_length` scalar feature values each
x_train = [
    [0.2, 0.4, 0.6, 0.8, 0.3, 0.1, 0.9, 0.5, 0.7, 0.2],
    [0.3, 0.6, 0.9, 0.2, 0.5, 0.8, 0.1, 0.7, 0.4, 0.2],
]
y_train = [
    [0, 1],
    [1, 0],
]
# Reshape [batch, sequence_length] -> [batch, sequence_length, embedding_size]
# so the data matches the `input_x` placeholder.
x_feed = [[[value] for value in sample] for sample in x_train]

# Train the model
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed_dict = {
        input_x: x_feed,
        input_y: y_train,
        dropout_keep_prob: train_keep_prob,
    }
    for step in range(num_steps):
        _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)
        if step % 100 == 0:
            print("Step %d, Loss: %f" % (step, loss_val))
```
上面代码中的示例数据包括两个输入样本,每个样本由10个特征值组成,标签是采用one-hot编码的二分类标签(`[0, 1]` 与 `[1, 0]` 分别表示两个类别)。在实际应用中,应该使用更大的数据集进行训练。