# All code below targets TensorFlow 1.15.
# The dataset is not bundled with this file; contact the author to obtain it.
import tensorflow as tf
import numpy as np
import pandas as pd
# Define the feature columns
def get_feature_columns():
    """Build the raw feature columns used by the model.

    Returns:
        A list holding the numeric columns ``num_feature_0`` ..
        ``num_feature_9`` followed by the hashed categorical columns
        ``cat_feature_0`` .. ``cat_feature_9`` (100 hash buckets each).
    """
    # The dataset is assumed to have 10 numeric and 10 categorical features.
    feature_count = 10

    columns = []
    for index in range(feature_count):
        numeric_key = "num_feature_{}".format(index)
        columns.append(tf.feature_column.numeric_column(numeric_key))
    for index in range(feature_count):
        categorical_key = "cat_feature_{}".format(index)
        columns.append(
            tf.feature_column.categorical_column_with_hash_bucket(
                categorical_key, hash_bucket_size=100
            )
        )
    return columns
# Define the DCN (Deep & Cross Network) model
def _dense_feature_columns(embedding_dimension=8):
    """Return the model's feature columns in a form ``input_layer`` accepts.

    Numeric columns are passed through unchanged; every categorical column
    is wrapped in an embedding column, because ``input_layer`` only accepts
    dense columns.
    """
    dense_columns = []
    for column in get_feature_columns():
        # Categorical columns expose a bucket count, numeric columns do not.
        # NOTE(review): relies on the ``num_buckets`` / ``_num_buckets``
        # attribute of TF 1.x categorical columns -- confirm on upgrade.
        is_categorical = (
            hasattr(column, "num_buckets") or hasattr(column, "_num_buckets")
        )
        if is_categorical:
            column = tf.feature_column.embedding_column(
                column, dimension=embedding_dimension
            )
        dense_columns.append(column)
    return dense_columns


def _cross_network(x_0, num_layers):
    """Apply ``num_layers`` cross layers: x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l."""
    input_dim = x_0.get_shape().as_list()[-1]
    x_l = x_0
    for layer_index in range(num_layers):
        with tf.variable_scope("cross_layer_{}".format(layer_index)):
            # x_l . w_l: one scalar per example, broadcast against x_0 below.
            projection = tf.layers.dense(
                x_l, 1, activation=None, use_bias=False
            )
            bias = tf.get_variable(
                "bias", shape=[input_dim], initializer=tf.zeros_initializer()
            )
            # Every layer crosses with the ORIGINAL input x_0, and the
            # residual term keeps the previous layer's output.
            x_l = x_0 * projection + bias + x_l
    return x_l


def dcn_model(features, labels, mode):
    """Estimator ``model_fn`` for a Deep & Cross Network regressor.

    Args:
        features: Mapping from feature name to tensor, keyed by the names
            declared in ``get_feature_columns``.
        labels: Regression targets; unused in PREDICT mode. They are cast to
            float32 and reshaped to ``[batch, 1]`` to match the model output.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        ValueError: If ``mode`` is not PREDICT, TRAIN or EVAL.
    """
    # Shared input: the cross and deep branches read the same dense tensor,
    # so they also share the same embedding tables.
    model_input = tf.feature_column.input_layer(
        features, _dense_feature_columns()
    )

    # Cross network branch
    cross_output = _cross_network(model_input, num_layers=3)

    # Deep network branch
    deep_hidden_1 = tf.layers.dense(model_input, 128, activation=tf.nn.relu)
    deep_hidden_2 = tf.layers.dense(deep_hidden_1, 64, activation=tf.nn.relu)

    # Combine both branches into a single regression output of shape [batch, 1]
    combined_output = tf.layers.dense(
        tf.concat([cross_output, deep_hidden_2], axis=1), 1
    )

    # Prediction
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'predictions': combined_output}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Align labels with the [batch, 1] float output; otherwise the loss and
    # metric ops see mismatched ranks / dtypes.
    labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1])
    loss = tf.losses.mean_squared_error(labels, combined_output)

    # Training
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step()
        )
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op
        )

    # Evaluation
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'mse': tf.metrics.mean_squared_error(labels, combined_output)
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops
        )

    raise ValueError("Unsupported mode: {}".format(mode))
# Input function
def input_fn(data_path, batch_size, shuffle=True, num_epochs=None):
    """Read a CSV file and return one batch of ``(features, labels)`` tensors.

    Args:
        data_path: Path of a CSV file that contains a ``label`` column; every
            other column is treated as a feature.
        batch_size: Number of rows per batch.
        shuffle: Whether to shuffle rows (buffer of 1000) before batching.
            Defaults to True, matching the previous unconditional behavior.
        num_epochs: Number of passes over the data. ``None`` (the default)
            repeats forever; pass ``1`` for a single finite pass, e.g. for
            evaluation.

    Returns:
        A ``(features, labels)`` tuple taken from a one-shot iterator, where
        ``features`` is a dict keyed by CSV column name.
    """
    data = pd.read_csv(data_path)
    labels = data['label']
    features = data.drop('label', axis=1)

    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    # Repeat after batching so that batches never straddle an epoch boundary.
    dataset = dataset.batch(batch_size).repeat(num_epochs)

    # The Dataset.make_one_shot_iterator() method is deprecated in TF 1.15;
    # the compat.v1 function is the supported equivalent.
    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    return iterator.get_next()
# Train and evaluate
def train_and_evaluate():
    """Train the DCN estimator for 1000 steps, then evaluate it once.

    Returns:
        The metrics dict produced by ``Estimator.evaluate``.

    Raises:
        ValueError: If the evaluation CSV contains no rows.
    """
    batch_size = 128
    train_path = 'train_data_path.csv'
    eval_path = 'eval_data_path.csv'

    # The input pipeline repeats forever, so evaluation must be bounded
    # explicitly; otherwise ``evaluate`` never terminates. Use exactly the
    # number of batches needed for one pass over the evaluation file.
    # Checked up front so an empty file fails before training starts.
    num_eval_rows = len(pd.read_csv(eval_path, usecols=['label']))
    if num_eval_rows == 0:
        raise ValueError(
            "Evaluation file contains no rows: {}".format(eval_path)
        )
    eval_steps = (num_eval_rows + batch_size - 1) // batch_size

    # Create the Estimator
    estimator = tf.estimator.Estimator(
        model_fn=dcn_model,
        model_dir='your_model_dir'
    )

    # Train
    estimator.train(
        input_fn=lambda: input_fn(train_path, batch_size=batch_size),
        steps=1000
    )

    # Evaluate
    return estimator.evaluate(
        input_fn=lambda: input_fn(eval_path, batch_size=batch_size),
        steps=eval_steps
    )
# Run training and evaluation only when executed as a script, not on import.
if __name__ == "__main__":
    train_and_evaluate()