Deep&Cross Network算法代码

mingchen_peng

于 2024-09-04 19:47:00 发布

阅读量174

点赞数 1

分类专栏：推荐系统　文章标签：算法

本文链接：https://blog.csdn.net/mingchen_peng/article/details/141902281

版权

推荐系统专栏收录该内容

2 篇文章 0 订阅

订阅专栏

以下代码均基于 TensorFlow 1.15 版本。
如需数据集，请私信我。

import tensorflow as tf
import numpy as np
import pandas as pd

# 定义特征列
def get_feature_columns():
    """Build the model's raw feature columns.

    The dataset is assumed to have 10 numeric and 10 categorical features,
    keyed ``num_feature_<i>`` and ``cat_feature_<i>`` for ``i`` in 0..9.

    Returns:
        A new list holding the 10 numeric columns followed by the 10
        hash-bucketed categorical columns (100 buckets each).
    """
    feature_count = 10
    columns = []
    # Numeric columns come first so the ordering matches "numeric + categorical".
    for index in range(feature_count):
        numeric_key = "num_feature_{}".format(index)
        columns.append(tf.feature_column.numeric_column(numeric_key))
    for index in range(feature_count):
        categorical_key = "cat_feature_{}".format(index)
        columns.append(
            tf.feature_column.categorical_column_with_hash_bucket(
                categorical_key, hash_bucket_size=100
            )
        )
    return columns

# 定义 DCN 模型
def dcn_model(features, labels, mode):
    """Estimator ``model_fn`` for a Deep & Cross Network regressor.

    Numeric features and 8-dimensional embeddings of the hashed categorical
    features are stacked into one dense input. That input feeds both a
    3-layer cross network and a 128 -> 64 ReLU deep network, whose outputs
    are concatenated and projected to a single value.

    Args:
        features: Dict mapping feature names to tensors, keyed as expected
            by ``get_feature_columns()``.
        labels: Tensor of regression targets; ``None`` in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    # Embedding layer: `input_layer` only accepts dense columns, so every
    # hash-bucketed categorical column is wrapped in an embedding column.
    # `categorical_column_with_hash_bucket` is a factory function, not a class,
    # so it cannot be used with isinstance(); the columns it creates are
    # recognised by their `hash_bucket_size` field instead.
    dense_columns = []
    for column in get_feature_columns():
        if hasattr(column, "hash_bucket_size"):
            column = tf.feature_column.embedding_column(column, dimension=8)
        dense_columns.append(column)

    # One stacked input shared by the cross and deep branches, so both see the
    # same embeddings.
    stacked_input = tf.feature_column.input_layer(features, dense_columns)

    # Cross network: every layer multiplies the ORIGINAL input x_0 by a scalar
    # projection of the current layer, then adds the current layer back as a
    # residual: x_{l+1} = x_0 * dense(x_l) + x_l.
    num_layers = 3  # number of cross layers
    x_0 = stacked_input
    cross_output = x_0
    for _ in range(num_layers):
        projection = tf.layers.dense(cross_output, 1, activation=None)
        cross_output = x_0 * projection + cross_output

    # Deep network.
    deep_hidden_1 = tf.layers.dense(stacked_input, 128, activation=tf.nn.relu)
    deep_hidden_2 = tf.layers.dense(deep_hidden_1, 64, activation=tf.nn.relu)

    # Combine both branches into a single regression output of shape (batch, 1).
    combined_output = tf.layers.dense(
        tf.concat([cross_output, deep_hidden_2], axis=1), 1
    )

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'predictions': combined_output}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Align labels with the (batch, 1) predictions: the loss requires
    # compatible shapes, and the cast lets integer-valued labels be compared
    # with the float predictions.
    labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1])
    loss = tf.losses.mean_squared_error(labels, combined_output)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # The optimizer is only needed when training.
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Remaining mode is EVAL.
    eval_metric_ops = {'mse': tf.metrics.mean_squared_error(labels, combined_output)}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

# 输入函数
def input_fn(data_path, batch_size, num_epochs=None, shuffle=True,
             shuffle_buffer_size=1000):
    """Read a CSV file and return one batch of ``(features, labels)`` tensors.

    The defaults reproduce the original pipeline: shuffle with a 1000-element
    buffer, batch, then repeat indefinitely. Pass ``num_epochs=1`` and
    ``shuffle=False`` for a single deterministic pass, e.g. for evaluation.

    Args:
        data_path: Path to a CSV file containing a ``label`` column; every
            other column is treated as a feature.
        batch_size: Number of examples per batch.
        num_epochs: Number of passes over the data; ``None`` repeats forever.
        shuffle: Whether to shuffle examples before batching.
        shuffle_buffer_size: Buffer size used when ``shuffle`` is true.

    Returns:
        A ``(features, labels)`` tuple where ``features`` is a dict keyed by
        column name and ``labels`` holds the ``label`` column.
    """
    frame = pd.read_csv(data_path)
    label_values = frame['label']
    feature_frame = frame.drop('label', axis=1)

    dataset = tf.data.Dataset.from_tensor_slices((dict(feature_frame), label_values))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    # repeat(None) repeats indefinitely, matching the original behaviour.
    dataset = dataset.batch(batch_size).repeat(num_epochs)

    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

# 训练和评估
def train_and_evaluate(train_steps=1000, eval_steps=100,
                       train_path='train_data_path.csv',
                       eval_path='eval_data_path.csv',
                       model_dir='your_model_dir',
                       batch_size=128):
    """Train the DCN estimator, then evaluate it.

    Args:
        train_steps: Number of training steps to run.
        eval_steps: Number of batches to evaluate on. It must be bounded
            because ``input_fn`` repeats its dataset indefinitely by default,
            so an evaluation without a step limit would never finish.
        train_path: CSV file with the training data.
        eval_path: CSV file with the evaluation data.
        model_dir: Directory passed to the estimator as ``model_dir``.
        batch_size: Batch size used for both training and evaluation.

    Returns:
        The value returned by ``estimator.evaluate``.
    """
    # Create the Estimator.
    estimator = tf.estimator.Estimator(
        model_fn=dcn_model,
        model_dir=model_dir
    )

    # Train.
    estimator.train(
        input_fn=lambda: input_fn(train_path, batch_size=batch_size),
        steps=train_steps
    )

    # Evaluate on a bounded number of batches (see ``eval_steps`` above).
    return estimator.evaluate(
        input_fn=lambda: input_fn(eval_path, batch_size=batch_size),
        steps=eval_steps
    )

# Run training followed by evaluation only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    train_and_evaluate()