# NOTE: All code below targets TensorFlow 1.15.
# The dataset is available from the author on request.
import tensorflow as tf
import numpy as np
import pandas as pd
def get_feature_columns():
    """Return the model's feature columns.

    Builds 10 numeric columns ("num_feature_0".."num_feature_9") followed by
    10 hash-bucketed categorical columns ("cat_feature_0".."cat_feature_9",
    100 buckets each).
    """
    numeric_cols = [
        tf.feature_column.numeric_column("num_feature_{}".format(idx))
        for idx in range(10)
    ]
    hashed_cols = [
        tf.feature_column.categorical_column_with_hash_bucket(
            "cat_feature_{}".format(idx), hash_bucket_size=100)
        for idx in range(10)
    ]
    return numeric_cols + hashed_cols
def deep_fm_model(features, labels, mode):
    """Estimator model_fn for a DeepFM-style regressor (TF 1.15).

    Sums three towers over a shared dense input: a linear term, a
    second-order FM interaction term, and a small DNN.

    Args:
        features: dict of input tensors, as produced by ``input_fn``.
        labels: 1-D tensor of regression targets (``None`` in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    # input_layer cannot consume raw categorical columns, so wrap each hashed
    # categorical column in an 8-dim embedding column first.  (The original
    # isinstance() check compared against the factory *function*
    # categorical_column_with_hash_bucket, which raises TypeError; we detect
    # hashed categorical columns by their 'hash_bucket_size' field instead.)
    dense_columns = []
    for column in get_feature_columns():
        if hasattr(column, 'hash_bucket_size'):
            dense_columns.append(
                tf.feature_column.embedding_column(column, dimension=8))
        else:
            dense_columns.append(column)

    # Build the dense input once and share it between the FM and deep towers
    # (the original called input_layer twice with the same columns).
    net_input = tf.feature_column.input_layer(features, dense_columns)

    # First-order (linear) term, shape (batch, 1).
    linear_part = tf.layers.dense(net_input, 1)

    # Second-order FM interaction: 0.5 * ((sum x)^2 - sum(x^2)), reduced over
    # the feature axis.  Both reductions yield rank-1 (batch,) tensors, so we
    # expand back to (batch, 1) — the original applied reduce_sum(axis=1) to
    # an already rank-1 tensor, an axis-out-of-range error.
    sum_square = tf.square(tf.reduce_sum(net_input, axis=1))
    square_sum = tf.reduce_sum(tf.square(net_input), axis=1)
    fm_part = 0.5 * tf.expand_dims(sum_square - square_sum, axis=1)

    # Deep tower: 128 -> 64 -> 1.
    hidden = tf.layers.dense(net_input, 128, activation=tf.nn.relu)
    hidden = tf.layers.dense(hidden, 64, activation=tf.nn.relu)
    deep_part = tf.layers.dense(hidden, 1)

    combined_output = linear_part + fm_part + deep_part

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'predictions': combined_output})

    # Align labels with the (batch, 1) prediction shape; MSE would otherwise
    # broadcast (batch,) against (batch, 1) into a (batch, batch) matrix and
    # silently compute the wrong loss.
    labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1])
    loss = tf.losses.mean_squared_error(labels, combined_output)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'mse': tf.metrics.mean_squared_error(labels, combined_output)
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def input_fn(data_path, batch_size, shuffle=True, num_epochs=None):
    """Read a CSV into an input pipeline for the Estimator.

    Args:
        data_path: path to a CSV file with a 'label' column; every other
            column is treated as a feature.
        batch_size: number of examples per batch.
        shuffle: whether to shuffle examples (default True, as before).
        num_epochs: how many passes over the data; None (the default, the
            original behavior) repeats indefinitely — pass 1 for evaluation.

    Returns:
        A (features, labels) pair of tensors from a one-shot iterator.
    """
    frame = pd.read_csv(data_path)
    label_series = frame['label']
    feature_frame = frame.drop('label', axis=1)

    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(feature_frame), label_series))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size).repeat(num_epochs)

    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()
def train_and_evaluate(train_path='train_data_path.csv',
                       eval_path='eval_data_path.csv',
                       model_dir='your_model_dir',
                       batch_size=128,
                       train_steps=1000,
                       eval_steps=100):
    """Train the DeepFM estimator, then evaluate it.

    Args:
        train_path: CSV file used for training.
        eval_path: CSV file used for evaluation.
        model_dir: checkpoint/summary directory for the Estimator.
        batch_size: batch size for both phases.
        train_steps: number of training steps.
        eval_steps: number of evaluation batches.  The input pipeline repeats
            the data forever, so evaluate() MUST be bounded — the original
            call passed no ``steps`` and would never terminate.
    """
    estimator = tf.estimator.Estimator(
        model_fn=deep_fm_model,
        model_dir=model_dir,
    )
    estimator.train(
        input_fn=lambda: input_fn(train_path, batch_size=batch_size),
        steps=train_steps,
    )
    estimator.evaluate(
        input_fn=lambda: input_fn(eval_path, batch_size=batch_size),
        steps=eval_steps,
    )
# Script entry point: run the full train-then-evaluate cycle.
if __name__ == '__main__':
    train_and_evaluate()