背景:
需要对页面的点击、点赞、评论等多项指标进行优化,利用 MMOE 模型将多项页面指标作为目标函数进行联合学习。出于隐私保护的需要,代码中隐去了具体使用的特征;实际应用时可以根据需要引入序列特征、hash 分桶等处理。
运行环境:
deepctr[GPU],pyspark==2.4.0,pandas,scikit-learn,numpy,keras==2.2.4
模型文件:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from time import time
from deepctr.feature_column import build_input_features, input_from_feature_columns,SparseFeat, DenseFeat, get_feature_names
from deepctr.layers.core import PredictionLayer, DNN
from deepctr.layers.utils import combined_dnn_input, reduce_sum
# from evaluation import evaluate_deepctr
def MMOE(dnn_feature_columns, num_experts=3, expert_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
         gate_dnn_hidden_units=(), l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu',
         dnn_use_bn=False, task_types=('binary', 'binary', 'binary'), task_names=('click', 'like', 'comment')):
    """Instantiates the Multi-gate Mixture-of-Experts multi-task learning architecture.

    All experts share the same input; each task owns a softmax gate that mixes the
    expert outputs, and a task-specific tower produces the final prediction.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param num_experts: integer, number of experts.
    :param expert_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of expert DNN.
    :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task-specific DNN.
    :param gate_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of gate DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, indicating the loss of each tasks, ``"binary"`` for binary logloss, ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, indicating the predict target of each tasks
    :return: a Keras model instance
    """
    num_tasks = len(task_names)
    # ---- argument validation: MMOE only makes sense with >1 task and >1 expert ----
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if num_experts <= 1:
        raise ValueError("num_experts must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")
    for task_type in task_types:
        if task_type not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(task_type))

    # Build Keras Input tensors from the feature columns, then look up sparse
    # embeddings and collect dense values.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    # Shared bottom input: flattened sparse embeddings concatenated with dense values.
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # build expert layer — every expert consumes the same shared input
    expert_outs = []
    for i in range(num_experts):
        expert_network = DNN(expert_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                             name='expert_' + str(i))(dnn_input)
        expert_outs.append(expert_network)
    expert_concat = tf.keras.layers.Lambda(lambda x: tf.stack(x, axis=1))(expert_outs)  # None,num_experts,dim

    mmoe_outs = []
    for i in range(num_tasks):  # one mmoe layer: nums_tasks = num_gates
        # build gate layers — one softmax gate per task weighs the experts
        gate_input = DNN(gate_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                         name='gate_' + task_names[i])(dnn_input)
        gate_out = tf.keras.layers.Dense(num_experts, use_bias=False, activation='softmax',
                                         name='gate_softmax_' + task_names[i])(gate_input)
        # expand to (None, num_experts, 1) so the weights broadcast over the expert dim
        gate_out = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out)
        # gate multiply the expert: weighted sum over the expert axis -> (None, dim)
        gate_mul_expert = tf.keras.layers.Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False),
                                                 name='gate_mul_expert_' + task_names[i])([expert_concat, gate_out])
        mmoe_outs.append(gate_mul_expert)

    task_outs = []
    for task_type, task_name, mmoe_out in zip(task_types, task_names, mmoe_outs):
        # build tower layer — task-specific head on top of the gated mixture
        tower_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                           name='tower_' + task_name)(mmoe_out)
        logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(tower_output)
        # PredictionLayer maps the logit per task_type (sigmoid for 'binary',
        # identity for 'regression' — per deepctr's PredictionLayer contract)
        output = PredictionLayer(task_type, name=task_name)(logit)
        task_outs.append(output)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=task_outs)
    return model
PySpark训练和保存:
import pyspark
from sklearn.metrics import roc_auc_score
"""
获取SparkSession
"""
def get_spark_session(app_name=""):
    """Create (or fetch) a Hive-enabled local SparkSession.

    :param app_name: display name for the Spark application.
    :return: a ``pyspark.sql.SparkSession`` instance.
    """
    # Plain key/value settings; applied in insertion order, matching the
    # original chained .config(...) calls one-for-one.
    settings = {
        'spark.driver.extraClassPath': '',
        'spark.sql.parquet.compression.codec': 'none',
        'spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation': 'true',
        'spark.driver.memory': '8g',
        'spark.executor.memory': '8g',
        'spark.executor.cores': '4',
        'spark.executor.instances': '40',
        'spark.speculation': 'true',
        'spark.kryoserializer.buffer.max': '2000m',
        'spark.ui.showConsoleProgress': 'false',
    }
    builder = pyspark.sql.SparkSession.builder
    for key, value in settings.items():
        builder = builder.config(key, value)
    return (builder
            .master("local[*]")
            .appName(app_name)
            .enableHiveSupport()
            .getOrCreate())
if __name__ == "__main__":
    # ---- hyper-parameters ----
    epochs = 1
    batch_size = 128
    embedding_dim = 16
    # Multi-task targets learned jointly by MMOE.
    target = ["click", "like", "comment"]
    # Feature lists are intentionally left empty for privacy; fill them in
    # with the real column names before running.
    sparse_features = []
    dense_features = []

    print("1、加载数据")
    spark_session = get_spark_session()
    sql_train = 'SELECT * from ${t1}'  # ${t1} is substituted by the scheduling platform
    df = spark_session.sql(sql_train)
    df.persist()  # cache: the DataFrame is read more than once below
    df.printSchema()  # show column info
    # Collect to the driver — assumes the sampled data fits in driver memory.
    df = df.toPandas()
    data = df[df['flag'] == 'train'][sparse_features + dense_features + target]
    val = df[df['flag'] == 'val'][sparse_features + dense_features + target]

    # 1. fill nan dense features and apply a simple log(1+x) transformation
    # (np.log1p is the numerically stable equivalent of np.log(x + 1.0))
    data[dense_features] = data[dense_features].fillna(0)
    val[dense_features] = val[dense_features].fillna(0)
    data[dense_features] = np.log1p(data[dense_features])
    val[dense_features] = np.log1p(val[dense_features])
    print('data.shape', data.shape)
    print('data.columns', data.columns.tolist())

    # 2. count #unique features for each sparse field, and record dense fields.
    # NOTE(review): vocabulary_size is derived from the *train* split only;
    # ids appearing only in val would overflow the embedding table — confirm
    # ids are encoded over the full dataset upstream.
    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=embedding_dim)
                              for feat in sparse_features] + [DenseFeat(feat, 1) for feat in dense_features]
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(dnn_feature_columns)

    # 3. generate model input dicts and one label array per task
    train_model_input = {name: data[name] for name in feature_names}
    val_model_input = {name: val[name] for name in feature_names}
    train_labels = [data[y].values for y in target]
    val_labels = [val[y].values for y in target]

    # 4. define model, train, predict and evaluate
    train_model = MMOE(dnn_feature_columns)
    train_model.compile("adagrad", loss='binary_crossentropy')
    for epoch in range(epochs):
        history = train_model.fit(train_model_input, train_labels,
                                  batch_size=batch_size, epochs=1, verbose=1)
        # predict() on a multi-output model returns one array per task in
        # task_names order; index the outputs directly. (The original
        # val_pred_ans[:][-3] form only worked because [:] is a no-op copy.)
        val_pred_ans = train_model.predict(val_model_input, batch_size=batch_size * 4)
        validation_click_roc_auc = roc_auc_score(val[['click']], val_pred_ans[0])
        validation_like_roc_auc = roc_auc_score(val[['like']], val_pred_ans[1])
        validation_comment_roc_auc = roc_auc_score(val[['comment']], val_pred_ans[2])
        print('epoch----------------------', validation_click_roc_auc, validation_like_roc_auc,
              validation_comment_roc_auc)
    # Save once after training finishes; saving inside the loop would just
    # overwrite the same path every epoch.
    train_model.save('${MODEL_HOME}/....')
模型加载与预测:
from tensorflow.keras.models import load_model
# DeepCTR's custom layers (DNN, PredictionLayer, ...) must be registered with
# Keras at load time, otherwise load_model raises "Unknown layer: DNN".
from deepctr.layers import custom_objects

# Rebuild the validation input dict (feature_names must match training).
val_model_input = {name: val[name] for name in feature_names}

# Load the full saved model (graph + weights) in one call. Rebuilding the
# architecture via MMOE(dnn_feature_columns) first is unnecessary here — that
# assignment was immediately overwritten; it is only needed if you switch to
# load_weights() instead of load_model().
test_model = load_model('', custom_objects=custom_objects)
test_model.predict(val_model_input)
参考: