```python
# -*- coding:utf-8 -*-
"""
Author:
Yiyuan Liu, lyy930905@gmail.com
zanshuxun, zanshuxun@aliyun.com
Reference:
[1] [Jiaqi Ma, Zhe Zhao, Xinyang Yi, et al. Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts[C]](https://dl.acm.org/doi/10.1145/3219819.3220007)
"""
import tensorflow as tf
from deepctr.feature_column import build_input_features, input_from_feature_columns
from deepctr.layers.utils import combined_dnn_input
from deepctr.layers.core import PredictionLayer, DNN
from tensorflow.python.keras.initializers import glorot_normal
from tensorflow.python.keras.layers import Layer


class MMOELayer(Layer):
    """
    The Multi-gate Mixture-of-Experts layer in the MMOE model.

    Input shape
        - 2D tensor with shape: ``(batch_size, units)``.

    Output shape
        - A list with **num_tasks** elements, each a 2D tensor with shape: ``(batch_size, output_dim)``.

    Arguments
        - **num_tasks**: integer, the number of tasks, equal to the number of outputs.
        - **num_experts**: integer, the number of experts.
        - **output_dim**: integer, the dimension of each output of MMOELayer.

    References
        - [Jiaqi Ma, Zhe Zhao, Xinyang Yi, et al. Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts[C]](https://dl.acm.org/doi/10.1145/3219819.3220007)
    """

    def __init__(self, num_tasks, num_experts, output_dim, seed=1024, **kwargs):
        self.num_experts = num_experts
        self.num_tasks = num_tasks
        self.output_dim = output_dim
        self.seed = seed
        super(MMOELayer, self).__init__(**kwargs)

    def build(self, input_shape):
        input_dim = int(input_shape[-1])
        # One shared kernel holding all experts: (input_dim, num_experts * output_dim)
        self.expert_kernel = self.add_weight(
            name='expert_kernel',
            shape=(input_dim, self.num_experts * self.output_dim),
            dtype=tf.float32,
            initializer=glorot_normal(seed=self.seed))
        # One gate kernel per task: (input_dim, num_experts)
        self.gate_kernels = []
        for i in range(self.num_tasks):
            self.gate_kernels.append(self.add_weight(
                name='gate_weight_{}'.format(i),
                shape=(input_dim, self.num_experts),
                dtype=tf.float32,
                initializer=glorot_normal(seed=self.seed)))
        super(MMOELayer, self).build(input_shape)
    def call(self, inputs, **kwargs):
        outputs = []
        expert_out = tf.tensordot(inputs, self.expert_kernel, axes=(-1, 0))
        expert_out = tf.reshape(expert_out, [-1, self.output_dim, self.num_experts])
        for i in range(self.num_tasks):
            gate_out = tf.tensordot(inputs, self.gate_kernels[i], axes=(-1, 0))
            gate_out = tf.nn.softmax(gate_out)
            gate_out = tf.tile(tf.expand_dims(gate_out, axis=1), [1, self.output_dim, 1])
            output = tf.reduce_sum(tf.multiply(expert_out, gate_out), axis=2)
            outputs.append(output)
        return outputs
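
    # Shape walkthrough (illustrative note, not in the original file): for a
    # batch of B inputs with U units, E experts and output_dim D:
    #   expert_out: (B, U) @ (U, E*D) -> reshape -> (B, D, E)
    #   gate_out:   (B, U) @ (U, E)   -> softmax over experts -> tile -> (B, D, E)
    #   per-task output: sum over the expert axis -> (B, D)
    # i.e. each task k gets y_k = sum_i g_{k,i}(x) * f_i(x), the MMOE gating.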
    def get_config(self):
        config = {'num_tasks': self.num_tasks,
                  'num_experts': self.num_experts,
                  'output_dim': self.output_dim,
                  'seed': self.seed}
        base_config = super(MMOELayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        # One (batch_size, output_dim) shape per task.
        return [(input_shape[0], self.output_dim)] * self.num_tasks


def MMOE(dnn_feature_columns, num_tasks, tasks, num_experts=4, expert_dim=8, dnn_hidden_units=(128, 128),
         l2_reg_embedding=1e-5, l2_reg_dnn=0, task_dnn_units=None, seed=1024, dnn_dropout=0, dnn_activation='relu'):
    """Instantiates the Multi-gate Mixture-of-Experts architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param num_tasks: integer, number of tasks, equal to the number of outputs, must be greater than 1.
    :param tasks: list of str, indicating the loss of each task: ``"binary"`` for binary log loss, ``"regression"`` for regression loss, e.g. ``['binary', 'regression']``.
    :param num_experts: integer, number of experts.
    :param expert_dim: integer, the hidden units of each expert.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the shared-bottom DNN.
    :param l2_reg_embedding: float, L2 regularizer strength applied to embedding vectors.
    :param l2_reg_dnn: float, L2 regularizer strength applied to the DNN.
    :param task_dnn_units: list of positive integers or empty list, the layer number and units in each layer of the task-specific DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0, 1), the fraction of DNN units to drop.
    :param dnn_activation: activation function to use in the DNN.
    :return: a Keras model instance.
    """
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(tasks) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of tasks")
    for task in tasks:
        if task not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(task))

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Shared-bottom DNN, followed by the MMOE layer producing one output per task.
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  False, seed=seed)(dnn_input)
    mmoe_outs = MMOELayer(num_tasks, num_experts, expert_dim)(dnn_out)
    if task_dnn_units is not None:
        mmoe_outs = [DNN(task_dnn_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(mmoe_out)
                     for mmoe_out in mmoe_outs]

    task_outputs = []
    for mmoe_out, task in zip(mmoe_outs, tasks):
        logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(mmoe_out)
        output = PredictionLayer(task)(logit)
        task_outputs.append(output)

    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=task_outputs)
    return model
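
# Minimal usage sketch (illustrative, not in the original file), assuming a few
# toy feature columns built with deepctr's SparseFeat/DenseFeat:
# model = MMOE(dnn_feature_columns, num_tasks=2, tasks=['binary', 'binary'])
# model.compile('adam', loss=['binary_crossentropy', 'binary_crossentropy'])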
```

The run_mmoe part:
```python
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from time import time
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from mmoe import MMOE
from evaluation import evaluate_deepctr
# GPU settings (TF 1.x API; under TF 2.x use the tf.compat.v1 equivalents)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Let GPU memory grow on demand instead of pre-allocating all of it
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
if __name__ == "__main__":
    epochs = 2
    batch_size = 512
    embedding_dim = 16
    target = ["read_comment", "like", "click_avatar", "forward"]
    sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
    dense_features = ['videoplayseconds']

    data = pd.read_csv(r"C:\wechat_data\data\wechat_algo_data1\user_action.csv")
    feed = pd.read_csv(r'C:\wechat_data\data\wechat_algo_data1\feed_info.csv')
    feed[["bgm_song_id", "bgm_singer_id"]] += 1  # reserve 0 for unknown/missing ids
    feed[["bgm_song_id", "bgm_singer_id", "videoplayseconds"]] = \
        feed[["bgm_song_id", "bgm_singer_id", "videoplayseconds"]].fillna(0)
    feed['bgm_song_id'] = feed['bgm_song_id'].astype('int64')
    feed['bgm_singer_id'] = feed['bgm_singer_id'].astype('int64')
    data = data.merge(feed[['feedid', 'authorid', 'videoplayseconds', 'bgm_song_id', 'bgm_singer_id']], how='left',
                      on='feedid')
    test = pd.read_csv(r'C:\wechat_data\data\wechat_algo_data1\test_a.csv')
    test = test.merge(feed[['feedid', 'authorid', 'videoplayseconds', 'bgm_song_id', 'bgm_singer_id']], how='left',
                      on='feedid')

    # 1. Fill NaNs in the dense features and apply a simple log transformation
    data[dense_features] = data[dense_features].fillna(0)
    test[dense_features] = test[dense_features].fillna(0)
    data[dense_features] = np.log(data[dense_features] + 1.0)
    test[dense_features] = np.log(test[dense_features] + 1.0)

    print('data.shape', data.shape)
    print('data.columns', data.columns.tolist())
    print('unique date_: ', data['date_'].unique())
    train = data[data['date_'] < 14]
    val = data[data['date_'] == 14]  # day-14 samples serve as the validation set
    # 2. Count unique values of each sparse field and record dense feature field names
    # (note: vocabulary sizes come from the training data, so ids that appear
    # only in the test set would fall outside these embedding tables)
    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=embedding_dim)
                              for feat in sparse_features] + [DenseFeat(feat, 1) for feat in dense_features]
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(dnn_feature_columns)

    # 3. Generate input data for the model
    train_model_input = {name: train[name] for name in feature_names}
    val_model_input = {name: val[name] for name in feature_names}
    userid_list = val['userid'].astype(str).tolist()
    test_model_input = {name: test[name] for name in feature_names}
    train_labels = [train[y].values for y in target]
    val_labels = [val[y].values for y in target]
    # 4. Define the model, then train, predict and evaluate
    train_model = MMOE(dnn_feature_columns, num_tasks=4, expert_dim=8, dnn_hidden_units=(128, 128),
                       tasks=['binary', 'binary', 'binary', 'binary'])
    train_model.compile("adagrad", loss='binary_crossentropy')
    # print(train_model.summary())
    for epoch in range(epochs):
        history = train_model.fit(train_model_input, train_labels,
                                  batch_size=batch_size, epochs=1, verbose=1)
        val_pred_ans = train_model.predict(val_model_input, batch_size=batch_size * 4)
        evaluate_deepctr(val_labels, val_pred_ans, userid_list, target)

    t1 = time()
    pred_ans = train_model.predict(test_model_input, batch_size=batch_size * 20)
    t2 = time()
    print('Prediction time for %d samples over the 4 target actions (ms): %.3f' % (len(test), (t2 - t1) * 1000.0))
    ts = (t2 - t1) * 1000.0 / len(test) * 2000.0
    print('Average prediction time per 2000 samples over the 4 target actions (ms): %.3f' % ts)
    # 5. Generate the submission file
    for i, action in enumerate(target):
        test[action] = pred_ans[i]
    test[['userid', 'feedid'] + target].to_csv('result.csv', index=False, float_format='%.6f')
    print('to_csv ok')
```

The evaluation part:
```python
# coding: utf-8
import time
from collections import defaultdict

import numpy as np
from sklearn.metrics import roc_auc_score


def uAUC(labels, preds, user_id_list):
    """Calculate the user-level AUC (uAUC): the average AUC over valid users."""
    user_pred = defaultdict(list)
    user_truth = defaultdict(list)
    for idx, truth in enumerate(labels):
        user_id = user_id_list[idx]
        pred = preds[idx]
        user_pred[user_id].append(pred)
        user_truth[user_id].append(truth)

    # A user's AUC is defined only if their labels are not all positive
    # or all negative; otherwise the flag stays False and the user is skipped.
    user_flag = defaultdict(lambda: False)
    for user_id in set(user_id_list):
        truths = user_truth[user_id]
        flag = False
        for i in range(len(truths) - 1):
            if truths[i] != truths[i + 1]:
                flag = True
                break
        user_flag[user_id] = flag

    total_auc = 0.0
    size = 0.0
    for user_id in user_flag:
        if user_flag[user_id]:
            auc = roc_auc_score(np.asarray(user_truth[user_id]), np.asarray(user_pred[user_id]))
            total_auc += auc
            size += 1.0
    user_auc = float(total_auc) / size
    return user_auc
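
# Worked example (illustrative, not in the original file): with
# labels=[1, 0, 1, 1], preds=[0.9, 0.2, 0.8, 0.7], user_id_list=['u1', 'u1', 'u2', 'u2'],
# u1 has both classes so its AUC (1.0 here) counts, while u2 is all-positive
# and is skipped, giving uAUC = 1.0 over the single valid user.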


def compute_weighted_score(score_dict, weight_dict):
    """Compute the weighted uAUC from per-action uAUC values.

    Input:
        score_dict: dict mapping each action to its uAUC value
        weight_dict: dict mapping each action to its weight
    Output:
        score: weighted uAUC, float
    """
    score = 0.0
    weight_sum = 0.0
    for action in score_dict:
        weight = float(weight_dict[action])
        score += weight * score_dict[action]
        weight_sum += weight
    score /= float(weight_sum)
    score = round(score, 6)
    return score
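
# Worked example (illustrative): score_dict={'read_comment': 0.6, 'like': 0.7}
# with weight_dict={'read_comment': 4, 'like': 3} gives
# (4*0.6 + 3*0.7) / (4 + 3) = 4.5 / 7 ≈ 0.642857.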


def evaluate_deepctr(val_labels, val_pred_ans, userid_list, target):
    """Compute the uAUC of each target action, then the weighted uAUC."""
    eval_dict = {}
    for i, action in enumerate(target):
        eval_dict[action] = uAUC(val_labels[i], val_pred_ans[i], userid_list)
    print(eval_dict)
    # Per-action weights; actions absent from eval_dict are simply unused.
    weight_dict = {"read_comment": 4, "like": 3, "click_avatar": 2, "favorite": 1, "forward": 1,
                   "comment": 1, "follow": 1}
    weight_auc = compute_weighted_score(eval_dict, weight_dict)
    print("Weighted uAUC: ", weight_auc)
```