# 1. YouTube-style recall (candidate-generation) algorithms
# Deep recall models: DSSM, FM, NCF, SDM, MIND
# NOTE: use TensorFlow 1.x — with TF 2.x DeepMatch raises an eager-execution error
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from deepctr.inputs import SparseFeat, VarLenSparseFeat
# from deepctr.inputs import build_input_features
from sklearn.preprocessing import LabelEncoder
from deepmatch.models import *
from deepmatch.utils import sampledsoftmaxloss
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
print(tf.__version__)
# tf.compat.v1.disable_eager_execution()
#
# tf.compat.v1.enable_eager_execution()
# tf.config.experimental_run_functions_eagerly(True)
# Load the MovieLens sample dataset.
# BUG FIX: original line was a garbled duplicate assignment
# (`data = pd.read_csvdata = pd.read_csv(...)`) — a SyntaxError.
data = pd.read_csv("/Users/lonng/Desktop/推荐学习/deep_rec/movielens_sample.txt")

sparse_features = ["movie_id", "user_id",
                   "gender", "age", "occupation", "zip", ]
SEQ_LEN = 50  # max length of a user's historical movie-id sequence

# Label-encode every sparse feature to contiguous integer ids starting at 1
# (0 is implicitly reserved, e.g. for sequence padding), and record each
# feature's vocabulary size (max id + 1) for building embedding tables.
features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']
feature_max_idx = {}
for feature in features:
    lbe = LabelEncoder()
    data[feature] = lbe.fit_transform(data[feature]) + 1
    feature_max_idx[feature] = data[feature].max() + 1

# One row per user with profile features, indexed by user_id;
# one row per distinct item.
user_profile = data[["user_id", "gender", "age", "occupation", "zip"]].drop_duplicates('user_id')
item_profile = data[["movie_id"]].drop_duplicates('movie_id')
user_profile.set_index("user_id", inplace=True)

# Per-user list of watched movie_ids (chronological ordering is applied
# later via the timestamp sort inside gen_data_set).
user_item_list = data.groupby("user_id")['movie_id'].apply(list)
from tqdm import tqdm
import random
import numpy as np
def gen_data_set(data, negsample=0):
data.sort_values("timestamp", inplace=True)
item_ids = data['movie_id'].unique()
train_set = []
test_set = []
for reviewer