# Source code
import tensorflow as tf
import sys
# Base directory for the sample data. Override by passing it as the first
# CLI argument (e.g. `python script.py /path/to/data/`); otherwise fall back
# to the local default path.
basePath = sys.argv[1] if len(sys.argv) > 1 else "D:/data/work/"
# NOTE(review): basePath is assumed to end with a path separator — the
# concatenations below do not insert one. Confirm callers pass a trailing "/".
trainPath = "file:///" + basePath + "sampledata/trainingSamples2"
testPath = "file:///" + basePath + "sampledata/testSamples2"
# Output directory for trained tensors/model artifacts.
tensorPath = basePath + "tensor"

# Fetch the training samples CSV (cached by Keras under ~/.keras by default).
training_samples_file_path = tf.keras.utils.get_file("trainingSamples.csv",
                                                     trainPath)
# Fetch the test samples CSV.
test_samples_file_path = tf.keras.utils.get_file("testSamples.csv",
                                                 testPath)
# load sample as tf dataset
# load sample as tf dataset
def get_dataset(file_path, batch_size=12):
    """Load a CSV file of samples as a batched tf.data dataset.

    Args:
        file_path: Path to a CSV file containing a 'label' column.
        batch_size: Samples per batch; defaults to 12 (the original
            hard-coded value), kept as a parameter for flexibility.

    Returns:
        A tf.data.Dataset yielding (features, label) batches for one epoch.
    """
    dataset = tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=batch_size,
        label_name='label',
        # Treat "0" in the CSV as a missing value.
        na_value="0",
        num_epochs=1,
        # Skip malformed CSV lines rather than failing the whole pipeline.
        ignore_errors=True)
    return dataset
# Build the training and test datasets from the downloaded CSV samples.
train_dataset = get_dataset(training_samples_file_path)
test_dataset = get_dataset(test_samples_file_path)

# Item id embedding feature: hash 'itemId' into 1001 buckets, then embed each
# bucket as a 10-dimensional dense vector.
# NOTE(review): hashing can collide distinct ids; the bucket sizes below
# (1001 / 30001) are presumably sized to the id cardinality — confirm against
# the actual data.
movie_col = tf.feature_column.categorical_column_with_hash_bucket(key='itemId', hash_bucket_size=1001)
movie_emb_col = tf.feature_column.embedding_column(movie_col, 10)

# User id embedding feature: same hashing scheme with 30001 buckets and a
# 10-dimensional embedding.
user_col = tf.feature_column.categorical_column_with_hash_bucket(key='userId', hash_bucket_size=30001)
user_emb_col = tf.feature_column.embedding_column(user_col, 10)
# define