问题排查记录(mark):embedding_lookup 索引越界(indices is not in [0, N))
- 时间:2024.01.20
- 问题排查:2h 😦
报错如下:
InvalidArgumentError (see above for traceback): indices[48,9] = 1656 is not in [0, 1656)
[[node Deep/embedding_lookup (defined at /apsara/TempRoot/Odps/tbcrm_algorithm_dev_202401200852275gtjap1qosod_4666727b_b21c_4cb6_9dd3_8cf7b352d11f_AlgoTask_0_0/worker@kangaroo011177170244.na610#21/tensorflow_jobs/AlimiCTR/models/deepfm.py:71) = GatherV2[Taxis=DT_INT32, Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:chief/replica:0/task:0/device:CPU:0"](Deep/concat, IteratorGetNext:2, Deep/embedding_lookup/axis)]]
[[{{node clip_by_global_norm/mul_5_S265}} = _Recv[client_terminated=false, recv_device="/job:ps/replica:0/task:1/device:CPU:0", send_device="/job:chief/replica:0/task:0/device:CPU:0", send_device_incarnation=3940832609751527887, tensor_name="edge_741_clip_by_global_norm/mul_5", tensor_type=DT_FLOAT, _device="/job:ps/replica:0/task:1/device:CPU:0"]()]]
解决方法 😃 :
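sparse_encoding_size 为类别变量的分桶数量。从上面的报错可以看到,特征 id 的取值可以等于该数量本身(1656),而 embedding 表的合法下标范围是 [0, 行数),因此 embedding 表的行数(emb size)必须设为 sparse_encoding_size + 1: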
# Keep the raw bucket count on the instance, exactly as get_sparse_encoding_size() returns it.
self.sparse_encoding_size = self.get_sparse_encoding_size()
# The fix: register the hyperparameter as bucket count + 1 (the instance attribute itself is not changed).
# NOTE(review): presumably this hyperparameter is what later sizes the embedding table
# (its number of rows) — confirm against the model code that reads 'sparse_encoding_size'.
self.set_model_hyperparameter('sparse_encoding_size', self.sparse_encoding_size + 1)
# Build the sparse-feature embeddings under DNN_SCOPE; the result of self.partitioner()
# is passed as the variable scope's partitioner.
# NOTE(review): indentation was lost in this paste — the statements below presumably
# form the body of the `with` block; confirm against the original model file.
with tf.variable_scope(DNN_SCOPE, partitioner=self.partitioner()):
# Read the dense and sparse inputs from the features mapping.
input_dense = features["dense_main"]
input_sparse = features['sparse_main']
# Embedding table with sparse_token_size rows; zero_pad=True makes row 0 an all-zero vector.
# NOTE(review): sparse_token_size must be strictly greater than the largest id in
# input_sparse, otherwise the lookup fails with "indices[...] is not in [0, N)".
# Presumably it comes from the 'sparse_encoding_size' hyperparameter (bucket count + 1) — confirm.
sparse_emb_table = get_token_embeddings(sparse_token_size, embedding_dim=embedding_dim,
variable_name="sparse_emb", zero_pad=True)
# Gather one embedding vector from the table for every id in input_sparse.
sparse_emb = tf.nn.embedding_lookup(sparse_emb_table, input_sparse)
其中,get_token_embeddings 的定义如下:
def get_token_embeddings(vocab_size, embedding_dim, variable_name="embedding_dim", zero_pad=True):
    """Return a float32 embedding table with ``vocab_size`` rows.

    The table is obtained through ``tf.get_variable`` in whatever variable
    scope is active at the call site, initialised from a truncated normal
    distribution with stddev 0.05.

    Args:
        vocab_size: Number of rows of the table. Ids looked up in the table
            must fall in ``[0, vocab_size)``.
        embedding_dim: Number of columns (size of each embedding vector).
        variable_name: Name handed to ``tf.get_variable``.
        zero_pad: When true, row 0 of the returned table is replaced by a
            constant all-zero row, so the result is a concatenated tensor
            rather than the variable itself.

    Returns:
        The variable itself when ``zero_pad`` is false; otherwise a tensor
        made of one zero row followed by rows ``1:`` of the variable.
    """
    normal_init = tf.truncated_normal_initializer(stddev=0.05, dtype=tf.float32)
    table = tf.get_variable(
        variable_name,
        dtype=tf.float32,
        shape=(vocab_size, embedding_dim),
        initializer=normal_init,
    )

    if not zero_pad:
        return table

    # Swap the first row for zeros; the remaining rows are kept as they are.
    pad_row = tf.zeros(shape=[1, embedding_dim])
    return tf.concat((pad_row, table[1:, :]), axis=0)