1. 输入内容映射为 ID:以单个字(token)为单位逐个进行映射
paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_ids
def convert_tokens_to_ids(self, tokens):
    """Map a token, or an iterable of tokens, to vocabulary id(s).

    Tokens present in ``self.added_tokens_encoder`` take precedence; every
    other token is resolved through ``self._convert_token_to_id``.

    Args:
        tokens: ``None``, a single token string, or an iterable of tokens.

    Returns:
        ``None`` when ``tokens`` is ``None``; a single id when ``tokens`` is
        a ``str``; otherwise a list with one id per token, in input order.
    """
    if tokens is None:
        return None

    def _token_to_id(token):
        # Added tokens override the base vocabulary lookup.
        if token in self.added_tokens_encoder:
            return self.added_tokens_encoder[token]
        return self._convert_token_to_id(token)

    # A bare string is one token, not a sequence of characters to iterate.
    if isinstance(tokens, str):
        return _token_to_id(tokens)

    return [_token_to_id(token) for token in tokens]