import tiktoken
# Load the encoding used by GPT-3.5 Turbo. This must be "cl100k_base" (not
# "gpt2"): the variable name, the comment, and the special-token id added
# below (100264) all refer to the cl100k_base vocabulary.
cl100k_base = tiktoken.get_encoding("cl100k_base")

# Build a derived encoding that reuses the base split pattern and BPE merge
# table and registers one additional special token, "<|txtend|>".
# In production, load the arguments directly instead of accessing private attributes
# See openai_public.py for examples of arguments for specific encodings
enc = tiktoken.Encoding(
    # If you're changing the set of special tokens, make sure to use a different name
    # It should be clear from the name what behaviour to expect.
    # NOTE(review): "gggg" is not descriptive; consider something like
    # "cl100k_txtend" once nothing depends on the current name.
    name="gggg",
    pat_str=cl100k_base._pat_str,
    mergeable_ranks=cl100k_base._mergeable_ranks,
    special_tokens={
        # Keep every special token of the base encoding ...
        **cl100k_base._special_tokens,
        # ... and add the custom one under an explicit id.
        "<|txtend|>": 100264,
    },
)

# Smoke check: with allowed_special="all", special-token text in the input is
# encoded as a special token instead of raising. The returned token list is
# discarded here (it is only displayed in a REPL / notebook).
enc.encode("\n<|txtend|>", allowed_special="all")