# Use Facebook's open-source NLLB translation model.
# Quick smoke test: translate one Romanian sentence into English.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Checkpoint and NLLB language codes used by this demo.
MODEL_NAME = "facebook/nllb-200-distilled-1.3B"
SRC_LANG = "ron_Latn"  # Romanian, Latin script
TGT_LANG = "eng_Latn"  # English, Latin script

# Load the pretrained tokenizer and model.
# The checkpoint is publicly available, so the deprecated `use_auth_token=True`
# argument (which requires a cached Hugging Face login) is not passed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, src_lang=SRC_LANG)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Romanian text to translate.
article = "Şeful ONU spune că nu există o soluţie militară în Siria"

# Encode the text into the PyTorch tensors the model expects as input.
inputs = tokenizer(article, return_tensors="pt")

# Generate the translated token sequence, forcing the target-language code as
# the first generated token so the model decodes into English.
# `convert_tokens_to_ids` is used instead of `tokenizer.lang_code_to_id`, which
# is deprecated and no longer available in recent transformers releases.
# NOTE: max_length=30 caps the output length; longer inputs need a larger limit.
translated_tokens = model.generate(
    **inputs,
    forced_bos_token_id=tokenizer.convert_tokens_to_ids(TGT_LANG),
    max_length=30,
)

# Decode the generated token ids back into readable text, dropping special
# tokens such as the language code and end-of-sequence marker.
translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

# Print the translation.
print(translated_text)