# Model: Helsinki-NLP/opus-mt-zh-en (zh -> en translation)
# https://hf-mirror.com/Helsinki-NLP/opus-mt-zh-en
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Load the tokenizer and the seq2seq model from the local checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")
model = AutoModelForSeq2SeqLM.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")
# Switch the model to evaluation mode; it is only used for inference below.
model.eval()
def translate(text):
    """Translate one string using the module-level ``tokenizer`` and ``model``.

    Args:
        text: The source string to translate.

    Returns:
        The decoded translation of ``text`` with special tokens stripped.
    """
    with torch.no_grad():
        # truncation=True clips an over-long input to the tokenizer's configured
        # maximum length instead of feeding the model more tokens than it was
        # set up to accept.
        encoded = tokenizer([text], return_tensors="pt", truncation=True)
        sequences = model.generate(**encoded)
    # A one-element batch went in, so the first decoded string is the result.
    return tokenizer.batch_decode(sequences, skip_special_tokens=True)[0]
# Demo call for translate(). The sample mixes Chinese text with stray ASCII
# punctuation. It is deliberately not named `input`, which would rebind the
# built-in input() for the rest of the module.
sample_text = "青春不能回头,所以青春没有终点。,.,/'[' ——《火影忍者》"
print(translate(sample_text))
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Batch example: translate several sentences with a single generate() call.
model = AutoModelForSeq2SeqLM.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")
tokenizer = AutoTokenizer.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")

content = ['你好 世界', '哈哈哈哈']
# padding=True lets sentences of different token lengths share one tensor batch.
inputs = tokenizer(content, padding=True, return_tensors="pt")
translated_tokens = model.generate(**inputs)

# Decode the whole batch first, then print one translation per line.
decoded = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
for translated in decoded:
    print(translated)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Pipeline example: wrap the same local checkpoint in a translation pipeline.
model = AutoModelForSeq2SeqLM.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")
tokenizer = AutoTokenizer.from_pretrained("/data/models/Helsinki-NLP/opus-mt-zh-en")

# NOTE(review): 'zho_Hans' / 'eng_Latn' look like NLLB-style language codes;
# confirm they have any effect on this checkpoint before relying on them.
translator = pipeline(
    task='translation',
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    src_lang='zho_Hans',
    tgt_lang='eng_Latn',
)

# The pipeline accepts a list of sentences; print its raw return value.
print(translator(["你好 世界", "青春"]))