使用术语表的步骤:
- 在 Google Cloud 控制台的 Cloud Storage 中创建一个 bucket;
- 新建 glossary 术语表文件(TSV、CSV 或 TMX 格式),上传到该 bucket;文件格式说明见 https://cloud.google.com/translate/docs/advanced/glossary
- 调用下方的 create_glossary 函数创建术语表资源;
- 调用 translate_text_with_glossary 函数,使用术语表翻译文本。
from google.cloud import translate_v3 as translate
'''
PROJECT_NUMBER_OR_ID:您的 Google Cloud 项目编号或 ID
glossary-id:您的术语库 ID,例如 my_en_ru_glossary,
bucket-name:您的术语库文件所在的存储分区的名称,
glossary-filename:术语库的文件名,
input_uri:glossary文件上传到bucket之后会生成url,需要把它改写成 gs:// 形式,即 gs://bucket-name/glossary-filename,
glossary_id:术语表资源的 ID(即上面的 glossary-id);本文直接用文件名作为 ID
'''
def create_glossary(
    project_id="你的project_id",
    input_uri="gs://**glossary.csv",
    glossary_id="en-to-zh-cn-glossary",
    timeout=180,
    source_lang_code="en",
    target_lang_code="zh-CN",
    location="us-central1",
):
    """Create an equivalent-term-set glossary from a file in Cloud Storage.

    A glossary can contain words or short phrases (usually fewer than five
    words). File format:
    https://cloud.google.com/translate/docs/advanced/glossary#format-glossary

    Args:
        project_id: Google Cloud project number or ID.
        input_uri: ``gs://`` URI of the uploaded glossary file.
        glossary_id: ID to give the glossary resource.
        timeout: Seconds to wait for the create operation to finish.
        source_lang_code: First language code of the glossary's language set.
        target_lang_code: Second language code of the glossary's language set.
            Supported codes: https://cloud.google.com/translate/docs/languages
        location: Location in which the glossary is created.

    Returns:
        The result of the create operation, i.e. the object whose ``name``
        and ``input_config`` are printed below.
    """
    client = translate.TranslationServiceClient()
    name = client.glossary_path(project_id, location, glossary_id)

    # Use the top-level message classes rather than ``translate.types.*``:
    # this file later rebinds ``translate`` to ``google.cloud.translate``,
    # which is not guaranteed to expose a ``types`` attribute.
    language_codes_set = translate.Glossary.LanguageCodesSet(
        language_codes=[source_lang_code, target_lang_code]
    )
    gcs_source = translate.GcsSource(input_uri=input_uri)
    input_config = translate.GlossaryInputConfig(gcs_source=gcs_source)
    glossary = translate.Glossary(
        name=name,
        language_codes_set=language_codes_set,
        input_config=input_config,
    )

    parent = f"projects/{project_id}/locations/{location}"
    # A glossary is a custom dictionary the Translation API uses to
    # translate domain-specific terminology.
    operation = client.create_glossary(parent=parent, glossary=glossary)

    # Block until the operation completes or ``timeout`` seconds elapse.
    result = operation.result(timeout=timeout)
    print(f"Created: {result.name}")
    print(f"Input Uri: {result.input_config.gcs_source.input_uri}")
    return result
# Run the function: create the glossary using create_glossary's default arguments.
# NOTE(review): this call executes at module top level, so it also runs whenever
# this file is imported — confirm that is intended.
create_glossary()
# Imports the Google Cloud Translation library
from google.cloud import translate
# Initialize Translation client
# 翻译
def translate_text(
    text,
    source_language_code,
    target_language_code,
    project_id="你的project_id",
    mime_type="text/plain",
):
    """Translate one piece of text without a glossary.

    Args:
        text: The text to translate.
        source_language_code: Language code of ``text``.
        target_language_code: Language code to translate into.
        project_id: Google Cloud project ID (shown in the Google console).
        mime_type: Format of ``text``: ``"text/plain"`` or ``"text/html"``.
            Details on supported types:
            https://cloud.google.com/translate/docs/supported-formats

    Returns:
        The translated text of the first translation in the response, or
        ``None`` when the response contains no translations.
    """
    client = translate.TranslationServiceClient()
    location = "global"
    parent = f"projects/{project_id}/locations/{location}"

    response = client.translate_text(
        request={
            "parent": parent,
            "contents": [text],
            "mime_type": mime_type,
            "source_language_code": source_language_code,
            "target_language_code": target_language_code,
        }
    )

    # Only one string is sent in ``contents``, so only the first translation
    # is returned; an empty response yields None.
    first = next(iter(response.translations), None)
    if first is None:
        return None
    return first.translated_text
def translate_text_with_glossary(
    text,
    source_language_code,
    target_language_code,
    project_id="你的project_id",
    glossary_id="文件名",
    location="us-central1",
):
    """Translate one piece of text using a glossary.

    Args:
        text: The text to translate.
        source_language_code: Language code of ``text``.
        target_language_code: Language code to translate into.
            Supported codes: https://cloud.google.com/translate/docs/languages
        project_id: Google Cloud project ID (shown in the Google console).
        glossary_id: ID of the glossary resource to apply.
        location: Location of the glossary; also used for the request parent.

    Returns:
        The translated text of the first glossary translation in the
        response, or ``None`` when there are none. Every glossary translation
        is also printed to standard output.
    """
    client = translate.TranslationServiceClient()
    parent = f"projects/{project_id}/locations/{location}"

    # The same location is used for the glossary path and the request parent.
    glossary = client.glossary_path(project_id, location, glossary_id)
    glossary_config = translate.TranslateTextGlossaryConfig(glossary=glossary)

    response = client.translate_text(
        request={
            "contents": [text],
            "target_language_code": target_language_code,
            "source_language_code": source_language_code,
            "parent": parent,
            "glossary_config": glossary_config,
        }
    )

    translated_texts = [
        translation.translated_text
        for translation in response.glossary_translations
    ]

    print("Translated text: \n")
    for translated in translated_texts:
        print("\t {}".format(translated))

    # Return a value as translate_text does, so callers can use the result
    # instead of reading it from standard output.
    return translated_texts[0] if translated_texts else None