利用正则截取页面结果
# coding=utf-8
import re
import sys
import requests
from future.backports.urllib import parse
# URL template for the translate.google.cn "/m" page. The three %s slots are
# filled, in order, with the URL-quoted text (q), the target language (tl)
# and the source language (sl).
GOOGLE_TRANSLATE_URL = 'http://translate.google.cn/m?q=%s&tl=%s&sl=%s'
def translate(text, text_language="auto", to_language="auto", timeout=10):
    """Translate ``text`` by scraping the page behind ``GOOGLE_TRANSLATE_URL``.

    The page is fetched and the content of the first element whose ``class``
    attribute is ``t0`` or ``result-container`` is taken as the translation.

    Args:
        text: Text to translate; it is URL-quoted before being sent.
        text_language: Source language code, sent as ``sl``.
        to_language: Target language code, sent as ``tl``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The translated text encoded as UTF-8, or ``""`` when the page holds
        no matching result element.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            server does not answer within ``timeout`` seconds.
    """
    # Imported locally so the fix does not depend on the file's import block;
    # the fallback keeps the function usable on Python 2.
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        from HTMLParser import HTMLParser  # Python 2
        unescape = HTMLParser().unescape

    url = GOOGLE_TRANSLATE_URL % (parse.quote(text), to_language, text_language)
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    matches = re.findall(r'(?s)class="(?:t0|result-container)">(.*?)<',
                         response.text)
    if not matches:
        return ""
    # The captured text is raw HTML, so entities such as &#39; or &amp; have
    # to be decoded to get the actual translated characters.
    return unescape(matches[0]).encode("utf-8")
def main(args):
    """Translate a sample phrase to English and back, printing each result.

    Args:
        args: Command-line arguments; not used.
    """
    english = translate('期数', "zh-CN", "en")
    print(english)
    round_trip = translate(english, "en", "zh-CN")
    print(round_trip)


if __name__ == '__main__':
    # Run the demo only when this file is executed as a script.
    main(sys.argv[1:])
调接口取结果
# coding=utf-8
import sys
from urllib2 import build_opener, ProxyHandler
import requests
import json
# Proxy setup for requests:
# pip install urllib3==1.25.11
import urllib3
# Endpoint that google_translate() sends its request to.
PATH = "https://translate.googleapis.com/translate_a/single"
# Value sent as the "client" query parameter.
CLIENT = "gtx"
# Value sent as the User-Agent request header.
USER_AGENT = "Mozilla/5.0"
# Mapping of language code -> language name. It is not referenced by any code
# visible in this file. Keys are lowercase (e.g. 'zh-cn'), whereas main()
# passes "zh-CN" to google_translate().
LANGUAGES = {'af': 'afrikaans', 'sq': 'albanian', 'am': 'amharic', 'ar': 'arabic', 'hy': 'armenian',
             'az': 'azerbaijani', 'eu': 'basque', 'be': 'belarusian', 'bn': 'bengali', 'bs': 'bosnian',
             'bg': 'bulgarian', 'ca': 'catalan', 'ceb': 'cebuano', 'ny': 'chichewa', 'zh-cn': 'chinese (simplified)',
             'zh-tw': 'chinese (traditional)', 'co': 'corsican', 'hr': 'croatian', 'cs': 'czech', 'da': 'danish',
             'nl': 'dutch', 'en': 'english', 'eo': 'esperanto', 'et': 'estonian', 'tl': 'filipino', 'fi': 'finnish',
             'fr': 'french', 'fy': 'frisian', 'gl': 'galician', 'ka': 'georgian', 'de': 'german', 'el': 'greek',
             'gu': 'gujarati', 'ht': 'haitian creole', 'ha': 'hausa', 'haw': 'hawaiian', 'iw': 'hebrew', 'hi': 'hindi',
             'hmn': 'hmong', 'hu': 'hungarian', 'is': 'icelandic', 'ig': 'igbo', 'id': 'indonesian', 'ga': 'irish',
             'it': 'italian', 'ja': 'japanese', 'jw': 'javanese', 'kn': 'kannada', 'kk': 'kazakh', 'km': 'khmer',
             'ko': 'korean', 'ku': 'kurdish (kurmanji)', 'ky': 'kyrgyz', 'lo': 'lao', 'la': 'latin', 'lv': 'latvian',
             'lt': 'lithuanian', 'lb': 'luxembourgish', 'mk': 'macedonian', 'mg': 'malagasy', 'ms': 'malay',
             'ml': 'malayalam', 'mt': 'maltese', 'mi': 'maori', 'mr': 'marathi', 'mn': 'mongolian',
             'my': 'myanmar (burmese)', 'ne': 'nepali', 'no': 'norwegian', 'ps': 'pashto', 'fa': 'persian',
             'pl': 'polish', 'pt': 'portuguese', 'pa': 'punjabi', 'ro': 'romanian', 'ru': 'russian', 'sm': 'samoan',
             'gd': 'scots gaelic', 'sr': 'serbian', 'st': 'sesotho', 'sn': 'shona', 'sd': 'sindhi', 'si': 'sinhala',
             'sk': 'slovak', 'sl': 'slovenian', 'so': 'somali', 'es': 'spanish', 'su': 'sundanese', 'sw': 'swahili',
             'sv': 'swedish', 'tg': 'tajik', 'ta': 'tamil', 'te': 'telugu', 'th': 'thai', 'tr': 'turkish',
             'uk': 'ukrainian', 'ur': 'urdu', 'uz': 'uzbek', 'vi': 'vietnamese', 'cy': 'welsh', 'xh': 'xhosa',
             'yi': 'yiddish', 'yo': 'yoruba', 'zu': 'zulu', 'fil': 'Filipino', 'he': 'Hebrew'}
def google_translate(text, src, dest, timeout=10):
    """Translate ``text`` by calling the endpoint stored in ``PATH``.

    Args:
        text: Text to translate, sent as the ``q`` query parameter.
        src: Source language code, sent as ``sl``.
        dest: Target language code, sent as ``tl``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``[0][0][0]`` element of the JSON response encoded as UTF-8, or
        ``None`` when the server answers with a status other than 200.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            server does not answer within ``timeout`` seconds.
    """
    # Hand the query to requests as a mapping so every value is
    # percent-encoded; formatting the text straight into the URL broke the
    # request for input containing "&", "#", "+" or spaces.
    query = {
        "client": CLIENT,
        "sl": src,
        "tl": dest,
        "dt": "t",
        "q": text,
    }
    headers = {'User-Agent': USER_AGENT}
    # NOTE(review): verify=False switches off TLS certificate checking and
    # disable_warnings() hides the warning about it. Kept as in the original;
    # confirm whether this is still required in the target environment.
    urllib3.disable_warnings()
    res = requests.post(PATH, params=query, headers=headers, verify=False,
                        timeout=timeout)
    if res.status_code != 200:
        return None
    # NOTE(review): only the first entry of the payload is returned;
    # presumably longer input comes back as several entries - verify.
    return json.loads(res.text)[0][0][0].encode("utf-8")
def main(args):
    """Translate a sample phrase to English and back, printing each result.

    Args:
        args: Command-line arguments; not used.
    """
    english = google_translate('期数', "zh-CN", "en")
    print(english)
    round_trip = google_translate(english, "en", "zh-CN")
    print(round_trip)


if __name__ == '__main__':
    # Run the demo only when this file is executed as a script.
    main(sys.argv[1:])