近期需要大量调用谷歌翻译,在网上搜集了各种办法,但还是经常被限制;朋友帮忙申请的 300 美金免费额度也用完了。通过本地代理绕一下(没钱开那么多真正的代理,网上找来的免费资源也不靠谱)只能稍微起点作用,一度有点江郎才尽。最后发现是网上流传的 buildUrl 代码有问题:URL 参数名被误写成了 s1、t1、h1(数字 1),改成 sl、tl、hl(小写字母 l)之后,总算可以一直调用了(天下文章一大抄,嘿嘿)。所有资源都来自网上,懒得一一回溯出处了,在此一并感谢。当然,连续调用几百次翻译之后,最好还是随机延迟几分钟,以免被封(不知道会不会被封,没有测试)。
#!/usr/bin/python
#!--*-- coding:utf-8 --*--
import os
import sys
import time
import random
import requests
import json
#from googletrans import Translator
import googletrans
from translate import Translator
from bs4 import BeautifulSoup
import execjs #必须,需要先用pip 安装,用来执行js脚本
import pickle
from utils import safe_pickle_dump, strip_version, isvalidid, Config
from nltk import data
from nltk.tokenize import sent_tokenize
from google.cloud import translate
from googletrans.gtoken import TokenAcquirer
data.path.append(r"/root/nltk_data")
import urllib.request
from urllib.parse import quote
import http.cookiejar
# Translation backend selected for translate_text(). Values it recognizes:
#   "google"          - google.cloud translate client (translate.Client())
#   "crackedpython"   - the googletrans Translator instance created below
#   "translate-shell" - the external `trans` command
#   "mmtranslator"    - the `translate` package Translator created below
#   "crackedjs"       - direct HTTP request to translate.google.cn; any value
#                       not listed above takes this path as well
# The command line (second argument) may override this default.
translate_method = "crackedjs"
# When True, the direct HTTP requests in translate_text() go through the
# local proxy configured there; set to False to connect directly.
proxy_en = True
# Client used by the "crackedpython" backend.
translator = googletrans.Translator()
# Client used by the "mmtranslator" backend; target language is "zh".
mmtranslator= Translator(to_lang="zh")
# Client for the "google" backend; created lazily on first use in
# translate_text().
translate_client = None
def get_google_cookie():
    """Fetch the cookie that translate.google.cn hands out on a page visit.

    Requests the site's front page with browser-like headers, stores the
    received cookies in ``./logs/cookie.txt`` (Mozilla cookie-file format)
    and returns the value of the response's ``Set-Cookie`` header.

    Returns:
        The ``Set-Cookie`` header value, or ``''`` when the request fails
        or the response carries no such header.  Failing to write the
        cookie file is reported but does not discard the cookie.
    """
    headers = {
        'authority': 'translate.google.cn',
        'method': 'GET',
        'path': '/',
        'scheme': 'https',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
        # NOTE: placeholder ("replace with the value captured from your own
        # Chrome; probably not important").  http.client encodes header
        # values as Latin-1, so the request fails until this is replaced
        # with an ASCII value.
        'x-client-data': '替换成自己chrome浏览器里的捕捉的内容,可能并不重要'
    }
    cookie_file = './logs/cookie.txt'

    try:
        cookie = http.cookiejar.MozillaCookieJar(cookie_file)
        handler = urllib.request.HTTPCookieProcessor(cookie)
        opener = urllib.request.build_opener(handler)
        request = urllib.request.Request('https://translate.google.cn', headers=headers)
        # The context manager closes the connection; the body is not needed.
        with opener.open(request) as response:
            # A missing header yields None; normalize so callers always get a str.
            cookie_set = response.info()['Set-Cookie'] or ''
    except Exception as e:
        print("错误信息:")
        print(e)
        return ''

    # Persisting the jar is best effort: a write failure must not throw away
    # a cookie that was obtained successfully.
    try:
        os.makedirs(os.path.dirname(cookie_file), exist_ok=True)
        cookie.save(ignore_discard=True, ignore_expires=True)
    except OSError as e:
        print("错误信息:")
        print(e)
    return cookie_set
class Py4Js:
    """Evaluates the JavaScript ``tk`` token routine through PyExecJS."""

    # JavaScript source compiled by __init__.  TL(a) encodes the input to
    # UTF-8 bytes, mixes them with the hard-coded seeds b and b1 via RL(),
    # and returns "<n>.<n ^ b>".  RL(a, b) applies the shift/add/xor steps
    # spelled out by its three-character-per-step operation string.
    _TOKEN_SCRIPT = """
function TL(a) {
var k = "";
var b = 406644;
var b1 = 3293161072;
var jd = ".";
var $b = "+-a^+6";
var Zb = "+-3^+b+-f";
for (var e = [], f = 0, g = 0; g < a.length; g++) {
var m = a.charCodeAt(g);
128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
e[f++] = m >> 18 | 240,
e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
e[f++] = m >> 6 & 63 | 128),
e[f++] = m & 63 | 128)
}
a = b;
for (f = 0; f < e.length; f++) a += e[f],
a = RL(a, $b);
a = RL(a, Zb);
a ^= b1 || 0;
0 > a && (a = (a & 2147483647) + 2147483648);
a %= 1E6;
return a.toString() + jd + (a ^ b)
};
function RL(a, b) {
var t = "a";
var Yb = "+";
for (var c = 0; c < b.length - 2; c += 3) {
var d = b.charAt(c + 2),
d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
}
return a
}
"""

    def __init__(self):
        # Compile once; every getTk() call reuses this context.
        self.ctx = execjs.compile(self._TOKEN_SCRIPT)

    def getTk(self, text):
        """Return whatever the script's ``TL`` function yields for *text*."""
        token = self.ctx.call("TL", text)
        return token
def buildUrl(text, tk, *, sl='auto', tl='zh-CN', hl='zh-CN'):
    """Build the translate.google.cn ``translate_a/single`` request URL.

    Args:
        text: Query text, already percent-encoded by the caller; it is
            appended to the URL verbatim.
        tk: Request token for the text; converted with ``str()``.
        sl: Source language code (``'auto'`` lets the service detect it).
        tl: Target language code.
        hl: Interface language code.

    Returns:
        The complete URL as a string.
    """
    params = (
        ('client', 'webapp'),
        ('sl', sl),
        ('tl', tl),
        ('hl', hl),
        ('dt', 'at'),
        ('dt', 'bd'),
        ('dt', 'ex'),
        ('dt', 'ld'),
        ('dt', 'md'),
        ('dt', 'qca'),
        ('dt', 'rw'),
        ('dt', 'rm'),
        ('dt', 'ss'),
        ('dt', 't'),
        ('ie', 'UTF-8'),
        ('oe', 'UTF-8'),
        ('otf', '1'),
        ('pc', '1'),
        ('ssel', '0'),
        ('tsel', '0'),
        ('kc', '2'),
        ('tk', str(tk)),
    )
    query = '&'.join(key + '=' + value for key, value in params)
    # q goes last and is not re-encoded: the caller passes quoted text.
    return 'https://translate.google.cn/translate_a/single?' + query + '&q=' + text
def _translate_with_shell(text):
    """Translate *text* to zh-CN with the external ``trans`` command."""
    os.makedirs('./logs', exist_ok=True)
    # The text is handed over through a file; the command line is constant.
    with open('./logs/text.txt', "w", encoding="utf-8") as f:
        f.write(text)
    cmd = 'trans -e google -brief :zh-CN file://./logs/text.txt'
    with os.popen(cmd, 'r', 1) as pipe:
        return pipe.read()


def _join_segments(result):
    """Concatenate the translated text of every segment in *result*.

    ``result[0]`` is read as a list of segments whose first element is the
    translated text (presumably one segment per input sentence -- verify
    against a live response).  Segments without text are skipped.
    """
    return ''.join(segment[0] for segment in result[0] if segment and segment[0])


def translate_text(method, text, cookie_set):
    """Translate *text* into Chinese with the selected backend.

    Args:
        method: Backend name: ``"translate-shell"``, ``"crackedpython"``,
            ``"mmtranslator"`` or ``"google"``.  Any other value (for
            example ``"crackedjs"``) sends a direct request to
            translate.google.cn.
        text: Text to translate.
        cookie_set: Value sent as the ``cookie`` header of the direct
            request; unused by the other backends.

    Returns:
        The translated text.  The direct-request path returns ``''`` when
        the request or the parsing of its response fails, and the
        "mmtranslator" path returns ``''`` for a "MYMEMORY WARNING" reply.

    The direct-request path relies on the module globals ``acquirer`` and
    ``proxy_en``; the library paths use ``translator``, ``mmtranslator``
    and ``translate_client``.
    """
    global translate_client
    if not text:
        # Nothing to translate; avoid a pointless request.
        return ''
    if method == "translate-shell":
        return _translate_with_shell(text)
    if method == "crackedpython":
        return translator.translate(text, dest='zh-CN').text
    if method == "mmtranslator":
        text_cn = mmtranslator.translate(text)
        return '' if text_cn.startswith("MYMEMORY WARNING") else text_cn
    if method == "google":
        if translate_client is None:
            translate_client = translate.Client()
        translation = translate_client.translate(text, target_language='zh-CN')
        return translation['translatedText']

    headers = {
        'authority': 'translate.google.cn',
        'method': 'GET',
        'path': '',
        'scheme': 'https',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'cookie': cookie_set,
        'pragma': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
        # NOTE: placeholder ("replace with the value captured from your own
        # Chrome; probably not important").  Header values must be Latin-1
        # encodable, so replace it with an ASCII value before use.
        'x-client-data': '替换成自己chrome浏览器里的捕捉的内容,可能并不重要'
    }
    proxies = None
    if proxy_en:
        proxies = {
            "http": "http://127.0.0.1:8080",
            "https": "http://127.0.0.1:8080"
        }
    # The 'path' header carries everything after the scheme and host.
    host_prefix_len = len('https://translate.google.cn')

    url = buildUrl(quote(text), acquirer.do(text))
    headers['path'] = url[host_prefix_len:]
    try:
        r = requests.get(url, headers=headers, proxies=proxies)
        result = json.loads(r.text)
        # Index 7 holds the "did you mean ..." suggestion when present;
        # shorter responses simply have none.
        suggestion = result[7] if len(result) > 7 else None
        if suggestion is not None:
            # The input looks misspelled: translate the suggested text
            # instead, falling back to the input as typed on any failure.
            try:
                correctText = suggestion[0].replace('<b><i>', ' ').replace('</i></b>', '')
                print(correctText)
                # Quote the corrected text and use the same token source as
                # the first request.
                correctUrl = buildUrl(quote(correctText), acquirer.do(correctText))
                headers['path'] = correctUrl[host_prefix_len:]
                correctR = requests.get(correctUrl, headers=headers, proxies=proxies)
                return _join_segments(json.loads(correctR.text))
            except Exception as e:
                print(e)
        return _join_segments(result)
    except Exception as e:
        print(url)
        print("翻译"+text+"失败")
        print("错误信息:")
        print(e)
        return ''
if __name__ == '__main__':
    # Optional command-line arguments: <total> <translate_method>.
    # `total` is parsed here but not used in the code visible in this block.
    total = 0
    if len(sys.argv) > 1:
        total = int(sys.argv[1])
    if len(sys.argv) > 2:
        translate_method = sys.argv[2]

    # Both the cookie cache and other runtime files live under ./logs.
    os.makedirs('./logs', exist_ok=True)

    cookie_set = get_google_cookie()
    if cookie_set:
        # Cache the fresh cookie so a later run can reuse it if fetching fails.
        with open('./logs/cookie_set.txt', 'w') as f:
            f.write(cookie_set)
    elif os.path.exists('./logs/cookie_set.txt'):
        with open('./logs/cookie_set.txt', 'r') as f:
            cookie_set = f.read()
    else:
        cookie_set = ''
        print("Get google cookie failed!")
        if translate_method == "crackedjs":
            print("exit, no cookie for cracked js")
            # This is a failure, so report a non-zero exit status.
            sys.exit(1)

    # translate_text() reads `acquirer` as a module global; `js` is kept
    # available as the alternative token source.
    js = Py4Js()
    acquirer = TokenAcquirer()

    print('loading the paper database', Config.db_path)
    # NOTE(security): pickle.load executes arbitrary code from the file;
    # only load a database produced by a trusted source.
    with open(Config.db_path, 'rb') as db_file:
        db = pickle.load(db_file)

    # Smoke-test call; its return value is not used here.
    translate_text(translate_method, 'The quick brown fox jumps over the lazy dog.', cookie_set)