# -*- coding: utf-8 -*-
"""
使用post请求,调用百度翻译
使用的是:
urllib.request.Request
urllib.request.urlopen()
urllib.parse.urlencode()
urlopen()方法中的url参数可以是字符串,也可以是一个Request对象
文中需要js加密,来自下面大佬链接
https://www.cnblogs.com/share-record/p/10724408.html
"""
import urllib
from urllib.request import Request, urlopen
from urllib.parse import urlencode
import json
import ssl
import execjs
"""
当出现报错:<urlopen error [SSL: BAD_SIGNATURE] bad signature (_ssl.c:1056)>
可多尝试几次,或者将下面的这行代码再复制一行
"""
# Replace the ssl module's default HTTPS context factory with the unverified
# one, so HTTPS requests made through urlopen() no longer validate server
# certificates. The original author added this to avoid SSL errors.
# NOTE(review): this is a process-wide change that relies on private ssl
# attributes; it disables verification rather than adding encryption.
ssl._create_default_https_context = ssl._create_unverified_context # disable certificate verification for urlopen()
# Prompt for the text to translate as soon as the module is loaded; the
# functions below read this module-level name.
query = input("请输入要翻译的英文: ")
# Request headers sent with the translation request. The original author notes
# that both user-agent and cookie must be present.
# NOTE(review): the cookie looks like it was copied from a browser session and
# carries session identifiers (e.g. BDUSS) — presumably it expires and should
# be treated as a secret; confirm before sharing this file.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
'cookie': 'BIDUPSID=D2B8972B8BB529C1B149B966F9B10202; PSTM=1587293381; BAIDUID=D2B8972B8BB529C11EEF8BCDFF44331C:FG=1; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; HISTORY_SWITCH=1; SOUND_PREFER_SWITCH=1; MCITY=-194%3A; BDUSS=WJHbEdIflh1aVJENDBCcXRNMmM1STA2TnQtcDQxdkI3flhPSloyZ1FKNFhBckpmSVFBQUFBJCQAAAAAAAAAAAEAAABGOVyv0MTW0LXExMe49rXItP0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABd1il8XdYpfQ; BDUSS_BFESS=WJHbEdIflh1aVJENDBCcXRNMmM1STA2TnQtcDQxdkI3flhPSloyZ1FKNFhBckpmSVFBQUFBJCQAAAAAAAAAAAEAAABGOVyv0MTW0LXExMe49rXItP0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABd1il8XdYpfQ; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1603121480,1603721041,1604501323,1604501345; H_PS_PSSID=32819_1462_33038_32951_33061_31254_32723_32962_32957; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=6; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1605285352; __yjsv5_shitong=1.0_7_9f5a013d775e541fd4462b27e1c97346b19e_300_1605285353200_183.37.158.56_c7ed83fa; yjs_js_security_passport=7a93f32a61e5ab0509acdf625c875bb417862a4a_1605285354_js'
}
def create_sign(query):
    """Compute the ``sign`` value for *query* by running the embedded JS.

    The JavaScript below is compiled with ``execjs`` and its function ``e``
    is called with *query*. In outline, ``e``:

    * shortens inputs longer than 30 characters to the first 10, the 10
      around the middle and the last 10 characters;
    * encodes the characters as UTF-8 bytes;
    * folds those bytes into a number with the helper ``n`` and the
      hard-coded seed ``'320305.131321201'``;
    * returns a string of the form ``"<p>.<p ^ m>"``.

    Args:
        query: The text that is about to be translated.

    Returns:
        Whatever the JS function ``e`` returns for *query* (a dotted
        numeric string).
    """
    # The script is kept exactly as published; the whitespace inside the
    # string literal is part of the runtime value, so it is left untouched.
    js_source = """
function a(r) {
if (Array.isArray(r)) {
for (var o = 0, t = Array(r.length); o < r.length; o++)
t[o] = r[o];
return t
}
return Array.from(r)
}
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a),
a = "+" === o.charAt(t + 1) ? r >>> a : r << a,
r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
var i = null;
function e(r) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
var u = void 0, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
u = null !== i ? i : (i = '320305.131321201' || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
S[c++] = A >> 18 | 240,
S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
S[c++] = A >> 6 & 63 | 128),
S[c++] = 63 & A | 128)
}
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
p += S[b],
p = n(p, F);
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
"""
    js_runtime = execjs.compile(js_source)
    return js_runtime.call('e', query)
def is_Chinese(text=None):
    """Return True if the text contains at least one Chinese character.

    A character counts as Chinese when it lies in the range U+4E00..U+9FFF.
    The whole string is scanned, so mixed input such as ``"hello 世界"`` is
    still detected.

    Args:
        text: The string to inspect. When omitted (``None``), the
            module-level ``query`` is inspected, which keeps the original
            zero-argument call style working.

    Returns:
        True if any character is in the range above, otherwise False
        (an empty string yields False).
    """
    if text is None:
        # Backward-compatible default: fall back to the module-level input.
        text = query
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)
# 定义执行函数
def fanyi(timeout=10):
    """Translate the module-level ``query`` with Baidu Translate and print it.

    The direction is picked with ``is_Chinese()``: Chinese -> English when it
    returns True, otherwise English -> Chinese. The request is a POST to the
    ``v2transapi`` endpoint whose form body carries the language pair, the
    query text, a fixed token and the signature from ``create_sign``.

    Args:
        timeout: Seconds to wait for the network before giving up. Optional;
            the original code waited without a limit.

    Raises:
        SystemExit: With status 1 when the request fails or the response is
            not valid UTF-8 JSON (the error is printed first).
    """
    if is_Chinese():
        source_lang, target_lang = 'zh', 'en'
    else:
        source_lang, target_lang = 'en', 'zh'

    # Baidu Translate endpoint; the language pair is also repeated in the body.
    url = 'https://fanyi.baidu.com/v2transapi?from={}&to={}'.format(source_lang, target_lang)
    sign = create_sign(query)
    params = {
        'from': source_lang,
        'query': query,
        'to': target_lang,
        'token': '186f535b2be9fd3f2abbc75dc3fa63e4',
        'sign': sign,
    }

    try:
        # urlopen() only accepts a bytes body for POST data.
        data = urlencode(params).encode('utf-8')
        request = Request(url, data=data, headers=headers, method='POST')
        # The context manager closes the connection even if reading fails.
        with urlopen(request, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode('utf-8'))
    except Exception as error:
        # Script-level boundary: report any network/decoding failure and stop.
        print('请求时错误!!!您的请求返回\n %s' % (error,))
        # Exit with a non-zero status; the `exit` builtin is injected by the
        # `site` module and is not guaranteed to exist, so raise directly.
        raise SystemExit(1)

    try:
        translation = payload['trans_result']['data'][0]['dst']
    except (KeyError, IndexError, TypeError):
        # The service answered, but not with the expected structure
        # (typically an error object); show it so the user can diagnose.
        print('请求错误!!!您请求获取到的值为:\n%s' % (payload,))
    else:
        print('您想要翻译的 %s 的译文是 %s' % (query, translation))
        print(payload)
# Script entry point: run the translation only when this file is executed
# directly.
if __name__ == '__main__':
    fanyi()
# 通过urllib库爬取百度翻译,实现中文翻译为英文,英文翻译为中文
# 最新推荐文章于 2024-03-20 16:21:42 发布