Python3 有道云翻译反爬虫
反爬虫原理取自下面这篇文章
https://blog.csdn.net/nunchakushuang/article/details/75294947
-----------------------------分割线-----------------------------------------------
大佬图挂了,我自己补充下
右键查看网页源码,搜索其中引用的 .js 文件;包含 salt 的 js 文件才是我们需要的,最终排查到 fanyi.min.js 文件。
美化后找到关键代码
这样就顺利知道了 salt sign的值
----------------------------------------------分割线----------------------------------------------------------------
实验之后发现有道云做了部分更改,现将变更的内容及代码贴出如下:
1.chrome F12 追踪post请求
发现除了上述链接文章中提出的 salt sign 之外,还有ts 字段是变化的
ts 的值就是 salt 去掉最后一位字符后的结果(是截掉末位字符,而不是做数值减法)
2.headers 如果只保留 User-Agent 字段,会报 {'errorCode': 50}
3.查询语句 和 查询单词 返回结果结构不同
import requests
import json
import time
import random
import hashlib
def _build_form(content):
    """Build the signed POST form used to translate *content*.

    The per-request fields are derived as follows:

    * ``ts``   - current time in milliseconds, as a decimal string.
    * ``salt`` - ``ts`` with one random decimal digit appended, so that
      ``ts`` equals ``salt`` without its last character.
    * ``sign`` - MD5 hex digest of client id + content + salt + secret.

    Args:
        content: The text to translate.

    Returns:
        A dict of form fields ready to be passed as ``data=`` to
        ``requests.post``.
    """
    client = 'fanyideskweb'
    secret = '1L5ja}w$puC.v_Kz3@yYn'  # looked up in the site's fanyi.js

    ts = str(int(time.time() * 1000))
    # Append a digit instead of adding a number: integer addition keeps the
    # salt at the same length as ts, which makes ``ts`` come out one digit
    # short of a millisecond timestamp.
    salt = ts + str(random.randint(0, 9))

    # UTF-8 (not ASCII) so that non-ASCII input such as Chinese text can be
    # signed instead of raising UnicodeEncodeError.
    raw = client + content + salt + secret
    sign = hashlib.md5(raw.encode('utf-8')).hexdigest()

    return {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': client,
        'salt': salt,
        'sign': sign,
        'ts': ts,
        'bv': 'd6c3cd962e29b66abe48fcb8f4dd7f7d',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTlME',
        'typoResult': 'false'
    }


def main():
    """Interactively translate sentences typed by the user.

    Repeatedly prompts for a sentence, posts it to the translate endpoint
    and prints the result. An empty input line ends the loop. Request
    failures and error responses are reported and the loop continues with
    the next prompt.
    """
    # The URL and headers do not change between requests, so build them once.
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # No hard-coded Content-Length: the body size depends on the input, and
    # the HTTP library fills in the correct value for form data.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'OUTFOX_SEARCH_USER_ID=1658584979@10.169.0.84; OUTFOX_SEARCH_USER_ID_NCOO=1097007475.90215; JSESSIONID=aaa-boq2ULGWp89ZhV6Nw; ___rl__test__cookies=1554690491011',
        'Host': 'fanyi.youdao.com',
        'Origin': 'http://fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }

    while True:
        content = input('请输入需要翻译的句子:')
        if content == '':
            print("拜拜~\n")
            break

        try:
            # A timeout keeps the prompt from hanging forever on a dead link.
            response = requests.post(url, data=_build_form(content),
                                     headers=headers, timeout=10)
            response.encoding = response.apparent_encoding
            result = response.json()
        except (requests.RequestException, ValueError) as err:
            # ValueError covers a body that is not valid JSON.
            print('请求失败:' + str(err))
            continue

        # On failure the server answers with only an error code (for example
        # {'errorCode': 50}) and no 'translateResult' key.
        if result.get('errorCode') not in (None, 0):
            print('翻译失败:' + str(result))
            continue

        # Word lookups carry a 'smartResult'; sentence queries do not.
        smart = result.get('smartResult')
        entries = (smart or {}).get('entries') or []
        if smart is not None and len(entries) > 1:
            print(entries[1])
        else:
            print(result['translateResult'][0][0]['tgt'])
# Start the interactive translator only when run as a script, not on import.
if __name__ == '__main__':
    main()
最后,欢迎各位大佬交流改进!