左侧输入文字后点击翻译 找到xhr的那条请求
浏览器向有道翻译的一个接口发出了POST请求并且得到了正常响应
action keyfrom 区分客户端类型
sign、salt、ts、bv 可能是随机生成的反爬虫字符串
请求发现网页加载了fanyi.min.js的文件 找到对应的sign ts bv salt
然后继续往下找 找到用于生成sign,bv,salt,ts的方法
// Excerpt from fanyi.min.js: builds the ts / bv / salt / sign request fields for the input text e.
var r = function(e) {
// bv: n.md5 of the browser's navigator.appVersion string (n is defined outside this excerpt; presumably an MD5 helper).
var t = n.md5(navigator.appVersion)
// ts: current time in milliseconds since the epoch, coerced to a string.
, r = "" + (new Date).getTime()
// salt: ts with one random digit (0-9) appended.
, i = r + parseInt(10 * Math.random(), 10);
return {
ts: r,
bv: t,
salt: i,
// sign: n.md5 of the client id + input text + salt + a fixed secret string.
sign: n.md5("fanyideskweb" + e + i + "Nw(nmmbP%A-r6U3EUn]Aj")
}
r + parseInt(10 * Math.random(), 10);
ts :r = "" + (new Date).getTime() 获取当前时间的时间戳
bv:t = n.md5(navigator.appVersion) 获取用MD5加密的浏览器信息
salt:i =r + parseInt(10 * Math.random(), 10); 将当前时间戳和0-9之间的随机数字组合成新的字符串
sign = n.md5("fanyideskweb" + e + i + "Nw(nmmbP%A-r6U3EUn]Aj") 获取MD5的值
下面具体代码
import requests
from time import time
import random
import hashlib
def md5(i):
    """Return the MD5 digest of the string *i* as lowercase hex.

    Args:
        i: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        The 32-character hexadecimal digest string.
    """
    # Use a distinct local name so the hash object does not shadow this function.
    digest = hashlib.md5()
    digest.update(bytes(i, encoding="utf-8"))
    return digest.hexdigest()
def youdao(e):
    """Translate *e* through the Youdao web endpoint and print the result.

    Rebuilds the ts / salt / sign form fields the way the site's JavaScript
    does (the JS expressions are quoted in the comments below), POSTs the
    form, and prints the first translated segment of the JSON response.

    Args:
        e: The text to translate.

    Raises:
        KeyError: If the JSON response contains no ``translateResult`` entry
            (for example an ``errorCode``-only reply).
    """
    # ts: r = "" + (new Date).getTime()  -> current time in milliseconds, as a string
    ts = str(int(time() * 1000))
    print("ts:", ts)
    # salt: i = r + parseInt(10 * Math.random(), 10)  -> ts plus one random digit.
    # Derived from ts (not a second clock read) so both carry the same timestamp, as in the JS.
    salt = ts + str(random.randint(0, 9))
    print("salt:", salt)
    # sign = n.md5("fanyideskweb" + e + i + "Nw(nmmbP%A-r6U3EUn]Aj")
    # NOTE(review): the secret used here differs from the one in the JS expression
    # quoted above - confirm which one the site currently expects.
    sign_test = "fanyideskweb" + e + salt + "@6f#X3=cCuncYssPsuRUE"
    sign = md5(sign_test)
    print("sign:", sign)

    # Form fields of the POST request.
    # "bv" (t = n.md5(navigator.appVersion) in the JS) is not sent.
    data = {
        "i": e,  # the text passed in, not a module-level global
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "ts": ts,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTlME"
    }
    # Browser-like request headers. Content-Length is intentionally not set:
    # it depends on the encoded form body, so a fixed value is only right for
    # one particular input.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie":"OUTFOX_SEARCH_USER_ID=-1933230275@10.169.0.83; JSESSIONID=aaamdNeFpZewW-3JKP6gx; OUTFOX_SEARCH_USER_ID_NCOO=1345727124.6815813; ___rl__test__cookies=1587977327313",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }
    # Endpoint URL.
    # NOTE(review): the remark after this script in the file reports that this
    # URL answers {"errorcode":50} and that dropping "_o" from the path works -
    # confirm against the live service before changing it.
    base_url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
    response = requests.post(base_url, data=data, headers=headers)
    json_data = response.json()
    if "translateResult" not in json_data:
        # Same exception type as the bare lookup would raise, but show the payload.
        raise KeyError("translateResult missing from response: %r" % (json_data,))
    # First segment of the first paragraph of the translation.
    result = json_data['translateResult'][0][0]['tgt']
    print("翻译的单词:", e + " " + "结果:" + result)
if __name__ == '__main__':
    # Script entry point: prompt for the text to translate (bound to the
    # module-level name `word`) and pass it to youdao().
    word = input("请输入需要翻译的内容:")
    youdao(word)
执行会发现数据{"errorcode":50}
我们只需要将base_url 里面的_o去掉就可以成功获取啦