先记录遇到的错误:返回 {'errorCode': 50} 的原因,在我这次学习过程中是请求方式用错了——用了 GET 方法,而有道翻译的接口需要用 POST 才能请求:
切记留意请求方式,不要习惯性使用 requests.get
报错内容:
raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
requests.exceptions.InvalidHeader: Invalid return character or leading space in header: User-Agent
这是因为复制请求头的时候,User-Agent 的值开头带有空格(或换行符),所以导致报错;去掉多余的空白字符即可
爬虫js逆向详细步骤:
本次使用到的库
pip install pyexecjs
pip install requests
使用的方法是执行js代码获取得到签名认证的信息,没有还原js原理去获取,因为对js没有太熟悉所以用了此方法
思路:
通过有道的翻译,可以知道是通过实时请求去呈现翻译,所以打开控制台,点击 Network
在输入框中输入内容,就能看见有请求弹出
点击payload可以知道请求带的数据有什么
由此可以看出sign和bv更像是被加密过的数据,大胆猜测全数字的是时间戳
接着就开始搜索逆向js了解他们的由来
由发起者可以看到js文件,点击即可进入树状的文件信息中的此处js文件
点击此处就能看到整理好摊开的js代码,接着搜索需要的数据
至于为什么明确能知道是此处的sign因为能看出是md5加密
而且return的内容正是需要的,下面的lts在下文也说了是等于ts
设置断点,点击翻译进入调试,能看见代码上出现了很多值的数据
由此可以知道e是翻译内容
接着进入 PyCharm,新建 js 文件
将此段代码复制进入,因为是主程序
// Builds the signing fields sent along with a translation request.
// `e` is the text to translate; the result carries the timestamp (ts),
// the hashed browser version string (bv), the salt and the request sign.
var r = function(e) {
    var browserHash = md5("5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36");
    var timestamp = String(new Date().getTime());
    // Salt = millisecond timestamp followed by one extra random digit.
    var salt = timestamp + parseInt(10 * Math.random(), 10);
    var signature = md5("fanyideskweb" + e + salt + "Ygy_4c=r#e#4EX^NUGUc5");
    return {
        ts: timestamp,
        bv: browserHash,
        salt: salt,
        sign: signature
    };
};
加上console.log(r('编程'))
去测试发现报错缺少内容:
ReferenceError: md5 is not defined
回到浏览器继续寻找数据:因为设置了断点,所以鼠标移过去能看见信息,点击此处就能进入md5的相关代码位置进行复制
(切记:鼠标悬停在方法上能弹出信息框,是因为设置了断点并且程序停在了断点处;随后需要找方法位置但信息框不出现时,是因为没有设置断点,设置后重新触发即可)
接着运行代码,缺少什么就去找什么一部分一部分的复制
获取后完整的js代码:
// Helper functions used by md5(). They are declared in one comma-chained
// `var` statement, so every helper is a top-level variable named by a
// single letter (n, r, i, o, a, s, l, c, u, d, f, p, h).

// n: rotate the 32-bit value e left by t bits.
var n = function(e, t) {
return e << t | e >>> 32 - t
}
// r: add e and t. The top two bits of each operand are masked off before
// the sum and then folded back in with XOR, so the result is always
// produced by bitwise operators (i.e. a 32-bit integer, possibly negative).
, r = function(e, t) {
var n, r, i, o, a;
return i = 2147483648 & e,
o = 2147483648 & t,
n = 1073741824 & e,
r = 1073741824 & t,
a = (1073741823 & e) + (1073741823 & t),
n & r ? 2147483648 ^ a ^ i ^ o : n | r ? 1073741824 & a ? 3221225472 ^ a ^ i ^ o : 1073741824 ^ a ^ i ^ o : a ^ i ^ o
}
// i, o, a, s: the four bitwise mixing functions, one per group of 16 steps
// in md5() (they have the shape of MD5's F, G, H and I functions).
, i = function(e, t, n) {
return e & t | ~e & n
}
, o = function(e, t, n) {
return e & n | t & ~n
}
, a = function(e, t, n) {
return e ^ t ^ n
}
, s = function(e, t, n) {
return t ^ (e | ~n)
}
// l, c, u, d: one step of each group. Each adds mix(t, ., .) + the message
// word + the step constant (last argument) to e, rotates the sum left by
// the shift amount (sixth argument), and finally adds t. They differ only
// in which mixing function (i, o, a or s) they call.
, l = function(e, t, o, a, s, l, c) {
return e = r(e, r(r(i(t, o, a), s), c)),
r(n(e, l), t)
}
, c = function(e, t, i, a, s, l, c) {
return e = r(e, r(r(o(t, i, a), s), c)),
r(n(e, l), t)
}
, u = function(e, t, i, o, s, l, c) {
return e = r(e, r(r(a(t, i, o), s), c)),
r(n(e, l), t)
}
, d = function(e, t, i, o, a, l, c) {
return e = r(e, r(r(s(t, i, o), a), c)),
r(n(e, l), t)
}
// f: turn the string e into an array of 32-bit words, padded to a multiple
// of 16 words. Character codes are packed four per word, lowest byte first.
// md5() passes in the output of h(), whose character codes are all < 256.
, f = function(e) {
for (var t, n = e.length, r = n + 8, i = 16 * ((r - r % 64) / 64 + 1), o = Array(i - 1), a = 0, s = 0; s < n; )
// a = bit offset inside the word, t = index of the word for character s.
a = s % 4 * 8,
o[t = (s - s % 4) / 4] = o[t] | e.charCodeAt(s) << a,
s++;
// Append the 0x80 marker right after the last character, then store the
// message length in bits (low part, high part) in the final two words.
return t = (s - s % 4) / 4,
a = s % 4 * 8,
o[t] = o[t] | 128 << a,
o[i - 2] = n << 3,
o[i - 1] = n >>> 29,
o
}
// p: format the 32-bit value e as 8 hex digits, least-significant byte
// first, zero-padding each byte to two digits.
, p = function(e) {
var t, n = "", r = "";
for (t = 0; t <= 3; t++)
n += (r = "0" + (e >>> 8 * t & 255).toString(16)).substr(r.length - 2, 2);
return n
}
// h: replace CRLF with LF, then encode the string as UTF-8, returning a
// string in which every character stands for one byte.
, h = function(e) {
e = e.replace(/\x0d\x0a/g, "\n");
for (var t = "", n = 0; n < e.length; n++) {
var r = e.charCodeAt(n);
if (r < 128)
// One-byte sequence.
t += String.fromCharCode(r);
else if (r > 127 && r < 2048)
// Two-byte sequence.
t += String.fromCharCode(r >> 6 | 192),
t += String.fromCharCode(63 & r | 128);
else if (r >= 55296 && r <= 56319) {
// High surrogate: combine with the following low surrogate into one
// code point and emit four bytes. If no low surrogate follows, nothing
// is emitted for this code unit.
if (n + 1 < e.length) {
var i = e.charCodeAt(n + 1);
if (i >= 56320 && i <= 57343) {
var o = 1024 * (r - 55296) + (i - 56320) + 65536;
t += String.fromCharCode(240 | o >> 18 & 7),
t += String.fromCharCode(128 | o >> 12 & 63),
t += String.fromCharCode(128 | o >> 6 & 63),
t += String.fromCharCode(128 | 63 & o),
n++
}
}
} else
// Three-byte sequence (every remaining code unit, including a low
// surrogate that was not consumed by the branch above).
t += String.fromCharCode(r >> 12 | 224),
t += String.fromCharCode(r >> 6 & 63 | 128),
t += String.fromCharCode(63 & r | 128)
}
return t
};
// md5: compute the digest of the string e and return it as a lowercase
// hexadecimal string (four 32-bit state words, 8 hex digits each).
// Relies on the top-level helpers h, f, l, c, u, d, r and p; if any of those
// names is reassigned later in the file, this function breaks.
var md5 = function(e) {
var t, n, i, o, a, s, m, g, v, y = Array();
// The whole setup lives in the for-initializer: encode the input with h(),
// split it into padded 32-bit words with f(), and load the four state
// words s, m, g, v with the standard MD5 initial values
// (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476).
for (e = h(e),
y = f(e),
s = 1732584193,
m = 4023233417,
g = 2562383102,
v = 271733878,
t = 0; t < y.length; t += 16)
// The loop body is one comma expression processing the 16-word block that
// starts at y[t]. First remember the state on entry to the block.
n = s,
i = m,
o = g,
a = v,
// Group 1: 16 steps using l().
s = l(s, m, g, v, y[t + 0], 7, 3614090360),
v = l(v, s, m, g, y[t + 1], 12, 3905402710),
g = l(g, v, s, m, y[t + 2], 17, 606105819),
m = l(m, g, v, s, y[t + 3], 22, 3250441966),
s = l(s, m, g, v, y[t + 4], 7, 4118548399),
v = l(v, s, m, g, y[t + 5], 12, 1200080426),
g = l(g, v, s, m, y[t + 6], 17, 2821735955),
m = l(m, g, v, s, y[t + 7], 22, 4249261313),
s = l(s, m, g, v, y[t + 8], 7, 1770035416),
v = l(v, s, m, g, y[t + 9], 12, 2336552879),
g = l(g, v, s, m, y[t + 10], 17, 4294925233),
m = l(m, g, v, s, y[t + 11], 22, 2304563134),
s = l(s, m, g, v, y[t + 12], 7, 1804603682),
v = l(v, s, m, g, y[t + 13], 12, 4254626195),
g = l(g, v, s, m, y[t + 14], 17, 2792965006),
m = l(m, g, v, s, y[t + 15], 22, 1236535329),
// Group 2: 16 steps using c().
s = c(s, m, g, v, y[t + 1], 5, 4129170786),
v = c(v, s, m, g, y[t + 6], 9, 3225465664),
g = c(g, v, s, m, y[t + 11], 14, 643717713),
m = c(m, g, v, s, y[t + 0], 20, 3921069994),
s = c(s, m, g, v, y[t + 5], 5, 3593408605),
v = c(v, s, m, g, y[t + 10], 9, 38016083),
g = c(g, v, s, m, y[t + 15], 14, 3634488961),
m = c(m, g, v, s, y[t + 4], 20, 3889429448),
s = c(s, m, g, v, y[t + 9], 5, 568446438),
v = c(v, s, m, g, y[t + 14], 9, 3275163606),
g = c(g, v, s, m, y[t + 3], 14, 4107603335),
m = c(m, g, v, s, y[t + 8], 20, 1163531501),
s = c(s, m, g, v, y[t + 13], 5, 2850285829),
v = c(v, s, m, g, y[t + 2], 9, 4243563512),
g = c(g, v, s, m, y[t + 7], 14, 1735328473),
m = c(m, g, v, s, y[t + 12], 20, 2368359562),
// Group 3: 16 steps using u().
s = u(s, m, g, v, y[t + 5], 4, 4294588738),
v = u(v, s, m, g, y[t + 8], 11, 2272392833),
g = u(g, v, s, m, y[t + 11], 16, 1839030562),
m = u(m, g, v, s, y[t + 14], 23, 4259657740),
s = u(s, m, g, v, y[t + 1], 4, 2763975236),
v = u(v, s, m, g, y[t + 4], 11, 1272893353),
g = u(g, v, s, m, y[t + 7], 16, 4139469664),
m = u(m, g, v, s, y[t + 10], 23, 3200236656),
s = u(s, m, g, v, y[t + 13], 4, 681279174),
v = u(v, s, m, g, y[t + 0], 11, 3936430074),
g = u(g, v, s, m, y[t + 3], 16, 3572445317),
m = u(m, g, v, s, y[t + 6], 23, 76029189),
s = u(s, m, g, v, y[t + 9], 4, 3654602809),
v = u(v, s, m, g, y[t + 12], 11, 3873151461),
g = u(g, v, s, m, y[t + 15], 16, 530742520),
m = u(m, g, v, s, y[t + 2], 23, 3299628645),
// Group 4: 16 steps using d().
s = d(s, m, g, v, y[t + 0], 6, 4096336452),
v = d(v, s, m, g, y[t + 7], 10, 1126891415),
g = d(g, v, s, m, y[t + 14], 15, 2878612391),
m = d(m, g, v, s, y[t + 5], 21, 4237533241),
s = d(s, m, g, v, y[t + 12], 6, 1700485571),
v = d(v, s, m, g, y[t + 3], 10, 2399980690),
g = d(g, v, s, m, y[t + 10], 15, 4293915773),
m = d(m, g, v, s, y[t + 1], 21, 2240044497),
s = d(s, m, g, v, y[t + 8], 6, 1873313359),
v = d(v, s, m, g, y[t + 15], 10, 4264355552),
g = d(g, v, s, m, y[t + 6], 15, 2734768916),
m = d(m, g, v, s, y[t + 13], 21, 1309151649),
s = d(s, m, g, v, y[t + 4], 6, 4149444226),
v = d(v, s, m, g, y[t + 11], 10, 3174756917),
g = d(g, v, s, m, y[t + 2], 15, 718787259),
m = d(m, g, v, s, y[t + 9], 21, 3951481745),
// Add the state saved at the start of the block back in (using r()).
s = r(s, n),
m = r(m, i),
g = r(g, o),
v = r(v, a);
// Serialize each state word with p() (lowest byte first) and concatenate.
return (p(s) + p(m) + p(g) + p(v)).toLowerCase()
}
// Builds the signing fields sent along with a translation request.
// `e` is the text to translate; the result carries the timestamp (ts),
// the hashed browser version string (bv), the salt and the request sign.
// NOTE: this declaration reuses the top-level name `r`, which the MD5
// helper chain above also declares and which md5()'s step helpers call.
var r = function(e) {
    var browserHash = md5("5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36");
    var timestamp = String(new Date().getTime());
    // Salt = millisecond timestamp followed by one extra random digit.
    var salt = timestamp + parseInt(10 * Math.random(), 10);
    var signature = md5("fanyideskweb" + e + salt + "Ygy_4c=r#e#4EX^NUGUc5");
    return {
        ts: timestamp,
        bv: browserHash,
        salt: salt,
        sign: signature
    };
};
console.log(r('编程'))
接着运行会发现报错了
RangeError: Maximum call stack size exceeded
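原因是函数名重复了:主函数也叫 r,覆盖了 md5 内部用到的辅助函数 r,于是 md5 调用 r、r 又调用 md5,形成无限递归
修改主函数的名字即可(例如改为 ri,下文 python 代码调用的就是 ri),最后一行的测试调用也要同步改成 console.log(ri('编程'))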
随后运行即可发现需要的数据出现了,也就是等于还原获取数据证书成功!!!
接着即可使用python代码,利用execjs执行js代码,获取内容接着发起post请求即可:
python代码:
import requests
import execjs
# Translation endpoint; the request below is sent to it with POST.
url = 'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
# Browser-like request headers. Values must not start with whitespace,
# otherwise requests raises InvalidHeader.
headers = {
    'Cookie': 'OUTFOX_SEARCH_USER_ID=603712987@10.108.162.139; OUTFOX_SEARCH_USER_ID_NCOO=2073940466.0197437; ___rl__test__cookies=1665489625147,',
    'Host': 'fanyi.youdao.com',
    'Origin': 'https://fanyi.youdao.com',
    'Referer': 'https://fanyi.youdao.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
}
word = input("输入内容:")

# Read the signing script through a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("youdao.js", encoding="utf-8") as js_file:
    js_code = js_file.read()
compile_code = execjs.compile(js_code)
# `ri` is expected to be the signing function in youdao.js; its result is
# read below through the keys "salt", "sign", "ts" and "bv".
json_data = compile_code.call('ri', word)
print(json_data)

# Form fields of the translation request.
data = {
    'i': word,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': json_data["salt"],
    'sign': json_data["sign"],
    'lts': json_data["ts"],
    'bv': json_data["bv"],
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_CLICKBUTTION',
}
# A timeout keeps the script from hanging forever on a stalled connection.
dat = requests.post(url=url, data=data, headers=headers, timeout=10)
# Fail with a clear HTTP error rather than a JSON decoding error when the
# server answers with a 4xx/5xx status.
dat.raise_for_status()
print(dat.json())