本次记录扣js核心算法
爬取思路:
1.找到请求体,获得数据链接
2.将请求体带上尝试请求,研究请求情况(一段时间cookie会失效)
3.尝试请求体情况,得到问题所在
4.开始逆向hook需要的核心请求体
5.不断调试与扣js代码
6.得到数据后,构思如何失效后自动更新
找到请求体:
数据在里面多层嵌套最终在data就能看见内容
请求链接:Request URL:http://www.iwencai.com/customized/chart/get-robot-data
尝试请求:
将data和headers带上:
# Request payload for the get-robot-data endpoint (key order kept as captured).
data = {
    "question": "20221030涨停",
    "perpage": 50,
    "page": 1,
    "secondary_intent": "",
    "log_info": {"input_type": "typewrite"},
    "source": "Ths_iwencai_Xuangu",
    "version": "2.0",
    "query_area": "",
    "block_list": "",
    "add_info": {
        "urp": {"scene": 1, "company": 1, "business": 1},
        "contentType": "json",
        "searchInfo": True,
    },
    "rsh": "Ths_iwencai_Xuangu_xdoaiz62bgbofsagvw5mfuwnxmdl7mto",
}

# Only the "v" cookie is sent; its value is the generated hexin-v token.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
    "Content-Type": "application/json",
    "Referer": "http://www.iwencai.com/unifiedwap/result?w=20221030%E6%B6%A8%E5%81%9C",
    "cookie": f"v={hexin_v}",
}

# The payload is posted as a JSON string and the raw response text is printed.
body = json.dumps(data)
resp = requests.post(url=url, data=body, headers=headers).text
print(resp)
尝试请求体情况,得到问题所在:
仔细研究cookie,再加上测试,发现cookie中可以减少参数,最终只剩下v=即可
然后,v后面的数据又与hexin-v一模一样,由此可以断定cookie是拼接上去用于检测的;再测试还发现,不带cookie、只带hexin-v请求头同样可以请求。由此即可明白请求原理:要解决的就是hexin-v
逆向hook需要的核心请求体:
这里就运用到了hook请求头,当发现hexin-v时候捕获:
// Hook XMLHttpRequest.prototype.setRequestHeader so the debugger pauses whenever
// the page sets the "hexin-v" header; the call stack then leads to the code that
// generates the token.
// Usage: paste into the browser console, then trigger the request again.
(function () {
    var xhrProto = window.XMLHttpRequest.prototype;
    // Kept in a closure instead of an implicit global so the page is not polluted.
    var originalSetRequestHeader = xhrProto.setRequestHeader;

    xhrProto.setRequestHeader = function (name, value) {
        // HTTP header names are case-insensitive, so compare in lower case.
        if (String(name).toLowerCase() === "hexin-v") {
            debugger;
        }
        // Forward the call unchanged to the native implementation.
        return originalSetRequestHeader.apply(this, arguments);
    };
})();
找到核心算法实现处:
上一层就是实现处原理:
接着从代码中获知rt.update就是hexin-v的来源验证一下
将生成的放入请求中,请求成功获取
接着定位到核心rt处,进行扣代码!!!(耐心出奇迹)
代码:
// TOKEN_SERVER_TIME is intentionally not defined in this file: the Python driver
// prepends a statement of the following form before compiling the script.
//var TOKEN_SERVER_TIME = 1667085000.423;
// Empty stand-ins for the browser globals; the lookup tables below store
// references to them, so the names must exist outside a browser.
var document = {}
var window = {}
second = [1, "", 0, "he", "ad", 29, "\x180G\x1f", "?>=<;:\\\\/,+", "ng", "to", "ff", Number, Error, "11", "6", "er", "ro", "code", "co", "_?L", "ed", "@S\x15D*", Object, "len", "gth", "on", "lo", RegExp, "ySta", 13, "eel", "ee", "ouse", "ll", "\u2544\u2530\u2555\u2531", "FCm-", "isTru", "getC", "Pos", "ve", "or", "ae", "^", "On", "Sho", "can", "ont", "roid", "anguage", "\u2502", "ta", "tna", Date, "3", "am", "e", "n+", "f80", "\x1dD", 6, "\u255f\u253a\u2542\u252b\u2545\u2568\u251e", "KCABLLAC_NOELEMAHC", "X-Antispider-Message", 3, ".baidu.", Function, document, !0, "cookie", "; ", "=", 96, "\u255b\u253e\u2550\u2537\u2543\u252b", "\u250c\u252c\u255c\u253d\u2549\u2521\u251c", ";O", "; expires=", "getCookie", "Thu, 01 Jan 1970 00:00:00 GMT", "setCookie", "Z\x18|", "i", "\u255b\u2534\u2557\u2536\u255a\u2509\u257d\u2512\u2560\u2501\u2566\u2503", 52, window, 10, "Init", !1, "set", "v", "eliflmth", '<script>document.w=window<\/script><iframe src="/favicon.icon"></iframe>', "iS.p", "head", "#default#userData", "get", "[!\"#$%&'()*", "g", "^d", "$D", "\u2568\u2537\u2568\u254c\u256a", "]\\P", "___", "le", "th", "prototype", "base_f", 8, "\\R5Z\\R\x14@^Q3G", "ZV%PgQ?Y]S%", 67, "r", "length", "0", 16, "12", "\u2576\u095f\u0979\u09d5\u0995\u091b\u09a9\u09f9\u09bd\u09f7\u0989\u09fd\u09f5\u09f3\u09f9\u0a41\u0a4d\u098f\u0999\u0905\u0975\u09cb\u09a9\u09a9\u099d\u0927\u0933\u0913\u0a6b\u0999\u09a3\u0937\u098b\u09f5\u0933\u0a7b\u091b\u09b1\u0a63\u095f\u09fb\u094d\u0993\u0943\u092b\u0949\u09a3\u09e7\u09cb\u0925\u0993\u09ab\u09f0\u092c\u092c\u0942\u0950\u09c8\u0944\u09c6\u0990\u0944\u09cb\u098e", "i,", "\u2505\u092f", 12, 56, "20", "1000", 2, 5, "11111111", "encode", "\u255b\u0972\u0959", "\u2519", "s", "WY$PYS", "ystate", "1111101000", / /g, ",", "\u250d", '^".*"$', "edoc_sutats", "status_code", "location", "redirect_url", "href", "4294967295", "j", "1200000", "script", "src", "onreadystatechange", "read", "loaded", "readyState", "complete", "interactive", "onload", "undefined", 
"\\.com\\.cn$|\\.com\\.hk$", ".", "getServerTime", 'YY7YAD?FjD"', "strhash", "random", "getRootDomain", "booleanToDecimal", "timeNow", "\u2559\u253e", "eventBind", "onwh", "\u255b", 46, "DOMM", "cl", "T^5^", "div", "onmousewheel", "mousewheel", 51, "keydown", "clientY", "getKeyDown", "ch", "plu", "\u2543\u252b", "ouc", "art", "^i", "Po", "callPhantom", "max", "Hei", "ActiveXObject", "nd", "yG&Y]\x17\x15ZUG#A]Ez\x15qY5\x1b", "\u2576\u097e\u094e\u09f8\u09a6\u0938\u09b6\u09fe\u0996\u09d7\u09a7\u09d2\u09cc", "Maxthon", "Q", "opr", "chrome", "BIDUBrowser", "QQBro", "[_$ZUR", "UBrowser", "MSGesture", "plugins", "doNotTrack", "ShockwaveFlash.ShockwaveFlash", "]C|\x18", "webgl2", "platform", "name", "^Win32", "^MacIntel", "^Linux [ix]\\d+", "^BlackBerry", "language", "getPlatform", "getBrowserIndex", "1", "10", 4, 9, "1100", "\t\0", "3c", 256, "w", "TTP", "et", "c", "al", "\u255e", "base", "\u2569\u0975\u094e\u09e5\u09a0\u092e\u09d1\u09ed\u09ce", "target", "fh%PTQr", "#", "\u255f\u097c\u0949\u09f9", 97, "rg", "tnemelEcrs", "fn_Ws", "parentNode", "tagName", "A", "submit", "PX%", "me", "host", "\\.?", "d\x19", "Fri, 01 Feb 2050 00:00:00 GMT", "]E%", "toString", "[object Request]", "headers", 83, "&", encodeURIComponent, "open", "getAllResponseHeaders", "4", "tseuqeRpttHLMX", "Window", "\u2564\u095e", "RI", "\u2550\u0953", "(YaZ", "_", "_str", "V587"]
first = ["", 9527, String, Boolean, "eh", "ad", "Bu", "ileds", "1", "\b", Array, "7", "base", "64De", "\u2543\u252b", "etatS", "pa", "e", "FromUrl", "getOrigi", "nFromUrl", "\u255b\u253e", "b?\x18q)", "ic", "k", "sted", "he", "wser", "oNo", "ckw", "ent", "hst", "^And", "RM", "systemL", 5, "\u255f\u0978\u095b\u09f5", "TR8", "!'", "gth", "er", "TP", 83, "r", !0, "v", "v-nixeh", RegExp, "thsi.cn", 'K\x19"]K^xVV', "KXxAPD?\x1b[Y", document, 0, "allow", 1, "; ", "length", "Init", "=", "; domain=", "checkcookie", !1, "eikooCled", "tnemucod", "d", window, "\u2553\u0972\u0959\u09e4\u09bd\u0938\u0980\u09c5\u09b1\u09d1\u09a7\u09dc\u09dd\u09d3\u09c2", "\u2556\u0979\u095e\u09d3\u09b5\u0935\u098f\u09c7\u099d\u09d2\u09b0", 23, "l$P$~", "frames", "ducument", "ydob", "documentElement", "del", "@[\\]^`{|}~]", "base_fileds", "255", 10, "10", 39, "\u2547\u2535\u255a\u252e\u2541\u2535\u254c\u253c\u2559", 8, "4", "3", "de", 3, "11", 2, "203", "22", "111111", "3f", 16, "\x0f", "\u2506\u2537\u2507\u2537", "11111111", "base64Encode", "v\x1d", "ati", "WY", "te", "bo", "rs", "getHost", Date, "{DF", ":", "^{.*}$", "WU<P[C", 52, "1001", "href", "1111101010", "redirect_url", "^\\s*(?:https?:)?\\/{2,}([^\\/\\?\\#\\\\]+)", "i", "\u256c\u252c\u2516\u254b", "@", "ready", "change", "dy", 7, "protocol", "//s.thsi.cn/js/chameleon/time.1", "onerror", "2000", "readyState", null, "^(\\d+\\.)+\\d+$", "^\\s*(?:(https?:))?\\/{2,}([^\\/\\?\\#\\\\]+)", ".", "strToBytes", "isIPAddr", "serverTimeNow", "addEventListener", "th", "wh", "Scro", "mousemove", 55, "evomhcuot", "[[?PVC\x0e", "getMouseMove", '_R"xWB%Po_3YT', "getMouseClick", "ght", "gin", "msD", "ack", "\u2556\u096b\u095f", "Nativ", "^A", "MozSettingsEvent", "safari", "ActiveXObject", "postMessage", "Uint8Array", "WeakMap", "Google Inc.", "vendor", "chrome", "python", "sgAppName", "JX", 6, "me", "LBBROWSER", "w4", "2345Explorer", "TheWorld", "\u2544", 40, "tTr", "\u2506", "navigator", "webdriver", "languages", "taborcA|FDP", "\u2541\u097c\u0949", 95, 
"1e0", "e Cli", "iso-8859-1", "defaultCharset", "localStorage", "^Win64", "^Linux armv|Android", "^iPhone", "^iPad", "B_{VV", "getPluginNum", "getBrowserFeature", "12", "16", "sE", "10000", "17", "\u2542\u2532\u2556\u2537\u2543\u2526", "\x1cx`R", 2333, "XMLH", "ers", "0", "lo", 57, "ylppa", "error", "target", "click", "unload", "HE9AWT9Y", "\\.", "c?", "$", "/", "fetch", "prototype", "url", "\u2556\u0971\u0956\u09fe\u09a7", "headers", "\u256b\u2554", 79, "?", "^(.*?):[ \\t]*([^\\r\\n]*)\\r?$", "gm", "s", "src", "analysisRst", "\u255e\u0973\u0949\u09f4\u09a2\u0929\u09ac\u09d4\u0992\u09d2\u09b0\u09d4", "appendChild", "Y", "jsonp_ignore", "^", 70, "421", "XH>a", "\u2574\u253c\u257d\u2530\u2575\u2539\u257c\u2533\u257d\u2522\u256e\u2521\u2560\u2524\u2561\u2525", "CHAMELEON_LOADED"]
// Short aliases used by the extracted code: r, e, a and n all refer to the
// `first` table; u, c, s and t all refer to the `second` table.
var r, e, a , n;
r = e = a = n = first;
var u, c, s , t;
u = c = s = t = second;
/**
 * Returns the server timestamp truncated to whole seconds.
 *
 * Reads the global TOKEN_SERVER_TIME, which is not defined in this file; it
 * must be injected before the script runs (the Python driver prepends it).
 *
 * @returns {number} integer part of TOKEN_SERVER_TIME
 */
function serverTimeNow(){
    // Explicit radix: the value is always decimal and must never be parsed as hex.
    return parseInt(TOKEN_SERVER_TIME, 10);
}
/**
 * XOR-chain string decoder used for obfuscated table entries.
 *
 * The string to decode is read from the argument slot selected by s[0]; every
 * character is XOR-ed with the previous *input* character code, the first one
 * with the seed a[1].
 *
 * @returns {string} decoded text, or r[0] when the input is missing or empty
 */
function v() {
    var text = arguments[s[0]];
    if (!text) {
        return r[0];
    }
    var decoded = u[1];
    var previous = a[1];
    var index = c[2];
    while (index < text.length) {
        var code = text.charCodeAt(index);
        decoded += e[2].fromCharCode(code ^ previous);
        // The next character is keyed by the raw code just consumed.
        previous = code;
        index++;
    }
    return decoded;
}
/**
 * Repeating-key XOR decoder.
 *
 * The input is taken from the argument slot selected by first[52]. The key is
 * second[267]; the key position starts at first[200] and advances by second[0]
 * (wrapping at the key length) before each character is XOR-ed.
 *
 * @returns {string} decoded text, or first[0] when the input is missing or empty
 */
function ot() {
    var input = arguments[first[52]];
    if (!input) {
        return first[0];
    }
    var output = first[0];
    var key = second[267];
    var cursor = first[200];
    for (var pos = second[2]; pos < input.length; pos++) {
        // Advance the key position first, then mix the key character in.
        cursor = (cursor + second[0]) % key.length;
        output += first[2].fromCharCode(input.charCodeAt(pos) ^ key.charCodeAt(cursor));
    }
    return output;
}
// qn: constructor for a fixed-layout record of unsigned integer fields.
// It is built from an array of per-field byte widths and can serialise itself
// to a byte array (toBuffer) or load itself from one (decodeBuffer).
// NOTE(review): the literal values quoted in the comments below were resolved
// by hand from the `first` / `second` tables in this file - re-check them if
// the tables are ever changed.
var qn = function() {
var n, t, r , a;
n = t = r = a = first;
var e, o, i , s;
e = o = i = s = second;
// c + "ng" + f spells "length" ("le" + "ng" + "th"); u is not used below.
var u = o[15]
, c = o[102]
, f = e[103];
// Constructor: keeps the width array under the key first[76] ("base_fileds")
// and initialises one numeric slot per field to first[52] (0).
// The locals a and i are not used.
function l(r) {
var a = o[102]
, i = e[103];
this[n[76]] = r;
for (var u = t[52], c = r["length"]; u < c; u++)
this[u] = t[52]
}
// toBuffer: returns a plain array of bytes. For every field the write index is
// moved to the field's last byte, then bytes are written backwards while the
// value is shifted right (mask parseInt("255", 10), shift parseInt("10", 8) = 8),
// so each field ends up most-significant byte first. The local a is unused.
l.prototype.toBuffer = function() {
for (var a = "base_f", u = this["base_fileds"], c = [], s = -e[0], v = o[2], f = u[r[56]]; v < f; v++)
for (var l = this[v], p = u[v], d = s += p; c[d] = l & parseInt(t[77], n[78]),
--p != r[52]; )
--d,
l >>= parseInt(n[79], i[106]);
return c
};
// decodeBuffer: inverse of toBuffer. Reads the width array via the key decoded
// by ot(...) ("base_fileds"), consumes that many bytes of `n` per field and
// stores each rebuilt value as an unsigned 32-bit number.
l.prototype.decodeBuffer = function(n) {
for (var r = e[8], a = this[ot(e[108], e[109])], o = t[52], u = e[2], s = a[c + r + f]; u < s; u++) {
var v = a[u]
, l = i[2];
do {
l = (l << t[82]) + n[o++]
} while (--v > t[52]);
this[u] = l >>> i[2]
}
};
return l;
}()
/**
 * Rolling-key XOR decoder used for obfuscated table entries.
 *
 * The input is taken from the argument slot selected by e[52]. Each character
 * is XOR-ed with a key that starts at e[1] and, after every character, becomes
 * `key * position % u[222] + e[200]`.
 *
 * @returns {string} decoded text, or u[1] when the input is missing or empty
 */
function at() {
    var input = arguments[e[52]];
    if (!input) {
        return u[1];
    }
    var output = e[0];
    var key = e[1];
    var pos = e[52];
    while (pos < input.length) {
        // Decode with the current key, then derive the key for the next position.
        output += e[2].fromCharCode(input.charCodeAt(pos) ^ key);
        key = key * pos % u[222] + e[200];
        pos++;
    }
    return output;
}
// zn: codec namespace. The IIFE below attaches base64Encode, base64Decode,
// encode and decode to it. Only zn.encode is used by the rest of this file.
// NOTE(review): literal values quoted in the comments were resolved by hand
// from the `first` / `second` tables - re-check them if the tables change.
var zn
!function(n) {
// String/number fragments that are concatenated into constants further down;
// h is the leading marker byte written by N and checked by D.
var t = s[13]
, o = c[53]
, i = r[83]
, f = r[84]
, l = s[110]
, d = r[85]
, h = r[86];
// g: XOR stream. Copies n[a..] into o starting at index i, XOR-ing each byte
// with the low 8 bits of the running key u; after every byte the key becomes
// ~(u * parseInt(e[89], e[82])).
function g(n, a, o, i, u) {
for (var c = s[13], v = r[87], f = n[s[111]]; a < f; )
o[i++] = n[a++] ^ u & parseInt(c + v + t + "11", r[88]),
u = ~(u * parseInt(e[89], e[82]))
}
// w: base64-style encoder. Packs three bytes into one 24-bit value and emits
// four characters looked up in the alphabet m (6 bits each).
function w(n) {
for (var t = c[112], i = r[52], v = n[s[111]], f = []; i < v; ) {
var l = n[i++] << parseInt("1" + t, c[113]) | n[i++] << e[82] | n[i++];
f.push(m.charAt(l >> parseInt(e[90], e[82])), m.charAt(l >> parseInt(s[114], e[78]) & parseInt(a[91], r[88])), m.charAt(l >> u[59] & parseInt("6" + o, a[78])), m.charAt(l & parseInt(a[92], u[113])))
}
return f.join(e[0])
}
// m is the alphabet (decoded from the table with at()); I maps each alphabet
// character back to its index.
for (var m = at(u[115], s[116]), I = {}, y = u[2]; y < parseInt(i + "0", e[93]); y++)
I[m.charAt(y)] = y;
// O: inverse of w (four characters -> three bytes).
// NOTE(review): `b` is not defined anywhere in the visible file, so calling
// this function would throw a ReferenceError. The encode path does not use it.
function O(n) {
var t, r, e;
t = r = e = s;
var o, i, u;
o = i = u = a;
for (var c = ot(i[94]), l = e[2], p = n[o[56]], d = []; l < p; ) {
var h = I[n.charAt(l++)] << parseInt(at(t[117]), u[82]) | I[n.charAt(l++)] << parseInt(v(t[118], u[95], e[119]), o[88]) | I[n.charAt(l++)] << t[59] | I[n.charAt(l++)];
d.push(h >> parseInt(e[120], t[106]), h >> parseInt(t[121], r[122]) & parseInt(f + b + c, t[106]), h & parseInt(o[96], u[88]))
}
return d
}
// D: inverse of N - checks the marker byte, undoes the XOR stream and returns
// the payload only if its checksum matches.
// NOTE(review): `rn`, `p`, `T` and `B` are not defined anywhere in the visible
// file and `error` is assigned without a declaration, so this function cannot
// run as written. The encode path does not use it.
function D(n) {
var t = O(n);
if (rn,
p,
t[r[52]] != h)
return error = T + B + l,
void 0;
var a = t[c[0]]
, o = [];
return g(t, +parseInt(e[79], c[122]), o, +u[2], a),
x(o) == a ? o : void 0
}
// x: checksum over a byte array - e = (e << s[123]) - e + byte for every byte,
// result masked with parseInt(s[124], r[88]). The first two statements assign
// t and never read it.
function x(n) {
var t = o;
t = Vn;
for (var e = c[2], i = a[52], u = n[c[111]]; i < u; i++)
e = (e << s[123]) - e + n[i];
return e & parseInt(s[124], r[88])
}
// N: full encoder. Output bytes are [h, checksum, payload XOR-ed by g with the
// checksum as initial key], then passed through w.
function N(n) {
var r = x(n)
, e = [h, r];
return g(n, +a[52], e, +a[88], r),
// t = "co",  (left disabled by the author: it caused problems)
w(e)
}
n["base64Encode"] = w,
n["base64Decode"] = O,
n["encode"] = N,
n["decode"] = D
}(zn || (zn = {}));
// var Qn,Wn,N 因为cookie新浏览器一开始可以没有,所以偷懒不设置也可以
// !function(n) {
// n[e[53]] = s[67];
// function t(n) {
// var t = r[51][u[68]]
// , o = u[69] + n + s[70]
// , i = t.indexOf(o);
// if (i == -e[54]) {
// if (o = n + c[70],
// t.substr(r[52], o.length) != o)
// return;
// i = a[52]
// }
// var f = i + o[v(u[71], s[72])]
// , l = t.indexOf(r[55], f);
// return l == -a[54] && (l = t[a[56]]),
// t.substring(f, l)
// }
// n[a[57]] = f;
// function o(n, t, a, o, i) {
// var c = n + r[58] + t;
// o && (c += e[59] + o),
// i && (c += v(Jn, u[73], s[74]) + i),
// a && (c += u[75] + a),
// u[66][u[68]] = c
// }
// n[s[76]] = t;
// function i(n, t, r) {
// this.setCookie(n, u[1], u[77], t, r)
// }
// n[s[78]] = o;
// function f() {
// var t = a[60];
// this.setCookie(t, u[67]),
// this.getCookie(t) || (n[r[53]] = e[61]),
// this.delCookie(t)
// }
// n[Wn(N, r[62], c[79])] = i
// }(Qn || (Qn = {}));
/**
 * 32-bit unsigned string hash: hash = hash * 31 + charCode, reduced modulo 2^32
 * after every character.
 *
 * The original body overwrote its parameter with a hard-coded User-Agent and
 * re-assigned the outer `s` / `r` aliases as a side effect. The argument is now
 * honoured, and the same User-Agent is only the default, so `strhash()` returns
 * exactly what it did before.
 *
 * @param {string} [n] text to hash; defaults to the User-Agent used by this script
 * @returns {number} unsigned 32-bit hash
 */
function strhash(n) {
    // NOTE(review): keep this in sync with the User-Agent header sent by the
    // Python side if the server compares the two - confirm.
    var DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36";
    var text = (n === undefined || n === null) ? DEFAULT_USER_AGENT : String(n);
    var hash = 0;
    for (var idx = 0; idx < text.length; idx++) {
        // (hash << 5) - hash is hash * 31; the shift amount 5 and the start
        // value 0 are what second[123] / first[52] resolve to in this file.
        hash = ((hash << 5) - hash + text.charCodeAt(idx)) >>> 0;
    }
    return hash;
}
/**
 * Hard-coded browser-feature value; rt's initialiser stores it in the token record.
 * @returns {number} always 3812
 */
function getBrowserFeature() {
    var FIXED_BROWSER_FEATURE = 3812;
    return FIXED_BROWSER_FEATURE;
}
/**
 * Hard-coded platform value; rt's initialiser stores it in the token record.
 * @returns {number} always 7
 */
function getPlatform() {
    var FIXED_PLATFORM = 7;
    return FIXED_PLATFORM;
}
/**
 * Hard-coded browser index; rt's initialiser stores it in the token record.
 * @returns {number} always 10
 */
function getBrowserIndex(){
    var FIXED_BROWSER_INDEX = 10;
    return FIXED_BROWSER_INDEX;
}
/**
 * Hard-coded plugin count; rt's initialiser stores it in the token record.
 * @returns {number} always 5
 */
function getPluginNum(){
    var FIXED_PLUGIN_COUNT = 5;
    return FIXED_PLUGIN_COUNT;
}
/**
 * Returns the current local clock as unsigned whole seconds.
 *
 * Calls `.now()` on s[52] and divides by parseInt(c[131], a[88]); with this
 * file's tables those resolve to Date and 1000 (binary "1111101000").
 * The original stored the intermediate value in an undeclared global `time`;
 * it is now a local, so nothing leaks and the function works in strict mode.
 *
 * @returns {number} seconds since the epoch, as an unsigned 32-bit integer
 */
function timeNow(){
    var nowMs = s[52].now();
    // `/` binds tighter than `>>>`: divide first, then truncate to unsigned.
    return nowMs / parseInt(c[131], a[88]) >>> c[2];
}
// Module-level values consumed by the rt initialiser below.
// En and An are string fragments that rt concatenates into field indices;
// An is produced by decoding a table entry with v().
var En = a[8]
var An = v(a[35], s[49])
// Vn and Un are written into the token record by rt (Vn on every update).
var Vn = 0
var Un = s[63]
// Namespace object that receives the public `update` function.
var rt={};
// rt: builds the token record once (P) and exposes rt.update(), which refreshes
// the time/counter fields and returns the encoded token string.
!function(n) {
// One declaration chain: string fragments first (t, o, i, f), then the field
// indices of the record S (l, p, d, h, g, w, m, I, y, _, C, E, A, b, T, B, R, k).
// NOTE(review): resolved by hand against this file's tables the indices come
// out as 0..17 in that order - verify before relying on it.
var t = e[87], o = a[8], i = e[8], f = s[215], l = r[52], p = s[0], d = parseInt(c[216], u[122]), h = e[86], g = u[217], w = u[123], m = e[165], I = parseInt(t + En, c[122]), y = parseInt(a[79], a[82]), _ = c[218], C = parseInt(a[193], e[82]), E = parseInt(o + i, r[78]), A = parseInt(u[219], s[122]), b = parseInt(f + An, s[106]), T = parseInt(r[194], s[106]), B = parseInt(ot(s[220], e[195]), r[82]), R = parseInt(e[196], u[122]), k = parseInt(e[197], a[78]), S;
// P: creates the record with 18 per-field byte widths and fills the fields
// that do not change between updates (server time, random value, hashes and
// the hard-coded browser values).
function P() {
var n = s[0]
, t = r[88]
, e = parseInt(u[13], c[122])
, a = s[217];
S = new qn([a, a, a, a, n, n, n, e, t, t, t, t, t, t, t, a, t, n]);
S[p] = serverTimeNow();
M(),
S[B] = Vn;
S[k] = Un;
S[R] = c[2];
S[h] = strhash();
S[b] = getBrowserFeature();
S[g] = getPlatform();
S[w] = getBrowserIndex();
S[m] = getPluginNum();
}
// M: stores an unsigned random integer in field l.
function M() { // author's note: after reading the original code a shortcut was taken here - Math.random() is used directly
S[l] = Math.random() * parseInt(u[141], r[78]) >>> r[52]
}
// O: bumps the counter field R, refreshes server time and local time, resets
// fields I, y, _, C, E and A to 0, then serialises and encodes the record.
function O() {
S[R]++,
S[p] = serverTimeNow(),
S[d] = timeNow(),
S[B] = Vn,
S[I] = 0,
S[y] = 0,
S[_] = 0,
S[C] = 0,
S[E] = 0,
S[A] = 0;
var n = S.toBuffer();
// Debug output of the raw byte buffer.
console.log(n);
return zn.encode(n)
}
//n[e[57]] = P;
// The initialiser is run once immediately instead of being exported.
P();
function D() {
return O()
}
n["update"] = D
}(rt);
// Smoke test: generate one token when the script is loaded and print it.
// Declared with `var` so no implicit global is created (an undeclared
// assignment throws in strict mode).
var result = rt.update();
console.log(result);
扣js时候还发现了一个TOKEN_SERVER_TIME = 1667085000.423;系统时间是会变动的,所以需要长期使用就需要更新他
失败情况获取的内容:
python代码:
如何请求获取那个系统时间,由请求不成功返回的代码可以看到一个js链接,然后对此链接发出请求就能获得js代码,从而截取保存进入txt文件中
def update_time():
    """Refresh the server time and regenerate ``hexin_v``.

    Posts the query with the current (rejected) token, extracts the script URL
    from the returned page, downloads that script and keeps everything up to
    its first ``;`` as the new ``TOKEN_SERVER_TIME`` statement. The JS context
    is then recompiled with that statement so the new token is computed from
    the fresh server time (the original generated the token from the context
    that still contained the old time). Both values are cached in
    ``token_time.txt``.

    Relies on the module-level ``url``, ``headers``, ``data`` and ``js``.

    Raises:
        RuntimeError: if the page contains no script link, or the downloaded
            script does not start with a TOKEN_SERVER_TIME statement.
    """
    global hexin_v, JS
    blocked_page = requests.post(url=url, headers=headers, data=json.dumps(data)).text
    script_match = re.search('<script src="(?P<jsurl>.*?)" type=', blocked_page, re.S)
    if script_match is None:
        raise RuntimeError("no <script src=...> link found in the response; cannot refresh TOKEN_SERVER_TIME")
    script_url = "http:" + script_match.group("jsurl")
    script_text = requests.get(script_url).text
    token_time_text = script_text[:script_text.find(";") + 1]
    # Fail before touching the cache file if the statement is not what the JS expects.
    if "TOKEN_SERVER_TIME" not in token_time_text:
        raise RuntimeError("downloaded script does not start with a TOKEN_SERVER_TIME statement")
    # Recompile so rt.update() sees the new server time.
    JS = execjs.compile(token_time_text + "\n" + js)
    hexin_v = JS.call("rt.update")
    with open("token_time.txt", "w") as fa:
        fa.write(token_time_text + "\n" + hexin_v)
    print("已更新hexin-v与时间")
此功能用于更新服务器时间,并在hexin-v失效时重新生成hexin-v,从而恢复正常请求
读取txt文件中的代码拼接上js代码中,再用execjs调用js函数生成hexin-v
# Cached state: line 1 is the TOKEN_SERVER_TIME statement, line 2 the last hexin-v.
with open("token_time.txt", "r") as toke:
    token_time, hexin_v = toke.readline(), toke.readline()
print(hexin_v)
print(token_time)

# The time statement is prepended to the extracted JS before compiling it.
with open("hexin-v_get.js", "r", encoding="utf-8") as f:
    js = f.read()
JS = execjs.compile("\n".join([token_time, js]))
最后代码整合:
import json
import re
import execjs
import requests
import requests.utils
# NOTE(review): a `global` statement at module level has no effect; hexin_v is
# already a module-level name once it is assigned further down.
global hexin_v
# Endpoint that returns the query result data.
url = "http://www.iwencai.com/customized/chart/get-robot-data"
def update_time():  # refresh hexin-v
    """Refresh the server time and regenerate ``hexin_v``.

    Posts the query with the current (rejected) token, extracts the script URL
    from the returned page, downloads that script and keeps everything up to
    its first ``;`` as the new ``TOKEN_SERVER_TIME`` statement. The JS context
    is then recompiled with that statement so the new token is computed from
    the fresh server time (the original generated the token from the context
    that still contained the old time). Both values are cached in
    ``token_time.txt``.

    Relies on the module-level ``url``, ``headers``, ``data`` and ``js``.

    Raises:
        RuntimeError: if the page contains no script link, or the downloaded
            script does not start with a TOKEN_SERVER_TIME statement.
    """
    global hexin_v, JS
    blocked_page = requests.post(url=url, headers=headers, data=json.dumps(data)).text
    script_match = re.search('<script src="(?P<jsurl>.*?)" type=', blocked_page, re.S)
    if script_match is None:
        raise RuntimeError("no <script src=...> link found in the response; cannot refresh TOKEN_SERVER_TIME")
    script_url = "http:" + script_match.group("jsurl")
    script_text = requests.get(script_url).text
    token_time_text = script_text[:script_text.find(";") + 1]
    # Fail before touching the cache file if the statement is not what the JS expects.
    if "TOKEN_SERVER_TIME" not in token_time_text:
        raise RuntimeError("downloaded script does not start with a TOKEN_SERVER_TIME statement")
    # Recompile so rt.update() sees the new server time.
    JS = execjs.compile(token_time_text + "\n" + js)
    hexin_v = JS.call("rt.update")
    with open("token_time.txt", "w") as fa:
        fa.write(token_time_text + "\n" + hexin_v)
    print("已更新hexin-v与时间")
# with open("hexin.txt","w")as fe:
# fe.write(hexin_v)
# Cached state: line 1 is the TOKEN_SERVER_TIME statement, line 2 the last hexin-v.
with open("token_time.txt", "r") as toke:
    token_time, hexin_v = toke.readline(), toke.readline()
print(hexin_v)
print(token_time)

# The time statement is prepended to the extracted JS before compiling it.
with open("hexin-v_get.js", "r", encoding="utf-8") as f:
    js = f.read()
JS = execjs.compile("\n".join([token_time, js]))
# hexin_v = JS.call("rt.update")
# Request payload for the get-robot-data endpoint (key order kept as captured).
data = {
    "question": "20221030涨停",
    "perpage": 50,
    "page": 1,
    "secondary_intent": "",
    "log_info": {"input_type": "typewrite"},
    "source": "Ths_iwencai_Xuangu",
    "version": "2.0",
    "query_area": "",
    "block_list": "",
    "add_info": {
        "urp": {"scene": 1, "company": 1, "business": 1},
        "contentType": "json",
        "searchInfo": True,
    },
    "rsh": "Ths_iwencai_Xuangu_xdoaiz62bgbofsagvw5mfuwnxmdl7mto",
}

# Only the "v" cookie is sent; its value is the cached hexin-v token.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
    "Content-Type": "application/json",
    "Referer": "http://www.iwencai.com/unifiedwap/result?w=20221030%E6%B6%A8%E5%81%9C",
    "cookie": f"v={hexin_v}",
}
# update_time()
#
# Substring this script treats as the sign that the token was rejected.
BLOCKED_MARKER = "//192.168.201.240"

# First attempt with the cached token.
resp = requests.post(url=url, data=json.dumps(data), headers=headers).text
if BLOCKED_MARKER in resp:
    # Token rejected: refresh it and retry exactly once.
    # The original jumped to the retry via `raise "出错"`, which is itself a
    # TypeError in Python 3 and only worked because a bare `except:` swallowed
    # every error, including real failures inside update_time().
    update_time()
    print(hexin_v)
    # `headers` captured the old token when it was built, so rebuild the cookie.
    headers = {**headers, "cookie": f"v={hexin_v}"}
    resp = requests.post(url=url, data=json.dumps(data), headers=headers).text
print(resp)
最终效果:(这里status_code为0并非失败,而是原本就是这样设计,成功获得数据)
当失效时候自动更新:
hexin-v有效时或更新后均可正常使用,这样就不用每次请求都更新hexin-v