刚做过题,在此简单的做一下记录,以便以后查看。
目标
抓取所有(5页)机票的价格,并计算所有机票价格的平均值。
分析网页
首先打开开发者工具,然后刷新页面进行请求,在这里可以看到如下界面:
在这里发现该网站对调试进行了限制,那么在此处我们就可以在相应的debugger代码处进行设置,将其设置为永不在此处停留即可。
在网页加载完成之后,我们就可以在XHR中找到对应的数据包,如下所示:
在此处可以看到其加密参数m:
至此,我们已经明确了我们的目标,破解加密参数m。之后携带这个参数对目标网站发起请求即可获得数据。
找js代码
在明确目标之后,我们就需要对加密参数进行破解,一般加密过程都是由js代码执行的,所以我们需要找出其加密的js代码,然后使用python对其进行复写或者调用。
一般情况下,我们可以直接对加密参数进行搜索,但是搜索此处的参数m不太方便,因为会有大量的数据出现。
我在此处采用的方法是根据xhr请求去找其发起程序,如下图:
在此处可以找到xhr请求调用堆栈,在这里我们进入request请求,可以看到里面的js代码是混淆过的,不利于查找,那么我们可以对其进行美化,或者打上断点进行调试。
打上断点进行调试(不确定就多打断点),发现如下所示的参数m和加密的数据m很相似,我们就可以确定参数m的加密就在此处。
其加密核心如要是以下代码:
var _0x2268f9 = Date['\x70\x61\x72\x73\x65'](new Date()) + (16798545 + -72936737 + 156138192)
, _0x57feae = oo0O0(_0x2268f9['\x74\x6f\x53\x74\x72' + '\x69\x6e\x67']()) + window['\x66'];
const _0x5d83a3 = {};
_0x5d83a3['\x70\x61\x67\x65'] = window['\x70\x61\x67\x65'],
_0x5d83a3['\x6d'] = _0x57feae + '\u4e28' + _0x2268f9 / (-1 * 3483 + -9059 + 13542);
也就是如下代码:
var _0x2268f9 = Date.parse(new Date()) + 100000000, _0x57feae = oo0O0(_0x2268f9.toString()) + window.f;
const _0x5d83a3 = {};
_0x5d83a3.page = window.page;
_0x5d83a3.m = oo0O0(_0x2268f9.toString()) + window.f + '丨' + _0x2268f9 / 1000;
由此我们就可以确定_0x2268f9,下面我们就要找oo0O0函数以及window.f,之后就可以确定m的值。经过搜索,可以在页面源码中找到对应函数:
function oo0O0(mw) {
window.b = '';
for (var i = 0,
len = window.a.length; i < len; i++) {
console.log(window.a[i]);
window.b += String[document.e + document.g](window.a[i][document.f + document.h]() - i - window.c)
}
var U = ['W5r5W6VdIHZcT8kU', 'WQ8CWRaxWQirAW=='];
var J = function(o, E) {
o = o - 0x0;
var N = U[o];
if (J['bSSGte'] === undefined) {
var Y = function(w) {
var m = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=',
T = String(w)['replace'](/=+$/, '');
var A = '';
for (var C = 0x0,
b, W, l = 0x0; W = T['charAt'](l++);~W && (b = C % 0x4 ? b * 0x40 + W: W, C++%0x4) ? A += String['fromCharCode'](0xff & b >> ( - 0x2 * C & 0x6)) : 0x0) {
W = m['indexOf'](W)
}
return A
};
var t = function(w, m) {
var T = [],
A = 0x0,
C,
b = '',
W = '';
w = Y(w);
for (var R = 0x0,
v = w['length']; R < v; R++) {
W += '%' + ('00' + w['charCodeAt'](R)['toString'](0x10))['slice']( - 0x2)
}
w = decodeURIComponent(W);
var l;
for (l = 0x0; l < 0x100; l++) {
T[l] = l
}
for (l = 0x0; l < 0x100; l++) {
A = (A + T[l] + m['charCodeAt'](l % m['length'])) % 0x100,
C = T[l],
T[l] = T[A],
T[A] = C
}
l = 0x0,
A = 0x0;
for (var L = 0x0; L < w['length']; L++) {
l = (l + 0x1) % 0x100,
A = (A + T[l]) % 0x100,
C = T[l],
T[l] = T[A],
T[A] = C,
b += String['fromCharCode'](w['charCodeAt'](L) ^ T[(T[l] + T[A]) % 0x100])
}
return b
};
J['luAabU'] = t,
J['qlVPZg'] = {},
J['bSSGte'] = !![]
}
var H = J['qlVPZg'][o];
return H === undefined ? (J['TUDBIJ'] === undefined && (J['TUDBIJ'] = !![]), N = J['luAabU'](N, E), J['qlVPZg'][o] = N) : N = H,
N
};
eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27'));
return ''
}
可以发现该函数返回值为空字符。那么我们就只需要确定,window.f即可。在控制台输出window.f等于"227e2d547d1436ba2cfc6bc4526967e9"?你以为window.f是一个定值吗?很明显你想多了
再次请求发现window.f发生了变化。问题在哪呢?
eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27'));
// 这段代码生成window.f,其内部是由md5加密生成的,可在控制台打印结果
问题出在oo0O0函数内部,函数不只是单纯的返回空字符,该函数在内部暗度陈仓动态生成window.f,由eval函数执行产生。
至此,我们就可以编写程序实现数据获取。
编写程序
确定完加密参数后对网站发起请求就十分简单了,这里直接放代码:
import requests
import execjs
from urllib.parse import quote
ctx = execjs.compile(open(r'js-1.js',encoding='utf-8').read())
m = ctx.call('getToken')
total_price = 0
total_num = 0
url = 'https://match.yuanrenxue.com/api/match/1?'
headers = {
'user-agent':'yuanrenxue.project',
'referer': 'https://match.yuanrenxue.com/match/1',
}
for i in range(1,6):
url_n = url + 'page={}&m={}'.format(i,m)
res = requests.get(url_n,headers=headers).json()
for data in res['data']:
total_price += int(data['value'])
total_num += 1
print('avger_price is %d'%(total_price/total_num))
js代码如下:
var hexcase = 0;
var b64pad = "";
var chrsz = 16;
function hex_md5(a) {
return binl2hex(core_md5(str2binl(a), a.length * chrsz))
}
function b64_md5(a) {
return binl2b64(core_md5(str2binl(a), a.length * chrsz))
}
function str_md5(a) {
return binl2str(core_md5(str2binl(a), a.length * chrsz))
}
function hex_hmac_md5(a, b) {
return binl2hex(core_hmac_md5(a, b))
}
function b64_hmac_md5(a, b) {
return binl2b64(core_hmac_md5(a, b))
}
function str_hmac_md5(a, b) {
return binl2str(core_hmac_md5(a, b))
}
function md5_vm_test() {
return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72"
}
function core_md5(p, k) {
p[k >> 5] |= 128 << ((k) % 32);
p[(((k + 64) >>> 9) << 4) + 14] = k;
var o = 1732584193;
var n = -271733879;
var m = -1732584194;
var l = 271733878;
for (var g = 0; g < p.length; g += 16) {
var j = o;
var h = n;
var f = m;
var e = l;
o = md5_ff(o, n, m, l, p[g + 0], 7, -680976936);
l = md5_ff(l, o, n, m, p[g + 1], 12, -389564586);
m = md5_ff(m, l, o, n, p[g + 2], 17, 606105819);
n = md5_ff(n, m, l, o, p[g + 3], 22, -1044525330);
o = md5_ff(o, n, m, l, p[g + 4], 7, -176418897);
l = md5_ff(l, o, n, m, p[g + 5], 12, 1200080426);
m = md5_ff(m, l, o, n, p[g + 6], 17, -1473231341);
n = md5_ff(n, m, l, o, p[g + 7], 22, -45705983);
o = md5_ff(o, n, m, l, p[g + 8], 7, 1770035416);
l = md5_ff(l, o, n, m, p[g + 9], 12, -1958414417);
m = md5_ff(m, l, o, n, p[g + 10], 17, -42063);
n = md5_ff(n, m, l, o, p[g + 11], 22, -1990404162);
o = md5_ff(o, n, m, l, p[g + 12], 7, 1804660682);
l = md5_ff(l, o, n, m, p[g + 13], 12, -40341101);
m = md5_ff(m, l, o, n, p[g + 14], 17, -1502002290);
n = md5_ff(n, m, l, o, p[g + 15], 22, 1236535329);
o = md5_gg(o, n, m, l, p[g + 1], 5, -165796510);
l = md5_gg(l, o, n, m, p[g + 6], 9, -1069501632);
m = md5_gg(m, l, o, n, p[g + 11], 14, 643717713);
n = md5_gg(n, m, l, o, p[g + 0], 20, -373897302);
o = md5_gg(o, n, m, l, p[g + 5], 5, -701558691);
l = md5_gg(l, o, n, m, p[g + 10], 9, 38016083);
m = md5_gg(m, l, o, n, p[g + 15], 14, -660478335);
n = md5_gg(n, m, l, o, p[g + 4], 20, -405537848);
o = md5_gg(o, n, m, l, p[g + 9], 5, 568446438);
l = md5_gg(l, o, n, m, p[g + 14], 9, -1019803690);
m = md5_gg(m, l, o, n, p[g + 3], 14, -187363961);
n = md5_gg(n, m, l, o, p[g + 8], 20, 1163531501);
o = md5_gg(o, n, m, l, p[g + 13], 5, -1444681467);
l = md5_gg(l, o, n, m, p[g + 2], 9, -51403784);
m = md5_gg(m, l, o, n, p[g + 7], 14, 1735328473);
n = md5_gg(n, m, l, o, p[g + 12], 20, -1921207734);
o = md5_hh(o, n, m, l, p[g + 5], 4, -378558);
l = md5_hh(l, o, n, m, p[g + 8], 11, -2022574463);
m = md5_hh(m, l, o, n, p[g + 11], 16, 1839030562);
n = md5_hh(n, m, l, o, p[g + 14], 23, -35309556);
o = md5_hh(o, n, m, l, p[g + 1], 4, -1530992060);
l = md5_hh(l, o, n, m, p[g + 4], 11, 1272893353);
m = md5_hh(m, l, o, n, p[g + 7], 16, -155497632);
n = md5_hh(n, m, l, o, p[g + 10], 23, -1094730640);
o = md5_hh(o, n, m, l, p[g + 13], 4, 681279174);
l = md5_hh(l, o, n, m, p[g + 0], 11, -358537222);
m = md5_hh(m, l, o, n, p[g + 3], 16, -722881979);
n = md5_hh(n, m, l, o, p[g + 6], 23, 76029189);
o = md5_hh(o, n, m, l, p[g + 9], 4, -640364487);
l = md5_hh(l, o, n, m, p[g + 12], 11, -421815835);
m = md5_hh(m, l, o, n, p[g + 15], 16, 530742520);
n = md5_hh(n, m, l, o, p[g + 2], 23, -995338651);
o = md5_ii(o, n, m, l, p[g + 0], 6, -198630844);
l = md5_ii(l, o, n, m, p[g + 7], 10, 11261161415);
m = md5_ii(m, l, o, n, p[g + 14], 15, -1416354905);
n = md5_ii(n, m, l, o, p[g + 5], 21, -57434055);
o = md5_ii(o, n, m, l, p[g + 12], 6, 1700485571);
l = md5_ii(l, o, n, m, p[g + 3], 10, -1894446606);
m = md5_ii(m, l, o, n, p[g + 10], 15, -1051523);
n = md5_ii(n, m, l, o, p[g + 1], 21, -2054922799);
o = md5_ii(o, n, m, l, p[g + 8], 6, 1873313359);
l = md5_ii(l, o, n, m, p[g + 15], 10, -30611744);
m = md5_ii(m, l, o, n, p[g + 6], 15, -1560198380);
n = md5_ii(n, m, l, o, p[g + 13], 21, 1309151649);
o = md5_ii(o, n, m, l, p[g + 4], 6, -145523070);
l = md5_ii(l, o, n, m, p[g + 11], 10, -1120210379);
m = md5_ii(m, l, o, n, p[g + 2], 15, 718787259);
n = md5_ii(n, m, l, o, p[g + 9], 21, -343485551);
o = safe_add(o, j);
n = safe_add(n, h);
m = safe_add(m, f);
l = safe_add(l, e)
}
return Array(o, n, m, l)
}
function md5_cmn(h, e, d, c, g, f) {
return safe_add(bit_rol(safe_add(safe_add(e, h), safe_add(c, f)), g), d)
}
function md5_ff(g, f, k, j, e, i, h) {
return md5_cmn((f & k) | ((~f) & j), g, f, e, i, h)
}
function md5_gg(g, f, k, j, e, i, h) {
return md5_cmn((f & j) | (k & (~j)), g, f, e, i, h)
}
function md5_hh(g, f, k, j, e, i, h) {
return md5_cmn(f ^ k ^ j, g, f, e, i, h)
}
function md5_ii(g, f, k, j, e, i, h) {
return md5_cmn(k ^ (f | (~j)), g, f, e, i, h)
}
function core_hmac_md5(c, f) {
var e = str2binl(c);
if (e.length > 16) {
e = core_md5(e, c.length * chrsz)
}
var a = Array(16)
, d = Array(16);
for (var b = 0; b < 16; b++) {
a[b] = e[b] ^ 909522486;
d[b] = e[b] ^ 1549556828
}
var g = core_md5(a.concat(str2binl(f)), 512 + f.length * chrsz);
return core_md5(d.concat(g), 512 + 128)
}
function safe_add(a, d) {
var c = (a & 65535) + (d & 65535);
var b = (a >> 16) + (d >> 16) + (c >> 16);
return (b << 16) | (c & 65535)
}
function bit_rol(a, b) {
return (a << b) | (a >>> (32 - b))
}
function str2binl(d) {
var c = Array();
var a = (1 << chrsz) - 1;
for (var b = 0; b < d.length * chrsz; b += chrsz) {
c[b >> 5] |= (d.charCodeAt(b / chrsz) & a) << (b % 32)
}
return c
}
function binl2str(c) {
var d = "";
var a = (1 << chrsz) - 1;
for (var b = 0; b < c.length * 32; b += chrsz) {
d += String.fromCharCode((c[b >> 5] >>> (b % 32)) & a)
}
return d
}
function binl2hex(c) {
var b = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
var d = "";
for (var a = 0; a < c.length * 4; a++) {
d += b.charAt((c[a >> 2] >> ((a % 4) * 8 + 4)) & 15) + b.charAt((c[a >> 2] >> ((a % 4) * 8)) & 15)
}
return d
}
function binl2b64(d) {
var c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var f = "";
for (var b = 0; b < d.length * 4; b += 3) {
var e = (((d[b >> 2] >> 8 * (b % 4)) & 255) << 16) | (((d[b + 1 >> 2] >> 8 * ((b + 1) % 4)) & 255) << 8) | ((d[b + 2 >> 2] >> 8 * ((b + 2) % 4)) & 255);
for (var a = 0; a < 4; a++) {
if (b * 8 + a * 6 > d.length * 32) {
f += b64pad
} else {
f += c.charAt((e >> 6 * (3 - a)) & 63)
}
}
}
return f
}
function getToken() {
var t = Date.parse(new Date()) + 100000000;
var res ;
var d = hex_md5(t.toString())
res = d + '丨' + t / 1000;
return res
}