今天我们来爬取观鸟网站(中国观鸟记录中心)“活动报告”的数据:
1.抓包:
发现目标url:https://api.birdreport.cn/front/record/activity/search
及可疑请求头参数:三个
携带参数:
2.分析:
我们发现,携带的参数不好入手!
但是我们知道,url携带的可变加密参数,百分之99是通过浏览器生成某个随机值(如时间戳),然后结合该随机值与其他参数进行运算,最终得到一个可变的加密参数。
所以我们可以合理推测:携带的可疑请求头参数中,应该至少有一个参与了加密运算。
我们先尝试搜索Timestamp
定位到可疑位置:发现是使用了eval进行了反爬
我们将eval内的代码复制出来,在控制台中查看真实代码:
// eval()出来的源码:
/**
 * Generate a 32-character lowercase hex string used as the request id.
 *
 * Layout quirks (all reproduced from the site's script):
 *   - index 14 is always "4";
 *   - index 19 is one of "8", "9", "a", "b";
 *   - indexes 8, 13 and 18 are copies of the character at index 23.
 *
 * @returns {string} 32 hex characters.
 */
function getUuid() {
  const HEX = "0123456789abcdef";
  // One Math.random() draw per position, in index order.
  const chars = Array.from({ length: 32 }, function () {
    return HEX.charAt(Math.floor(Math.random() * 16));
  });
  chars[14] = "4";
  // The hex character is coerced to a number by `&`; letters become NaN -> 0,
  // so "a".."f" always map to "8" here.
  chars[19] = HEX.charAt((chars[19] & 0x3) | 0x8);
  const shared = chars[23];
  chars[18] = shared;
  chars[13] = shared;
  chars[8] = shared;
  return chars.join("");
}
/**
 * Return a shallow copy of `a` whose own keys are inserted in ascending
 * default-sort (UTF-16 code unit) order.
 *
 * Callers pass the result to JSON.stringify, which emits keys in insertion
 * order, so this fixes the key order of the serialized payload.
 *
 * Fixes over the captured version:
 *   - the sorted key array is walked with for...of instead of for...in, so
 *     enumerable additions to Array.prototype no longer inject junk keys;
 *   - only the object's own enumerable keys are copied.
 *
 * @param {Object} a - Source object; null/undefined yields an empty object.
 * @returns {Object} New object with the same own key/value pairs, keys sorted.
 */
function sort_ASCII(a) {
  // `== null` deliberately matches both null and undefined.
  const keys = a == null ? [] : Object.keys(a).sort();
  const sorted = {};
  for (const key of keys) {
    sorted[key] = a[key];
  }
  return sorted;
}
/**
 * Parse the query string of a URL into a plain key/value object.
 *
 * Values are returned as-is (no percent-decoding). Parsing of a value stops
 * at "&", "#" or end of input. If a key repeats, the last value wins.
 *
 * Fix: the captured regex literals contained doubled backslashes (an artifact
 * of copying them out of a packed string), which made the "?" separator
 * optional and caused the first key to be captured with a leading "?".
 *
 * @param {string} a - URL that may contain a "?query" part.
 * @returns {Object} Map of parameter names to raw values; {} when there is no query.
 */
function url2json(a) {
  const queryPattern = /^[^?]+\?([\w\W]+)$/;
  const pairPattern = /([^&=]+)=([\w\W]*?)(&|$|#)/g;
  const ret = {};
  const urlMatch = queryPattern.exec(a);
  if (urlMatch && urlMatch[1]) {
    const query = urlMatch[1];
    let pair;
    while ((pair = pairPattern.exec(query)) !== null) {
      ret[pair[1]] = pair[2];
    }
  }
  return ret;
}
/**
 * Convert an "&"-separated form string into a plain object.
 *
 * Rules, per segment:
 *   - "key=value" (exactly one "=")  -> key maps to value (not decoded);
 *   - no "=" at all                  -> the whole segment maps to "";
 *   - more than one "="              -> the part before the first "=" maps to "".
 *
 * @param {string} a - Form-encoded string, e.g. "page=4&limit=20".
 * @returns {Object} Key/value map in segment order.
 */
function dataTojson(a) {
  const result = {};
  for (const segment of a.split("&")) {
    if (segment.indexOf("=") === -1) {
      result[segment] = "";
      continue;
    }
    const parts = segment.split("=");
    // Anything other than exactly two parts collapses to an empty value.
    result[parts[0]] = parts.length === 2 ? parts[1] : "";
  }
  return result;
}
/**
 * Serialize an object's own enumerable properties into a percent-encoded
 * "k=v&k2=v2" string.
 *
 * Properties whose value is falsy ("", 0, false, null, undefined) are
 * skipped, as in the captured version.
 *
 * Fix: ownership is checked through Object.prototype.hasOwnProperty.call, so
 * objects created with Object.create(null), or objects that carry their own
 * "hasOwnProperty" key, no longer throw.
 *
 * @param {Object} a - Source object; null/undefined yields "".
 * @returns {string} Encoded query string.
 */
const serialize = function (a) {
  const pairs = [];
  for (const key in a) {
    if (Object.prototype.hasOwnProperty.call(a, key) && a[key]) {
      pairs.push(`${encodeURIComponent(key)}=${encodeURIComponent(a[key])}`);
    }
  }
  return pairs.join("&");
};
// Captured site script, kept verbatim. JSEncrypt, $ and MD5 are not defined
// in this snippet; they come from the page.
// Public key string handed to the JSEncrypt instance below.
var paramPublicKey = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCvxXa98E1uWXnBzXkS2yHUfnBM6n3PCwLdfIox03T91joBvjtoDqiQ5x3tTOfpHs3LtiqMMEafls6b0YWtgB1dse1W5m+FpeusVkCOkQxB4SZDH6tuerIknnmB/Hsq5wgEkIvO5Pff9biig6AyoAkdWpSek/1/B7zYIepYY0lxKQIDAQAB";
var encrypt = new JSEncrypt();
encrypt.setPublicKey(paramPublicKey);
// Registers a beforeSend callback through $.ajaxSetup.
// a: the request object (setRequestHeader is called on it).
// b: the request settings (b.data is read and then replaced).
$.ajaxSetup({
beforeSend: function(a, b) {
// c: current time in epoch milliseconds; going through the Date's string
// form drops the sub-second part.
var c = Date.parse(new Date());
// d: 32-character hex request id.
var d = getUuid();
// e: JSON text of the form fields with keys sorted. When b.data is empty the
// literal string '{}' is parsed instead, which produces the single key "{}".
var e = JSON.stringify(sort_ASCII(dataTojson(b.data || '{}')));
// The outgoing body becomes the encrypted form of e.
b.data = encrypt.encryptUnicodeLong(e);
// f: signature over the plaintext e (not the ciphertext) + request id + timestamp.
var f = MD5(e + d + c);
a.setRequestHeader("timestamp", c);
a.setRequestHeader('requestId', d);
a.setRequestHeader('sign', f)
}
});
发现了关键词timestamp、requestId、sign!!
而且疑似 b.data = encrypt.encryptUnicodeLong(e); 可能就是携带的参数。
我们接下来尝试使用扣下来的代码,尝试能不能生成加密参数:
由于源码中使用了 new JSEncrypt()、MD5()
所以我们还需要安装第三方库 node-jsencrypt;MD5 可以用 Node 内置的 crypto 模块实现(crypto 无需另外下载)
var JSEncrypt = require("node-jsencrypt");
var crypto = require("crypto");
/**
 * Produce the 32-character lowercase hex request id.
 *
 * Index 14 is forced to "4", index 19 to one of "8"/"9"/"a"/"b", and
 * indexes 8, 13 and 18 are overwritten with the character at index 23.
 *
 * @returns {string} 32 hex characters.
 */
function getUuid() {
  const alphabet = "0123456789abcdef";
  const digits = [];
  while (digits.length < 32) {
    digits.push(alphabet[Math.floor(Math.random() * 16)]);
  }
  digits[14] = "4";
  // `&` coerces the hex character to a number; "a".."f" become NaN -> 0.
  digits[19] = alphabet[(digits[19] & 3) | 8];
  digits[8] = digits[13] = digits[18] = digits[23];
  return digits.join("");
}
/**
 * Copy `a` into a new object with its own keys inserted in ascending
 * default-sort (UTF-16 code unit) order, so that JSON.stringify of the
 * result has a stable key order.
 *
 * Fixes over the captured version: the sorted key list is no longer walked
 * with for...in (which also visits enumerable Array.prototype additions and
 * injected junk keys), and inherited enumerable keys of `a` are not copied.
 *
 * @param {Object} a - Source object; null/undefined yields an empty object.
 * @returns {Object} New object with sorted keys.
 */
function sort_ASCII(a) {
  if (a == null) {
    // Matches the old behaviour for null/undefined input.
    return {};
  }
  return Object.keys(a)
    .sort()
    .reduce(function (sorted, key) {
      sorted[key] = a[key];
      return sorted;
    }, {});
}
/**
 * Extract the query parameters of a URL as a plain object.
 *
 * Values are not percent-decoded; a value ends at "&", "#" or end of input.
 *
 * Fix: the regex literals previously had doubled backslashes, so the "?"
 * separator was optional and the first key came back prefixed with "?".
 *
 * @param {string} a - URL, possibly with a "?query" part.
 * @returns {Object} Parameter map; {} when the URL has no query string.
 */
function url2json(a) {
  const ret = {};
  const urlMatch = /^[^?]+\?([\w\W]+)$/.exec(a);
  if (!urlMatch || !urlMatch[1]) {
    return ret;
  }
  const pairPattern = /([^&=]+)=([\w\W]*?)(&|$|#)/g;
  for (let m = pairPattern.exec(urlMatch[1]); m !== null; m = pairPattern.exec(urlMatch[1])) {
    ret[m[1]] = m[2];
  }
  return ret;
}
/**
 * Turn an "&"-separated form string into a key/value object.
 *
 * A segment with exactly one "=" yields key -> value (no decoding). A segment
 * with no "=" or with more than one "=" yields key -> "", where the key is
 * the text before the first "=".
 *
 * @param {string} a - Form-encoded string.
 * @returns {Object} Key/value map in segment order.
 */
function dataTojson(a) {
  return a.split("&").reduce(function (acc, piece) {
    const fields = piece.split("=");
    // fields.length is 1 (no "="), 2 (one "=") or more; only 2 keeps a value.
    acc[fields[0]] = fields.length === 2 ? fields[1] : "";
    return acc;
  }, {});
}
/**
 * Build a percent-encoded "k=v&k2=v2" string from an object's own
 * enumerable properties, skipping any property whose value is falsy.
 *
 * Fix: the ownership test goes through Object.prototype.hasOwnProperty.call
 * instead of a.hasOwnProperty, which threw for null-prototype objects and
 * for objects with a "hasOwnProperty" key of their own.
 *
 * @param {Object} a - Source object; null/undefined yields "".
 * @returns {string} Encoded query string.
 */
const serialize = function (a) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const encoded = [];
  for (const name in a) {
    if (!hasOwn.call(a, name) || !a[name]) {
      continue;
    }
    encoded.push(encodeURIComponent(name) + "=" + encodeURIComponent(a[name]));
  }
  return encoded.join("&");
};
// Public key string copied from the site's script.
const paramPublicKey = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCvxXa98E1uWXnBzXkS2yHUfnBM6n3PCwLdfIox03T91joBvjtoDqiQ5x3tTOfpHs3LtiqMMEafls6b0YWtgB1dse1W5m+FpeusVkCOkQxB4SZDH6tuerIknnmB/Hsq5wgEkIvO5Pff9biig6AyoAkdWpSek/1/B7zYIepYY0lxKQIDAQAB";
// Single shared JSEncrypt instance, configured once with that key.
const encrypt = new JSEncrypt();
encrypt.setPublicKey(paramPublicKey);
// Adapted from the site's beforeSend callback so that it runs under Node.
/**
 * Trial version of the signing routine: computes and returns only the sign.
 *
 * @param {*} a - Unused; kept from the original callback signature.
 * @param {string} b - Form-encoded request data; when falsy, the literal
 *   string '{}' is parsed instead.
 * @returns {string} Hex md5 of (sorted-JSON payload + request id + timestamp).
 */
function functionget_headers(a, b) {
  const timestamp = Date.parse(new Date());
  const requestId = getUuid();
  const payload = JSON.stringify(sort_ASCII(dataTojson(b || '{}')));
  // Import smoke test: the encrypted result is not used here.
  encrypt.encryptUnicodeLong(payload);
  // The site calls MD5(e + d + c); Node's crypto module stands in for it.
  return crypto
    .createHash("md5")
    .update(payload + requestId + timestamp)
    .digest('hex');
}
// Quick run with dummy string arguments; prints the hex digest that functionget_headers returns.
console.log(functionget_headers('123456','123456'))
发现可以运行了:
但是我们不确定这个是不是符合,且该代码并不在浏览器源码中,没办法断点测试
我们注意到源码中有:$.ajaxSetup
相当于ajax的拦截器,每一个ajax请求都要走这里
那么我们可以尝试进入ajax ——> 因为当运行ajax的时候,会调用之前的Ajax设置。
进入:
搜:beforeSend
进入:
经过测试:确定就是这里
且拿到了 e ——> e 经过 encrypt.encryptUnicodeLong 加密之后,就是请求携带的参数(即 b.data)
3.整合代码:
//看鸟.js:
var JSEncrypt = require("node-jsencrypt");
var crypto = require("crypto");
/**
 * Create the 32-character lowercase hex request id sent in the
 * `requestId` header.
 *
 * Fixed positions: index 14 is "4"; index 19 is "8", "9", "a" or "b";
 * indexes 8, 13 and 18 repeat the character at index 23.
 *
 * @returns {string} 32 hex characters.
 */
function getUuid() {
  const HEX_DIGITS = "0123456789abcdef";
  const pickDigit = function () {
    return HEX_DIGITS.charAt(Math.floor(Math.random() * 0x10));
  };
  const out = new Array(32);
  for (let pos = 0; pos < out.length; pos += 1) {
    out[pos] = pickDigit();
  }
  out[14] = "4";
  // Bitwise AND coerces the character to a number ("a".."f" -> NaN -> 0).
  const variant = (out[19] & 0x3) | 0x8;
  out[19] = HEX_DIGITS.charAt(variant);
  const repeated = out[23];
  [18, 13, 8].forEach(function (pos) {
    out[pos] = repeated;
  });
  return out.join("");
}
/**
 * Rebuild `a` as a new object whose own keys are inserted in ascending
 * default-sort (UTF-16 code unit) order. get_headers feeds the result to
 * JSON.stringify, so the key order determines the text that is signed.
 *
 * Fixes over the captured version: iterating the sorted key array with
 * for...in also visited enumerable Array.prototype additions (adding junk
 * keys to the result); inherited enumerable keys of `a` were copied too.
 *
 * @param {Object} a - Source object; null/undefined yields an empty object.
 * @returns {Object} New object with sorted keys.
 */
function sort_ASCII(a) {
  const sorted = {};
  if (a == null) {
    return sorted;
  }
  Object.keys(a)
    .sort()
    .forEach(function (key) {
      sorted[key] = a[key];
    });
  return sorted;
}
/**
 * Read the "?query" part of a URL into a plain object of raw
 * (non-decoded) values. A value ends at "&", "#" or end of input.
 *
 * Fix: the doubled backslashes in the original regex literals made "?"
 * optional, so the first key was returned with a leading "?" and URLs
 * without a query were still partially matched.
 *
 * @param {string} a - URL to inspect.
 * @returns {Object} Parameter map; {} when no query string is present.
 */
function url2json(a) {
  const queryPattern = /^[^?]+\?([\w\W]+)$/;
  const pairPattern = /([^&=]+)=([\w\W]*?)(&|$|#)/g;
  const ret = {};
  const urlMatch = queryPattern.exec(a);
  if (urlMatch && urlMatch[1]) {
    const query = urlMatch[1];
    let pair = pairPattern.exec(query);
    while (pair !== null) {
      ret[pair[1]] = pair[2];
      pair = pairPattern.exec(query);
    }
  }
  return ret;
}
/**
 * Parse a form-encoded string ("k=v&k2=v2") into an object.
 *
 * Only segments with exactly one "=" keep their value (left undecoded);
 * segments with no "=" or several "=" map their leading key to "".
 *
 * @param {string} a - Form-encoded string.
 * @returns {Object} Key/value map in segment order.
 */
function dataTojson(a) {
  const parsed = {};
  a.split("&").forEach(function (entry) {
    const hasSeparator = entry.indexOf("=") !== -1;
    if (!hasSeparator) {
      parsed[entry] = "";
      return;
    }
    const tokens = entry.split("=");
    if (tokens.length === 2) {
      parsed[tokens[0]] = tokens[1];
    } else {
      // More than one "=": the value is discarded.
      parsed[tokens[0]] = "";
    }
  });
  return parsed;
}
/**
 * Encode an object's own enumerable properties as "k=v&k2=v2", with keys
 * and values percent-encoded. Properties with a falsy value are omitted.
 *
 * Fix: a.hasOwnProperty(p) is replaced with
 * Object.prototype.hasOwnProperty.call(a, p) so the function also works for
 * Object.create(null) maps and for objects that define "hasOwnProperty".
 *
 * @param {Object} a - Source object; null/undefined yields "".
 * @returns {string} Encoded query string.
 */
const serialize = function (a) {
  const parts = [];
  for (const prop in a) {
    const isOwn = Object.prototype.hasOwnProperty.call(a, prop);
    if (isOwn && a[prop]) {
      parts.push(`${encodeURIComponent(prop)}=${encodeURIComponent(a[prop])}`);
    }
  }
  return parts.join("&");
};
// Public key string taken from the site's script; get_headers encrypts the
// request payload with the JSEncrypt instance configured here.
const paramPublicKey = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCvxXa98E1uWXnBzXkS2yHUfnBM6n3PCwLdfIox03T91joBvjtoDqiQ5x3tTOfpHs3LtiqMMEafls6b0YWtgB1dse1W5m+FpeusVkCOkQxB4SZDH6tuerIknnmB/Hsq5wgEkIvO5Pff9biig6AyoAkdWpSek/1/B7zYIepYY0lxKQIDAQAB";
const encrypt = new JSEncrypt();
encrypt.setPublicKey(paramPublicKey);
// The original first parameter `a` was never used, so it has been dropped.
/**
 * Build the signed headers and the encrypted body for one request.
 *
 * @param {string} b - Form-encoded request data; when falsy, the literal
 *   string '{}' is parsed instead.
 * @returns {{headers: {timestamp: string, requestId: string, sign: string}, data: *}}
 *   `headers.sign` is the hex md5 of (sorted-JSON payload + requestId +
 *   timestamp); `data` is whatever encrypt.encryptUnicodeLong returns for
 *   the payload.
 */
function get_headers(b) {
  const now = Date.parse(new Date());
  const requestId = getUuid();
  const plain = JSON.stringify(sort_ASCII(dataTojson(b || '{}')));
  const cipher = encrypt.encryptUnicodeLong(plain);
  // The signature covers the plaintext JSON, not the ciphertext.
  const sign = crypto
    .createHash("md5")
    .update(plain + requestId + now)
    .digest('hex');
  return {
    headers: {
      timestamp: String(now),
      requestId: requestId,
      sign: sign
    },
    data: cipher
  };
}
//从浏览器中得知b是数据
//var b = "city=&ctime=&district=&endTime=&limit=20&mode=0&outside_type=0&page=4&pointname=&province=%E9%9D%92%E6%B5%B7%E7%9C%81&serial_id=&startTime=&state=&taxonid=&taxonname=&username="
//console.log(get_headers(b))
python代码:
import subprocess
from functools import partial
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")
import execjs
import requests
import json
from urllib.parse import urlencode, quote
# Load the Node-side signing helper (get_headers) from 看鸟.js.
# `with` closes the file even if reading fails.
with open("看鸟.js", mode='r', encoding="utf-8") as f:
    js_code = f.read()
js = execjs.compile(js_code)

# Search form fields. Python comments use '#'; the JS-style '//' comments
# that were here made the script a SyntaxError.
data = {
    "city": "",
    "ctime": "",
    "district": "",
    "endTime": "",
    "limit": "20",
    "mode": "0",
    "outside_type": "0",
    "page": "4",  # page number
    "pointname": "",
    "province": "青海省",  # decoded form of %E9%9D%92%E6%B5%B7%E7%9C%81
    "serial_id": "",
    "startTime": "",
    "state": "",
    "taxonid": "",
    "taxonname": "",
    "username": "",
}

# get_headers expects the form-encoded string, not the dict.
encoded = urlencode(data)
ret = js.call("get_headers", encoded)

url = "https://api.birdreport.cn/front/record/activity/search"
headers = ret['headers']
session = requests.session()
# Replaces the session's headers with the signed ones, then adds the rest.
session.headers = headers
session.headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
session.headers['Referer'] = "http://www.birdreport.cn/"
session.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
# The encrypted payload is sent as-is as the request body.
resp = session.post(url, data=ret['data'])
print(resp.text)
4. 另一种思路:hook
因为携带的参数是整块的密文,所以网站大概率会使用JSON.stringify,来将数据转换为json字符串之后再进行加密(且在我们eval到的源码中,也可以看到JSON.stringify),所以我们可以尝试:hook——>JSON.stringify
hook代码:
// Console hook: pause in the debugger on every JSON.stringify call so the
// call stack can be inspected, then delegate to the saved original.
// The guard makes the snippet safe to paste more than once. Without it, a
// second paste stores the hook itself in JSON.stringify_ and every later
// call recurses until the stack overflows.
if (typeof JSON.stringify_ !== "function") {
  JSON.stringify_ = JSON.stringify;
  JSON.stringify = function () {
    debugger; // Break here to see what runs before and after serialization.
    return JSON.stringify_.apply(this, arguments);
  };
}
将代码植入,换页发现断住了:
切换调用栈:同样可以找到加密函数
数据的解密:
同理,我们也可以hook——>JSON.parse
// Console hook: pause in the debugger on every JSON.parse call so the call
// stack can be inspected, then delegate to the saved original.
// The guard makes the snippet safe to paste more than once. Without it, a
// second paste stores the hook itself in JSON.parse_ and every later call
// recurses until the stack overflows.
if (typeof JSON.parse_ !== "function") {
  JSON.parse_ = JSON.parse;
  JSON.parse = function () {
    debugger; // Break here to see what runs before and after parsing.
    return JSON.parse_.apply(this, arguments);
  };
}
进入下一个调用栈:
基本可以确定BIRDREPORT_APIJS.decode()就是解密函数
BIRDREPORT_APIJS.decode():是aes解密
代码:
const CryptoJS = require("crypto-js");
/**
 * Decrypt a response payload with AES in CBC mode and PKCS#7 padding.
 *
 * The key and IV are fixed strings, parsed as UTF-8 text (not hex-decoded).
 *
 * @param {*} a - Ciphertext passed straight to CryptoJS.AES.decrypt
 *   (presumably the encoded string returned by the API - TODO confirm).
 * @returns {string} Decrypted bytes decoded as UTF-8.
 */
function decry(a) {
  const key = CryptoJS.enc.Utf8.parse('3583ec0257e2f4c8195eec7410ff1619');
  const options = {
    iv: CryptoJS.enc.Utf8.parse("d93c0d5ec6352f20"),
    mode: CryptoJS.mode.CBC,
    padding: CryptoJS.pad.Pkcs7
  };
  const plainWords = CryptoJS.AES.decrypt(a, key, options);
  return plainWords.toString(CryptoJS.enc.Utf8);
}
// "数据" is a placeholder value; presumably it is meant to be replaced with
// the encrypted response text before running - TODO confirm.
var a="数据"
console.log(decry(a))