今天我们来爬取易车中,关于本田思域的参数配置:
1.抓包:
抓包,定位到目标url:数据及参数没有加密
接下来看看请求头内有没有异常项:有…且不少的样子
2.逆向:
x-sign:
我们先逆向x-sign的生成:搜索关键词
结合目标url,定位到入口函数 s(e,t):
进入s()函数:
分析知道 该if判断一定为真
最后 return s
——> s= yicheUtils.md5(n)
——> n = "cid=" + t.cid + "&param=" + i + o + t.timestamp
——> i是格式处理,o = r(e, t) ,t.timestamp ?开始往上逆向:
n的逆向:
先进入r():
/**
 * Pick the secret that is appended to the string being signed.
 *
 * @param {Object} e - request config; only e.headers["x-platform"] is read.
 * @param {Object} t - signing config providing `cid` and `headerEncryptKeys`.
 * @returns {*} t.cid when the request carries no "x-platform" header;
 *   otherwise the `value` of the headerEncryptKeys entry whose `name`
 *   matches that header, or a hard-coded fallback key when none matches.
 */
function r(e, t) {
    var platform = e.headers && e.headers["x-platform"];
    // No headers / no platform header: fall back to the plain cid.
    if (!platform) {
        return t.cid;
    }
    // Look for the key entry registered for this platform name.
    var n = t.headerEncryptKeys.find(function(entry) {
        return entry.name == platform;
    });
    if (n) {
        return n.value;
    }
    return "DB2560A6EBC65F37A0484295CD4EDD25";
}
// 最后return ——> n如果有值,就n.value,没有就是 "DB2560A6EBC65F37A0484295CD4EDD25"
我们接下来查看t.headerEncryptKeys.find 是否会生成n:
逻辑:t.headerEncryptKeys是一个数组 ——> 查找该数组里面的每一个name ,当name == pc 时返回该数组元素
接下来查看该元素是如何生成的:发现是写死的
#直接复制该数组:
headerEncryptKeys: [{
name: "pc",
value: "19DDD1FBDFF065D3A4DA777D2D7A81EC",
cid: "508"
},
函数最后return "19DDD1FBDFF065D3A4DA777D2D7A81EC" ,赋值给n ————> 再赋给 o
所以o = "19DDD1FBDFF065D3A4DA777D2D7A81EC"
接下来还差t.timestamp :往上逆向发现是时间戳
i 跟o和t.timestamp 都拿到了,n就有了:
整理:
n = "cid=" + t.cid + "&param=" + i + o + t.timestamp
e.data:{cityId: '522', serialId: '1661'} //参数
i = JSON.stringify(e.data)
t.cid:508 //固定值
o = "19DDD1FBDFF065D3A4DA777D2D7A81EC"
n = "cid=" + t.cid + "&param=" + i + o + t.timestamp
s的逆向:
进入s = yicheUtils.md5(n):
分析得知是纯md5:
代码:
var crypto = require("crypto");
/**
 * Plain MD5 of the given input, as a lowercase hex string.
 *
 * @param {string} e - text to hash.
 * @returns {string} 32-character hexadecimal digest.
 */
function my_md5(e) {
    var hasher = crypto.createHash("md5");
    hasher.update(e);
    return hasher.digest("hex");
}
/**
 * Build the "x-timestamp" and "x-sign" request headers.
 *
 * The signature is md5("cid=508&param=" + JSON.stringify(params) + key + timestamp).
 *
 * @param {Object} params - request payload, e.g. {cityId: '201', serialId: '2406'}.
 * @returns {Object} object holding "x-timestamp" and "x-sign".
 */
function get_headers(params){
    var headers = {};
    // Generate the timestamp exactly once: the value sent in the header must be
    // the same value that went into the signature.
    var timestamp = new Date().getTime() + "";
    headers["x-timestamp"] = timestamp;
    // Fixed key of the "pc" entry in headerEncryptKeys.
    var o = "19DDD1FBDFF065D3A4DA777D2D7A81EC";
    // The separator is the literal "&param=" (it must not be written as "¶m=",
    // which is what "&para" + "m=" turns into after HTML-entity decoding).
    var s = "cid=" + 508 + "&param=" + JSON.stringify(params) + o + timestamp;
    headers["x-sign"] = my_md5(s);
    return headers;
}
接下来还差:x-city-id、x-ip-address、x-user-guid
x-city-id
x-ip-address
x-user-guid
进入:
发现 c 和 l 只是获取数据 ——> 可以定死
u()的逻辑是有则取,无则生成
进入window.yicheUtils.createGuid():查看如何生成
发现random,随机值——>可以定死
x-platform:
搜索发现是常量:
3.代码整合:
//03_易车.js:
var crypto = require("crypto");
/**
 * Hash a string with MD5 and return the digest in hexadecimal form.
 *
 * @param {string} e - input text.
 * @returns {string} hex-encoded MD5 digest.
 */
function my_md5(e) {
    var md5 = crypto.createHash("md5");
    var digest = md5.update(e).digest("hex");
    return digest;
}
/**
 * Build the full set of custom "x-*" headers for the request.
 *
 * "x-sign" is md5("cid=508&param=" + JSON.stringify(params) + key + timestamp);
 * the remaining headers are fixed values.
 *
 * @param {Object} params - request payload, e.g. {cityId: '201', serialId: '2406'}.
 * @returns {Object} headers: x-timestamp, x-sign, x-city-id, x-ip-address,
 *   x-user-guid and x-platform.
 */
function get_headers(params){
    var headers = {};
    // One timestamp only: the header value and the signed value must be identical.
    var timestamp = new Date().getTime() + "";
    headers["x-timestamp"] = timestamp;
    // Fixed key of the "pc" entry in headerEncryptKeys.
    var o = "19DDD1FBDFF065D3A4DA777D2D7A81EC";
    // The separator is the literal "&param=" (it must not be written as "¶m=",
    // which is what "&para" + "m=" turns into after HTML-entity decoding).
    var s = "cid=" + 508 + "&param=" + JSON.stringify(params) + o + timestamp;
    headers["x-sign"] = my_md5(s);
    headers["x-city-id"] = '201';
    headers["x-ip-address"] = '2408:8207:7890:f430:35e7:9012:b5ee:1fa4';
    headers["x-user-guid"] = '3b0fec58-e8fc-4a83-b1b6-672adbe11c76'; // taken from the cookie
    headers["x-platform"] = "pc";
    return headers;
}
import subprocess
from functools import partial

# Patch Popen to default to UTF-8 before execjs is imported.
# NOTE(review): presumably this avoids decode errors when execjs spawns the
# JS runtime on systems whose default encoding is not UTF-8 - confirm.
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")

import execjs
import requests
import json

# Load and compile the signing script; get_headers() is defined there.
with open("03_易车.js", mode='r', encoding="utf-8") as f:
    js_code = f.read()
js = execjs.compile(js_code)

url = "https://mapi.yiche.com/web_api/car_model_api/api/v1/car/config_new_param"

dic = {"cityId": "201", "serialId": "2406"}
params = {
    "cid": "508",
    # Compact separators so the sent JSON has the same form as the
    # JSON.stringify() output that the JS side signs.
    "param": json.dumps(dic, separators=(',', ':'))
}

# The signed x-* headers come from the JS side; the rest are added here.
headers = js.call("get_headers", dic)
headers['user-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
headers['referer'] = "https://car.yiche.com/dibadaiyage/peizhi/"
headers['cookie'] = "auto_id=6ae32b66b9af3d48b7c670281066438a; selectcity=110100; selectcityid=201; selectcityName=%E5%8C%97%E4%BA%AC; UserGuid=3b0fec58-e8fc-4a83-b1b6-672adbe11c76; CIGUID=3b0fec58-e8fc-4a83-b1b6-672adbe11c76; isWebP=true; locatecity=110100; bitauto_ipregion=2408%3A8207%3A7890%3Af430%3A35e7%3A9012%3Ab5ee%3A1fa4%3A%E5%8C%97%E4%BA%AC%E5%B8%82%3B201%2C%E5%8C%97%E4%BA%AC%2Cbeijing; Hm_lvt_610fee5a506c80c9e1a46aa9a2de2e44=1683892185,1683897993; CIGDCID=wFsTnfJDfExHEfSXsJP6EcQX68ad7TCE; csids=2406; Hm_lpvt_610fee5a506c80c9e1a46aa9a2de2e44=1683899779"

# A timeout keeps the script from hanging forever if the server never answers.
resp = requests.get(url, params=params, headers=headers, timeout=10)
# print(resp.text)

# Save the raw response body, then show the final URL that was requested.
with open("che.json", mode="w", encoding="utf-8") as f:
    f.write(resp.text)
print(resp.request.url)