对python代码进行改进
import requests
import execjs
# Load the JavaScript source that defines the `get_analysis` signing routine.
with open('demo.js', 'r', encoding='utf-8') as f:
    js_code = f.read()
# Cookies sent with the API request (passed to requests.get via `cookies=`).
# NOTE(review): these look like values captured from a logged-in browser
# session — presumably they expire and must be refreshed; confirm before
# relying on them, and avoid committing real credentials.
cookies = {
'qm_check': 'A1sdRUIQChtxen8pI0dAMRcOUFseEHBeQF0JTjVBWCwycRd1QlhAXFEGFUdeSklaHQdKAAkABAsgJ1dBWD0TR1JRRAp0BQlFEBQ3TSZKFUdBbwxvBBRFIlQsSUhTFxsQU1FVV1NHXEVYVElWBRsCHAkSSQ%3D%3D',
'gr_user_id': '7b300ae1-1a2e-48f3-ac26-ef24f571d4c9',
'USERINFO': 'jHFy6VITNfYjO5gzKnxVCcHVKhkUrDYQL4rEoi%2Bu%2Fa3gWGxj5dQDgJzrkKFOU6FOm%2F4%2FLH%2BKRQ6Kkezd22166yCiZ%2FlYWomlIfHgN40yWpgrHONQD7IehPC71gALl3B5eweCfpHISDgX3EvMl7rYBQ%3D%3D',
'ada35577182650f1_gr_last_sent_cs1': 'qm21331348315',
'aso_ucenter': 'c0160lkfZZ3b6mCL6Ic%2FE9rfux%2FFzvScbLXxY1aeQwexSd5FHwlCTHtQXpRw3BnZag8',
'AUTHKEY': 'kieguXDTNdsivnTuTu%2FDz8rEJtxANQ3H4WyYIrC6WalwuS0e3QcGe2ynYUWdRmasuVvmGU65IVQ%2BOjFG4DNkWj%2BcAjh7wM9GbQyPulS5nrkz5m8CC7qAYQ%3D%3D',
'synct': '1714182214.703',
'syncd': '-138',
'PHPSESSID': '1g4mb7ol6ddoiej2h7rak6eh0k',
'ada35577182650f1_gr_session_id': '5b1ab32f-3236-4628-b301-aea7dcc57299',
'ada35577182650f1_gr_last_sent_sid_with_cs1': '5b1ab32f-3236-4628-b301-aea7dcc57299',
'ada35577182650f1_gr_cs1': 'qm21331348315',
'ada35577182650f1_gr_session_id_sent_vst': '5b1ab32f-3236-4628-b301-aea7dcc57299',
}
# HTTP request headers sent with the API call; the user-agent and sec-ch-ua
# values identify the client as desktop Chrome 120 on Windows.
headers = {
'authority': 'api.qimai.cn',
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9',
'cache-control': 'no-cache',
# The raw 'cookie' header is left out on purpose: the cookie values are
# supplied separately through the `cookies` dict.
'origin': 'https://www.qimai.cn',
'pragma': 'no-cache',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-site',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
# Request descriptor handed to the JS signer, which reads `url`, `params`
# and `baseURL` to derive the `analysis` signature.
t = {
    "url": "/rank/indexPlus/brand_id/0",
    "params": {
        "brand": "all",
        "country": "cn",
        "device": "iphone",
        "genre": "36",
        "date": "2024-04-27",
        "page": 2,
    },
    "baseURL": "https://api.qimai.cn",
}

# Sign the unsigned parameters first, then attach the signature to them.
analysis = execjs.compile(js_code).call('get_analysis', t)
params = t['params']
params['analysis'] = analysis

response = requests.get(
    t['baseURL'] + t['url'],
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=10,  # without a timeout requests may block indefinitely
)
# Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
response.raise_for_status()
print(response.json())
页面共有三个数据包。想获取哪一类数据,就把 t 中 url 末尾的数字改为 0、1 或 2。
如需爬取多页,修改 params 中的 page 即可。
多页爬取python代码:
import requests
import execjs
import time
# Load the JavaScript source that defines the `get_analysis` signing routine.
with open('demo.js', 'r', encoding='utf-8') as f:
    js_code = f.read()
# Cookies sent with every page request (passed to requests.get via `cookies=`).
# NOTE(review): these look like values captured from a logged-in browser
# session — presumably they expire and must be refreshed; confirm before
# relying on them, and avoid committing real credentials.
cookies = {
'qm_check': 'A1sdRUIQChtxen8pI0dAMRcOUFseEHBeQF0JTjVBWCwycRd1QlhAXFEGFUdeSklaHQdKAAkABAsgJ1dBWD0TR1JRRAp0BQlFEBQ3TSZKFUdBbwxvBBRFIlQsSUhTFxsQU1FVV1NHXEVYVElWBRsCHAkSSQ%3D%3D',
'gr_user_id': '7b300ae1-1a2e-48f3-ac26-ef24f571d4c9',
'USERINFO': 'jHFy6VITNfYjO5gzKnxVCcHVKhkUrDYQL4rEoi%2Bu%2Fa3gWGxj5dQDgJzrkKFOU6FOm%2F4%2FLH%2BKRQ6Kkezd22166yCiZ%2FlYWomlIfHgN40yWpgrHONQD7IehPC71gALl3B5eweCfpHISDgX3EvMl7rYBQ%3D%3D',
'ada35577182650f1_gr_last_sent_cs1': 'qm21331348315',
'aso_ucenter': 'c0160lkfZZ3b6mCL6Ic%2FE9rfux%2FFzvScbLXxY1aeQwexSd5FHwlCTHtQXpRw3BnZag8',
'AUTHKEY': 'kieguXDTNdsivnTuTu%2FDz8rEJtxANQ3H4WyYIrC6WalwuS0e3QcGe2ynYUWdRmasuVvmGU65IVQ%2BOjFG4DNkWj%2BcAjh7wM9GbQyPulS5nrkz5m8CC7qAYQ%3D%3D',
'synct': '1714182214.703',
'syncd': '-138',
'PHPSESSID': '1g4mb7ol6ddoiej2h7rak6eh0k',
'ada35577182650f1_gr_session_id': '5b1ab32f-3236-4628-b301-aea7dcc57299',
'ada35577182650f1_gr_last_sent_sid_with_cs1': '5b1ab32f-3236-4628-b301-aea7dcc57299',
'ada35577182650f1_gr_cs1': 'qm21331348315',
'ada35577182650f1_gr_session_id_sent_vst': '5b1ab32f-3236-4628-b301-aea7dcc57299',
}
# HTTP request headers sent with every page request; the user-agent and
# sec-ch-ua values identify the client as desktop Chrome 120 on Windows.
headers = {
'authority': 'api.qimai.cn',
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9',
'cache-control': 'no-cache',
# The raw 'cookie' header is left out on purpose: the cookie values are
# supplied separately through the `cookies` dict.
'origin': 'https://www.qimai.cn',
'pragma': 'no-cache',
'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-site',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
# Compile the signing script once: the JS source is identical for every page,
# so recompiling it inside the loop is wasted work.
signer = execjs.compile(js_code)

# Fetch pages 1 through 9 and print every entry of each page's `list`.
for page in range(1, 10):
    # Pause between requests to avoid hammering the API.
    time.sleep(1)
    print(f'正在爬取第{page}页数据------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
    # Request descriptor handed to the JS signer, which reads `url`, `params`
    # and `baseURL` to derive the `analysis` signature.
    t = {
        "url": "/rank/indexPlus/brand_id/0",
        "params": {
            "brand": "all",
            "country": "cn",
            "device": "iphone",
            "genre": "36",
            "date": "2024-04-27",
            "page": page,
        },
        "baseURL": "https://api.qimai.cn",
    }
    # Sign the unsigned parameters first, then attach the signature to them.
    analysis = signer.call('get_analysis', t)
    params = t['params']
    params['analysis'] = analysis
    response = requests.get(
        t['baseURL'] + t['url'],
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=10,  # without a timeout requests may block indefinitely
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    json_data = response.json()
    for index in json_data['list']:
        print(index)
js代码:
// Offset subtracted from the current timestamp in get_analysis (as `s || H`).
var s = 238;
// Zero constant: loop start and charCodeAt index in i_qt, and the fallback
// used in get_analysis when `s` is falsy.
var H = 0;
// Separator placed between the parts of the string that get_analysis signs.
var v = '@#';
// XOR-scramble the string n with the key string t.
// The character at position i of n is XORed with the key character at
// position (i + 10) % t.length, and the resulting code is turned back into a
// character via o(). Returns the scrambled string.
function i_qt(n, t) {
    var chars = n.split('');
    var total = chars.length;
    var keyLength = t.length;
    var pos = H;
    while (pos < total) {
        var keyChar = t[(pos + 10) % keyLength];
        chars[pos] = o(chars[pos].charCodeAt(H) ^ keyChar.charCodeAt(H));
        pos += 1;
    }
    return chars.join('');
}
// Return the one-character string for the UTF-16 code unit n.
// n may be a number or a numeric string such as '0x41'; String.fromCharCode
// converts its argument to a number before use.
// The method is called directly instead of rebuilding the name "fromCharCode"
// through the deprecated unescape() on every call.
function o(n) {
    return String.fromCharCode(n);
}
// Base64-encode t after converting it to a string of UTF-8 byte values.
// encodeURIComponent percent-encodes the UTF-8 bytes of non-ASCII characters;
// each %XX escape is then replaced by the single character with that byte
// value so that btoa() receives only code points below 256.
function i_jt(t) {
    var percentEncoded = encodeURIComponent(t);
    var byteString = percentEncoded.replace(/%([0-9A-F]{2})/g, function (match, hex) {
        return o('0x' + hex);
    });
    return btoa(byteString);
}
// Build the `analysis` signature for a request descriptor.
// t is an object with `params` (the query parameters), `url` and `baseURL`.
// Returns the string produced by i_jt(i_qt(payload, key)).
function get_analysis(t) {
    // Current time in ms, shifted by `s` (or H when s is falsy) and a fixed offset.
    var elapsed = +new Date - (s || H) - 1661224081041;

    // Collect every own parameter value, skipping an existing `analysis` entry.
    var values = [];
    var keys = Object.keys(t.params);
    for (var k = 0; k < keys.length; k++) {
        var key = keys[k];
        if (key == "analysis") {
            continue;
        }
        if (t.params.hasOwnProperty(key)) {
            values.push(t.params[key]);
        }
    }

    // Default sort (string order), concatenate, then Base64-encode.
    var encoded = i_jt(values.sort().join(''));

    // payload = encoded + v + path + v + elapsed + v + 3
    var path = t.url.replace(t.baseURL, '');
    var payload = [encoded, path, elapsed, 3].join(v);

    return i_jt(i_qt(payload, 'xyz517cda96efgh'));
}
结果展现: