概要
逆向方法很简单,请求getInterfaceCode接口
整体架构流程
先请求getInterfaceCode接口,它会直接返回Interface-Code的值;将该值添加到请求头(headers)的Interface-Code字段中,再去请求数据接口,接口将返回json数据
技术名词解释
getInterfaceCode = "https://web-drcn.hispace.dbankcloud.com/edge/webedge/getInterfaceCode"
技术细节
主调用代码:通过调用工具模块tools获取会话id(Interface-Code),再请求各个接口
import requests
from tools import getInterfaceCode,getrealTabId
class HuaweiSpiderApp:
    """Crawl app listings from the Huawei AppGallery web API.

    On construction an ``Interface-Code`` token is obtained through
    ``getInterfaceCode()`` and stored in the headers that are sent with
    every subsequent request.
    """

    # Endpoint shared by the listing and the app-detail requests.
    API_URL = 'https://web-drcn.hispace.dbankcloud.com/edge/uowap/index'
    # Seconds to wait for the server; requests waits forever without it.
    REQUEST_TIMEOUT = 10
    # Position in ``layoutData`` where the detail page has been observed to
    # carry the developer entry (value taken from the original code).
    DEVELOPER_SECTION_INDEX = 8
    # Fields copied from every listing item, in output order.
    RECORD_FIELDS = (
        'appid',
        'name',
        'kindName',
        'downCountDesc',
        'intro',
        'package',
        'score',
        'tagName',
        'appVersionName',
    )

    def __init__(self):
        self.getInterfaceCode = getInterfaceCode()
        self.headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection': 'keep-alive',
            'Interface-Code': self.getInterfaceCode,
            'Origin': 'https://appgallery.huawei.com',
            'Referer': 'https://appgallery.huawei.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'cross-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
            'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
        }

    def _fetch_layout(self, params):
        """Request the API with *params* and return its ``layoutData`` list.

        A response without ``layoutData`` is treated as an empty list.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.get(
            self.API_URL,
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json().get('layoutData') or []

    @classmethod
    def _build_record(cls, item):
        """Return a new dict holding ``RECORD_FIELDS`` copied from *item*.

        Fields missing from *item* are stored as ``None`` so that one
        incomplete listing entry does not abort the whole crawl.
        """
        return {field: item.get(field) for field in cls.RECORD_FIELDS}

    @classmethod
    def _extract_developer(cls, layout):
        """Return the developer name found in a detail-page *layout*.

        The section at ``DEVELOPER_SECTION_INDEX`` is consulted first; if it
        is absent or holds no ``developer`` entry, every section is scanned
        in order.  Returns ``None`` when no developer is present.
        """
        def developer_in(section):
            for entry in section.get('dataList') or []:
                if 'developer' in entry:
                    return entry['developer']
            return None

        if len(layout) > cls.DEVELOPER_SECTION_INDEX:
            developer = developer_in(layout[cls.DEVELOPER_SECTION_INDEX])
            if developer is not None:
                return developer
        for section in layout:
            developer = developer_in(section)
            if developer is not None:
                return developer
        return None

    def data_list(self):
        """Crawl every category page by page and print one record per app.

        Category ids come from ``getrealTabId``.  Paging for a category
        stops at the first page whose ``layoutData`` is empty.
        """
        for tab_id in getrealTabId(self.getInterfaceCode):
            page = 1
            while True:
                print(f'正在采集第{page}页')
                layout = self._fetch_layout({
                    'method': 'internal.getTabDetail',
                    'serviceType': '20',
                    'reqPageNum': page,  # advanced on every loop iteration
                    'uri': tab_id,
                    'maxResults': '50',
                    'zone': '',
                    'locale': 'zh',
                })
                if not layout:
                    # ``page`` is the empty page that ended the loop, so the
                    # number of pages actually collected is one less.
                    print(f'类型{tab_id}采集结束,共采集{page - 1}页')
                    break
                apps = layout[0].get('dataList') or []
                print(f'数据量:{len(apps)}')
                # More fields of this endpoint can be added to RECORD_FIELDS.
                for item in apps:
                    record = self._build_record(item)
                    appid = record['appid']
                    record['enterprise'] = (
                        self.data_intro(appid) if appid is not None else None
                    )
                    print(record)
                page += 1

    def data_intro(self, appid):
        """Return the developer name shown on the detail page of *appid*.

        Returns ``None`` when the detail response carries no developer.
        """
        layout = self._fetch_layout({
            'method': 'internal.getTabDetail',
            'serviceType': '20',
            'reqPageNum': '1',
            'maxResults': '25',
            'uri': f'app|{appid}',
            'shareTo': '',
            # Passed unencoded: requests percent-encodes query values itself,
            # so a pre-encoded URL would be encoded twice.
            'currentUrl': f'https://appgallery.huawei.com/app/{appid}',
            'accessId': '',
            'appid': appid,
            'zone': '',
            'locale': 'zh',
        })
        return self._extract_developer(layout)
if __name__ == '__main__':
    # Script entry point: build the spider and crawl every category.
    HuaweiSpiderApp().data_list()
工具类tools
import requests
import re
def getInterfaceCode():
    """Request a session id (``Interface-Code``) from the AppGallery edge API.

    Returns:
        The response body with every double-quote character removed.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never handed back as if it were a token.
    """
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://appgallery.huawei.com',
        'Referer': 'https://appgallery.huawei.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
        'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    json_data = {
        'params': {},
        'zone': '',
        'locale': 'zh',
    }
    url = 'https://web-drcn.hispace.dbankcloud.com/edge/webedge/getInterfaceCode'
    response = requests.post(
        url,
        headers=headers,
        json=json_data,
        timeout=10,  # seconds; requests waits indefinitely without a timeout
    )
    response.raise_for_status()
    # Presumably the body is the token as a quoted JSON string; drop the quotes.
    return response.text.replace('"', '')
def getrealTabId(getInterfaceCode):
    """Return the ``realTabId`` of every category tab.

    Args:
        getInterfaceCode: value sent in the ``Interface-Code`` request header.

    Returns:
        A list with the ``realTabId`` of each entry in the nested ``tabInfo``
        lists of the response, in response order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection': 'keep-alive',
        'Interface-Code': getInterfaceCode,
        'Origin': 'https://appgallery.huawei.com',
        'Referer': 'https://appgallery.huawei.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
        'sec-ch-ua': '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    params = {
        'method': 'internal.getTabDetail',
        'serviceType': '20',
        'reqPageNum': '1',
        'uri': 'b2b4752f0a524fe5ad900870f88c11ed',
        'maxResults': '25',
        'zone': '',
        'locale': 'zh',
    }
    response = requests.get(
        'https://web-drcn.hispace.dbankcloud.com/edge/uowap/index',
        params=params,
        headers=headers,
        timeout=10,  # seconds; requests waits indefinitely without a timeout
    )
    response.raise_for_status()
    # Top-level ``tabInfo`` entries each carry their own ``tabInfo`` list of tabs.
    return [
        tab['realTabId']
        for group in response.json()['tabInfo']
        for tab in group['tabInfo']
    ]
- API
- 支持模型类型