# 应用市场爬虫-爬取-应用名、包名、版本、安装量、大小、分类等
# _*_ coding:utf-8 _*_
# @Time : 2022/1/1 0001 20:28
# @File :xx.py
# @ :PyCharm
import time
import requests
from urllib import parse
def out_app_data_1(name, timeout=10):
    """Search the app-market web API for *name* and return its first hit.

    Sends one GET request to the ``uowap/index`` endpoint with a
    ``searchApp|<name>`` URI and reads the first entry of the first layout
    block of the JSON response.

    Args:
        name: App name to search for.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A dict with the keys "应用名" (app name), "包名" (package name),
        "版本" (version), "安装量" (install count text), "大小" (size),
        "分类" (category), "appid", "ID" and "图标 " (icon URL).
        NOTE: the "图标 " key ends with a space; it is kept unchanged so
        existing callers keep working.

    Raises:
        KeyError, IndexError: The response does not contain the expected
            fields, or the result list is empty.
        requests.RequestException: The request failed or timed out.
    """
    # NOTE(review): the query is percent-encoded here and then encoded again
    # by requests when it serializes ``params``. This is kept as in the
    # original; presumably the endpoint tolerates it — confirm.
    encoded_query = parse.quote(f"|{name}", encoding="utf-8")
    url = "https://web-drcn.hispace.dbankcloud.cn/uowap/index"
    params = {
        "method": "internal.getTabDetail",
        "serviceType": "20",
        "reqPageNum": "1",
        "uri": f"searchApp{encoded_query}",
        "maxResults": "25",
        "version": "10.0.0",
        "zone": " ",
        "locale": "zh",
    }
    response = requests.get(url, params=params, timeout=timeout)

    # Only the first search result is used.
    first_hit = response.json()["layoutData"][0]["dataList"][0]
    return {
        "应用名": first_hit["name"],
        "包名": first_hit["package"],
        "版本": first_hit["appVersionName"],  # current version
        "安装量": first_hit["downCountDesc"],  # install count text
        # NOTE(review): the original labels "intro" as the app size — confirm
        # this field really carries the size text.
        "大小": first_hit["intro"],
        "分类": first_hit["kindName"],  # category
        "appid": first_hit["appid"],
        "ID": first_hit["ID"],
        "图标 ": first_hit["icon"],  # icon download URL
    }
def out_app_data_2(name, timeout=10, max_attempts=5):
    """Return search data for *name* merged with its detail-page data.

    Looks the app up with ``out_app_data_1`` and then queries the detail
    endpoint by ``appid``, retrying when the response lacks the expected
    fields.

    Args:
        name: App name to look up.
        timeout: Seconds ``requests`` waits on each detail request.
        max_attempts: How many times the detail request is tried before
            giving up (the original hard-coded 5).

    Returns:
        * "下架" if the first search hit's name differs from *name*.
        * "读取失败" if no attempt yielded a response with the expected
          fields.
        * Otherwise a dict: everything from ``out_app_data_1`` plus
          "绿标" (label names), "开发者" (developer), "更新日期"
          (release date), "隐私政策地址" (privacy-policy URL) and
          "权限说明" (list of permission descriptions).

    Raises:
        Whatever ``out_app_data_1`` raises, and requests.RequestException
        if a detail request fails or times out.
    """
    data_1 = out_app_data_1(name)

    # The name check depends only on the search result, so do it before
    # spending any detail requests.
    if data_1["应用名"] != name:
        return "下架"

    app = data_1["appid"]
    url = "https://web-drcn.hispace.dbankcloud.cn/uowap/index"
    # NOTE(review): "uri" and "currentUrl" are already percent-encoded and
    # requests encodes them again; kept as in the original — confirm the
    # endpoint expects this.
    params = {
        "method": "internal.getTabDetail",
        "serviceType": "20",
        "reqPageNum": "1",
        "maxResults": "25",
        "uri": f"app%7C{app}",
        "shareTo": "",
        "currentUrl": f"https%253A%252F%252Fappgallery.huawei.com%252Fapp%252F{app}",
        "accessId": "",
        # The original passed ``{app}`` (a one-element set) by accident.
        "appid": app,
        "zone": "",
        "locale": "zh",
    }

    for _ in range(max_attempts):
        response = requests.get(url, params=params, timeout=timeout)
        payload = response.json()  # parse once per attempt
        try:
            layout = payload["layoutData"]
            labels = layout[0]["dataList"][0]["labelNames"]
            # The detail fields are read from the ninth layout block.
            detail = layout[8]["dataList"][0]
            permissions = [
                str(entry["text"]) for entry in detail["authority"]["list"]
            ]
            data_2 = {
                "绿标": labels,
                "开发者": detail["developer"],
                "更新日期": detail["releaseDate"],
                "隐私政策地址": detail["conceal"]["text"],
                "权限说明": permissions,
            }
        except (KeyError, IndexError, TypeError):
            # Response did not have the expected shape; try again.
            continue
        return {**data_1, **data_2}

    return "读取失败"
if __name__ == '__main__':
    # Run one lookup and report how long it took.
    started_at = time.time()
    result = out_app_data_2("微信")
    if result in ("读取失败", "下架"):
        # One of the two failure markers: print it as-is.
        print(result)
    else:
        print(result["包名"])
    finished_at = time.time()
    print(finished_at - started_at)