应用市场爬虫

该代码段实现了一个爬虫,从特定的应用市场抓取应用的详细信息,包括应用名、包名、版本、安装量、大小、分类等,并在请求失败时自动重试。此外,还会获取绿标、开发者、更新日期、隐私政策地址和权限说明等额外信息。
摘要由CSDN通过智能技术生成

应用市场爬虫-爬取-应用名、包名、版本、安装量、大小、分类等

# _*_ coding:utf-8 _*_
# @Time      : 2022/1/1 0001 20:28
# @File      :xx.py
# @Software  :PyCharm
import time
import requests
from urllib import parse

def out_app_data_1(name, timeout=10):
    """Search the app market for *name* and return the first hit's basic info.

    Sends an ``internal.getTabDetail`` search request and reads the first
    entry of the first layout section of the JSON response.

    Args:
        name: Search keyword (normally the app's display name).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        A dict with the keys "应用名", "包名", "版本", "安装量", "大小", "分类",
        "appid", "ID" and "图标 " (the last key ends with a space).

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the response lacks the expected structure
            (for example when the search yields no result).
    """
    # The keyword is percent-encoded here and then encoded again by requests
    # when it builds the query string.
    # NOTE(review): kept as-is; confirm the server expects this double encoding.
    out_url_coding = parse.quote(f"|{name}", encoding="utf-8")
    url = "https://web-drcn.hispace.dbankcloud.cn/uowap/index"
    params = {
        "method": "internal.getTabDetail",
        "serviceType": "20",
        "reqPageNum": "1",
        "uri": f"searchApp{out_url_coding}",
        "maxResults": "25",
        "version": "10.0.0",
        "zone": " ",
        "locale": "zh",
    }

    res = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    res.raise_for_status()
    app_message = res.json()["layoutData"][0]["dataList"][0]

    pack_name = app_message["name"]  # display name of the app
    app_id = app_message["appid"]  # appid
    ID = app_message["ID"]  # ID
    version_name = app_message["appVersionName"]  # current version
    download = app_message["downCountDesc"]  # install count
    icon = app_message["icon"]  # icon download URL
    # NOTE(review): returned as the app size, but the field is named "intro" - confirm.
    size_desc = app_message["intro"]
    kind_name = app_message["kindName"]  # app category
    package = app_message["package"]  # package name

    # The "图标 " key ends with a space; kept unchanged because callers may
    # already look it up under that exact key.
    return {
        "应用名": pack_name,
        "包名": package,
        "版本": version_name,
        "安装量": download,
        "大小": size_desc,
        "分类": kind_name,
        "appid": app_id,
        "ID": ID,
        "图标 ": icon,
    }


def out_app_data_2(name, retries=5, timeout=10):
    """Return the search result for *name* enriched with detail-page fields.

    Looks the app up with ``out_app_data_1`` and then requests its detail
    page, retrying when the request fails or the response does not have the
    expected layout.

    Args:
        name: Exact display name of the app to look up.
        retries: Maximum number of attempts for the detail request.
        timeout: Seconds to wait for each detail request.

    Returns:
        * The string "下架" when the first search hit's name differs from
          *name*.
        * The string "读取失败" when every attempt at the detail request
          failed.
        * Otherwise the dict from ``out_app_data_1`` merged with the keys
          "绿标", "开发者", "更新日期", "隐私政策地址" and "权限说明".

    Any exception raised by ``out_app_data_1`` propagates unchanged.
    """
    data_1 = out_app_data_1(name)

    # The name check depends only on the search result, so it is done before
    # spending any network round trips on the detail page.
    if data_1["应用名"] != name:
        return "下架"

    app = data_1["appid"]

    url = "https://web-drcn.hispace.dbankcloud.cn/uowap/index"
    params = {
        "method": "internal.getTabDetail",
        "serviceType": "20",
        "reqPageNum": "1",
        "maxResults": "25",
        # Example of a real value: "app%7CC10168892"
        "uri": f"app%7C{app}",
        "shareTo": "",
        "currentUrl": f"https%253A%252F%252Fappgallery.huawei.com%252Fapp%252F{app}",
        "accessId": "",
        # Plain string; a set literal here only worked because requests
        # iterates over non-string iterables.
        "appid": app,
        "zone": "",
        "locale": "zh",
    }

    for _attempt in range(retries):
        try:
            res = requests.get(url, params=params, timeout=timeout)
            res.raise_for_status()
            # Parse the body once instead of once per extracted field.
            layout = res.json()["layoutData"]
            green_labels = layout[0]["dataList"][0]["labelNames"]  # green-label badges
            # NOTE(review): section index 8 is hard-coded; presumably the
            # detail section of the page - confirm against a live response.
            detail = layout[8]["dataList"][0]
            data_2 = {
                "绿标": green_labels,
                "开发者": detail["developer"],  # developer / owning company
                "更新日期": detail["releaseDate"],  # last update date
                "隐私政策地址": detail["conceal"]["text"],  # privacy policy URL
                # Flatten the permission entries to their text.
                "权限说明": [f"{item['text']}" for item in detail["authority"]["list"]],
            }
        except (KeyError, IndexError, TypeError, ValueError,
                requests.RequestException):
            # Network trouble, invalid JSON or an unexpected layout: retry.
            continue
        return dict(data_1, **data_2)

    return "读取失败"


if __name__ == '__main__':
    # Time a single lookup from start to finish.
    started_at = time.time()

    result = out_app_data_2("微信")

    # Failures come back as one of two marker strings; print those verbatim,
    # otherwise print the package name from the result dict.
    if result in ("读取失败", "下架"):
        print(result)
    else:
        print(result["包名"])

    finished_at = time.time()
    print(finished_at - started_at)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值