Python 爬取微信小程序数据：如何爬取小程序数据

最新推荐文章于 2024-06-25 21:28:44 发布

Fixf4556

最新推荐文章于 2024-06-25 21:28:44 发布

阅读量512

点赞数 7

文章标签：人工智能

本文链接：https://blog.csdn.net/Fixf4556/article/details/136463585

版权

这篇文章主要介绍了如何使用 Python 爬取微信小程序数据，具有一定的借鉴价值，需要的朋友可以参考。希望大家阅读完这篇文章后有所收获，下面让小编带着大家一起了解一下。

源码下载：本文相关源码

Python爬虫系列之微信小程序云函数、sessionid、小程序code、支付算法

为维护网络绿色健康发展，代码仅供学习交流，请勿用于非法用途，如有疑问，请点击这里与我交流

直接上代码

wxSpider.py

# -*- coding:utf-8 -*-
import requests
import time
import json
import matplotlib.pyplot as plt
import base64
import skimage.io


# Base URL (scheme + address) of the service exposing the /api/... endpoints
# used below. NOTE(review): the address looks masked in this listing and
# presumably must be replaced with a real one before running.
host = "http://***.***.**.**"
# Service port, kept as a string because it is concatenated into the URL.
port = "1029"
# Maximum number of attempts made by the request/polling loops below.
retry = 3
# Value passed as the ``timeout`` argument of each ``requests`` call.
timeout = 30


def getHtml(url):
    '''
    Send a GET request and return the decoded JSON body.

    The request is attempted up to ``retry`` times; each attempt is bounded
    by ``timeout``.

    :param url: absolute URL to request
    :return: the decoded JSON value, or ``None`` when every attempt failed
    '''
    for _ in range(retry):
        try:
            resp = requests.get(url, timeout=timeout)
            return resp.json()
        except (requests.RequestException, ValueError):
            # Network-level failure or a body that is not valid JSON:
            # fall through to the next attempt. Anything else is a
            # programming error and is allowed to surface.
            continue
    return None


def postHtml(url, data):
    '''
    POST ``data`` as a JSON document and return the decoded JSON reply.

    The request is attempted up to ``retry`` times; each attempt is bounded
    by ``timeout``.

    :param url: absolute URL to post to
    :param data: JSON-serialisable request payload
    :return: the decoded JSON value, or ``None`` when the payload cannot be
             serialised or every attempt failed
    '''
    try:
        # Serialise once: the payload does not change between attempts.
        payload = json.dumps(data)
    except (TypeError, ValueError):
        # Retrying cannot fix an unserialisable payload; report failure the
        # same way a failed request is reported.
        return None
    headers = {"content-type": "application/json"}
    for _ in range(retry):
        try:
            resp = requests.post(url, data=payload, headers=headers, timeout=timeout)
            return resp.json()
        except (requests.RequestException, ValueError):
            # Network-level failure or a non-JSON reply: try again.
            continue
    return None


def gethwnd(accountId):
    '''
    Query the window-handle state of a WeChat instance.

    Per the original author's notes, the reply carries ``loginhwnd`` (login
    window handle) and ``mainhwnd`` (main window handle): ``loginhwnd > 0``
    means the account is not logged in yet or a first login is still
    initialising; ``mainhwnd > 0`` means login succeeded and initialisation
    is complete, so the other endpoints may be called.

    The endpoint is polled up to ``retry`` times, 15 seconds apart.

    :param accountId: account id sent to /api/Gethwnd
    :return: ``(True, reply_dict)`` once a reply contains both handle keys,
             otherwise ``(False, None)``
    '''
    url = host + ":" + port + "/api/Gethwnd"
    data = {"accountId": accountId}
    for attempt in range(retry):
        res = postHtml(url, data)
        if isinstance(res, dict) and "loginhwnd" in res and "mainhwnd" in res:
            return True, res
        # Wait between attempts only; sleeping after the last one would just
        # delay the failure result.
        if attempt < retry - 1:
            time.sleep(15)
    return False, None


def openWeChat():
    '''
    Ask the service to start WeChat via /api/OpenWeChat.

    :return: the ``accountId`` field of the reply, or ``None`` when the
             request failed or the reply has no such field
    '''
    endpoint = host + ":" + port + "/api/OpenWeChat"
    reply = getHtml(endpoint)
    if not isinstance(reply, dict):
        # Failed request (None) or a reply that is not a JSON object.
        return None
    return reply.get('accountId')


def showQrcode(base64_str, accountId):
    '''
    Display the login QR code and wait until the account is logged in.

    The image is shown in a matplotlib window while ``gethwnd`` is polled.
    Waiting stops as soon as the reply's ``mainhwnd`` is greater than zero,
    or once 180 seconds have elapsed.

    :param base64_str: base64-encoded image, as ``str`` or ``bytes``
    :param accountId: account id forwarded to ``gethwnd``
    :return: ``True`` when login was detected; ``False`` on timeout or when
             no QR data was supplied
    '''
    if not base64_str:
        # Without image data (e.g. fetching the QR code failed) there is
        # nothing to show; decoding would only raise.
        print("未获取到登录二维码！")
        return False
    if isinstance(base64_str, bytes):
        base64_str = base64_str.decode("utf-8")
    imgdata = base64.b64decode(base64_str)
    img = skimage.io.imread(imgdata, plugin='imageio')
    plt.imshow(img)
    plt.ion()
    # Monotonic clock: the timeout is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + 180
    loginStatus = False
    try:
        while True:
            try:
                # Lets the GUI event loop draw/refresh the window.
                plt.pause(2)
            except Exception:
                # Display problems are deliberately non-fatal: keep polling.
                pass
            status, loginMap = gethwnd(accountId)
            if status and int(loginMap['mainhwnd']) > 0:
                loginStatus = True
                break
            print("请扫描登录二维码！")
            time.sleep(2)
            if time.monotonic() >= deadline:
                break
    finally:
        # Always try to close the window, even if polling raised.
        try:
            plt.close()
        except Exception:
            pass
    return loginStatus


def getLoginQrcode(accountId):
    '''
    Fetch the login QR code from /api/GetLoginQrCode.

    The endpoint is queried up to ``retry`` times and every reply is
    printed.

    :param accountId: account id; converted with ``int()`` before sending
    :return: the ``QrImgDataBase64`` field of the first reply containing
             it, or ``None`` when no attempt produced one
    '''
    url = host + ":" + port + "/api/GetLoginQrCode"
    data = {"accountId": int(accountId)}
    for _ in range(retry):
        res = postHtml(url, data)
        print(res)
        # Check the reply's shape explicitly rather than relying on a
        # swallowed exception to detect a missing field.
        if isinstance(res, dict) and 'QrImgDataBase64' in res:
            return res['QrImgDataBase64']
    return None


def getJsCode(accountId, appid):
    '''
    Request a mini-program login code from /api/JsLogin.

    The endpoint is queried up to ``retry`` times until a reply is
    received; every reply is printed.

    :param accountId: account id of the logged-in WeChat instance
    :param appid: appid of the target mini program
    :return: the decoded reply of the endpoint (presumably carrying the
             code -- verify against the service), or ``None`` when every
             attempt failed
    '''
    url = host + ":" + port + "/api/JsLogin"
    data = {"accountId": accountId, "appid": appid}
    for _ in range(retry):
        res = postHtml(url, data)
        print(res)
        if res is not None:
            # Hand the reply back so callers can actually use the code.
            return res
    return None


def main():
    '''
    Run the whole flow: start WeChat, show the login QR code, wait for the
    login, then request a mini-program code.

    :return: None
    '''
    target_appid = "wxa9795213dd51a4b1"
    accountId = openWeChat()
    banner = "本次测试的accountId为：" + str(accountId) + " 如若需要测试，请复制该值或缓存该值直接进行测试，避免二次登录浪费资源！"
    print(banner)
    handles_ready, _ = gethwnd(accountId)
    if not handles_ready:
        # No usable window-handle reply: nothing further to do.
        return
    qr_payload = getLoginQrcode(accountId)
    if showQrcode(qr_payload, accountId):
        print("登录成功！")
        getJsCode(accountId, target_appid)
    else:
        print("登录超时！")


if __name__ == '__main__':
    # WeChat Windows PC protocol demo.
    #
    # Full-flow test.
    main()
    # Single-step test: the accountId below is the value obtained from a
    # successful full-flow login. To run this step on its own, comment out
    # the main() call above and uncomment the following lines.
    # accountId = 1280
    # appid = "wxa9795213dd51a4b1"
    # getJsCode(accountId, appid)

技术交流微信：walei5201314
QQ：804682557