华为防火墙数据爬虫 && 接口接入方案

华为防火墙数据爬虫 && 接口接入方案

环境:

Windows Server 2019

python版本:3.9.12

第三方库:requests、urllib3、pandas(json、base64 为 Python 标准库,无需安装)

FW版本:V600R023C00SPC100

型号:USG6525F 新版前端页面

代码片段为爬取sslvpn登录信息:

import requests

import urllib3,json,html

from urllib.parse import urlencode,quote

import os

import base64

import datetime

import pandas as pd



def to_get_Data(ip, user, passwd):
    """Log in to a Huawei USG firewall web UI and export SSL-VPN online-user data to Excel.

    Args:
        ip: firewall management address (web UI assumed to listen on port 8443).
        user: web UI username.
        passwd: web UI password.

    Raises:
        ValueError: if the login response cannot be parsed as JSON, or no
            ``swebToken`` is returned (wrong credentials / captcha required).

    Side effect: writes ``sslvpn在线情况<timestamp>.xlsx`` to the working directory.
    """
    urllib3.disable_warnings()   # device uses a self-signed cert; silence the TLS warning
    session = requests.Session()
    session.verify = False       # skip certificate verification (self-signed cert)

    # --- simulated login ---
    login_url = f"https://{ip}:8443/login"
    login_payload = {
        "language": "zh_CN",
        "username": user,
        "password": passwd,
        "captcha": "",
        "confirmAlert": False,
    }
    response = session.post(url=login_url, json=login_payload)

    # The device answers with a plain JSON string; parse it explicitly so a
    # malformed body raises a clear ValueError instead of an opaque one.
    token = ''
    if response.text:
        try:
            data = json.loads(response.text)
        except Exception as e:
            raise ValueError(e)
        else:
            token = data["swebToken"]
    if token == '':
        raise ValueError("Don't get swebToken!")

    # Visiting the main page makes the device issue its session cookies.
    session.get(f"https://{ip}:8443/default.html")
    # Join ALL cookies into one header value. (Keeping only the last cookie of
    # the jar — as a bare loop would — breaks when the device sets several.)
    cookie_header = "; ".join(f"{c.name}={c.value}" for c in session.cookies)

    header = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Cookie": cookie_header,
        "Host": f"{ip}:8443",
        "Origin": f"https://{ip}:8443",
        "Referer": f"https://{ip}:8443/default.html",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Token": token,
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
        "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
    }
    # NOTE: Content-Length is deliberately NOT set by hand. requests computes
    # it from the actual serialized body and overrides any manual value; the
    # original hand-computed length (spaces stripped) did not even match what
    # requests sends (default json.dumps keeps separator spaces).
    session.headers.update(header)

    url = f"https://{ip}:8443/common.html"  # data endpoint
    query = {
        "getsslvpnVgMonitorInfo": {
            "funcName": "getsslvpnVgMonitorInfo",
            "language": 1,
            "vsys": "public",
            "pageIndex": 1,
            "pageSize": 100,
            "VGName": "public",
        }
    }
    r = session.post(url, json=query)
    data = r.json()
    onlineData = data["getsslvpnVgMonitorInfo"]["data"]

    df = pd.DataFrame(onlineData)
    df.index = df.index + 1  # 1-based row numbers in the exported sheet
    date = datetime.datetime.today().strftime("%Y%m%d_%H%M%S")
    date2 = datetime.datetime.today().strftime("%H-%M-%S")
    df.to_excel(f"sslvpn在线情况{date}.xlsx", sheet_name=date2)



if __name__ == "__main__":
    # Placeholder credentials — replace with real values before running.
    firewall_ip = 'x.x.x.x'
    login_user = 'xxxxx'
    login_pass = 'xxxxx'
    to_get_Data(ip=firewall_ip, user=login_user, passwd=login_pass)



若前端存在验证码,可使用 ddddocr 库识别,在该前端版本下识别成功率约为 50%。建议使用有上限的 for 循环连续尝试登录,切勿使用 while 循环,以防识别持续失败导致程序卡死。使用以下代码替换上面的"模拟登录"部分。

# Captcha-aware login loop. Requires `import ddddocr` (third-party OCR library).
# Bounded attempts (no `while True`) so persistent OCR failure cannot hang the script.
# NOTE(review): relies on `session`, `ip`, `user`, `passwd` from the enclosing
# script — this fragment replaces the "simulated login" section above.
for attempt in range(10):
    # Fetch the captcha image; the device returns it base64-encoded in JSON.
    captcha_url = f'https://{ip}:8443/captche/?_dcp=1701824760292'
    header = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Host": f"{ip}:8443",
        "Referer": f"https://{ip}:8443/login.html",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
        "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"'
    }
    rev = session.get(url=captcha_url, headers=header)
    if not rev.text:
        # Empty response: retry instead of crashing on an undefined variable.
        # (The original used `image_` outside the guard -> NameError here.)
        continue
    image_ = json.loads(rev.text)

    # Payload is "data:image/png;base64,<...>"-style base64 image data.
    img = f'{image_["image"]}'
    image_bytes = base64.b64decode(img)
    ocr = ddddocr.DdddOcr()
    captcha = ocr.classification(image_bytes).upper()

    # Join ALL session cookies into one header value; keeping only the last
    # cookie of the jar breaks when the device sets more than one.
    cookies_1 = "; ".join(f"{c.name}={c.value}" for c in session.cookies)
    header = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Cookie": cookies_1,
        "Host": f"{ip}:8443",
        "Origin": f"https://{ip}:8443",
        "Referer": f"https://{ip}:8443/login.html",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
        "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"'
    }
    # NOTE: Content-Length is not set by hand; requests computes it from the
    # actual body and overrides any manual value.
    login_url = f"https://{ip}:8443/login"

    json_ = {"language": "zh_CN", "username": user, "password": passwd,
             "captcha": captcha, "confirmAlert": False}
    response = session.post(headers=header, url=login_url, json=json_)  # attempt login
    if not response.text:
        continue
    try:
        data = json.loads(response.text)
    except Exception:
        continue  # unparseable body: OCR probably failed, try a fresh captcha
    token = data["swebToken"]
    if token == '':
        continue  # wrong captcha or credentials rejected: retry
    break  # logged in successfully; `token` is set for the code that follows

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值