华为防火墙数据爬虫 && 接口接入方案
环境:
Windows Server 2019
python版本:3.9.12
第三方库:requests、urllib3、pandas(json、base64 为 Python 标准库,无需额外安装)
FW版本:V600R023C00SPC100
型号:USG6525F 新版前端页面
代码片段为爬取sslvpn登录信息:
import requests
import urllib3,json,html
from urllib.parse import urlencode,quote
import os
import base64
import datetime
import pandas as pd
def to_get_Data(ip, user, passwd, timeout=30):
    """Log in to the firewall web UI and export the SSL VPN monitor list to Excel.

    The function posts the credentials to ``/login``, reads ``swebToken`` from
    the JSON reply, loads ``/default.html`` once, then posts a
    ``getsslvpnVgMonitorInfo`` query to ``/common.html`` and writes the
    returned ``data`` list to ``sslvpn在线情况<timestamp>.xlsx`` in the current
    working directory.

    Args:
        ip: Address of the firewall web UI (port 8443 is used).
        user: Web UI user name.
        passwd: Web UI password.
        timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        The ``pandas.DataFrame`` that was written to the Excel file
        (1-based index).

    Raises:
        ValueError: If the login reply is not valid JSON or carries no
            ``swebToken``.
    """
    # verify=False triggers InsecureRequestWarning on every call; silence it.
    urllib3.disable_warnings()
    session = requests.Session()
    session.verify = False  # do not validate the device's TLS certificate

    base_url = f"https://{ip}:8443"

    # Simulate the web login.
    login_payload = {
        "language": "zh_CN",
        "username": user,
        "password": passwd,
        "captcha": "",
        "confirmAlert": False,
    }
    response = session.post(url=f"{base_url}/login", json=login_payload, timeout=timeout)

    # The reply is parsed from response.text (the original author notes that
    # response.json() does not work on it).
    token = ''
    if response.text:
        try:
            data = json.loads(response.text)
        except ValueError as e:
            raise ValueError(f"login response is not valid JSON: {e}") from e
        # A failed login may omit the key; report it through the same
        # ValueError below instead of leaking a KeyError.
        if isinstance(data, dict):
            token = data.get("swebToken") or ''
    if token == '':
        raise ValueError("Don't get swbToken!")

    # Load the landing page once so the session jar holds every cookie the UI sets.
    session.get(f"{base_url}/default.html", timeout=timeout)

    header = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # No Content-Length here: requests sets it from the encoded body.
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Host": f"{ip}:8443",
        "Origin": base_url,
        "Referer": f"{base_url}/default.html",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Token": token,
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
        "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
    }
    # An explicit Cookie header takes precedence over the session jar, so it
    # must carry every cookie, not just the last one iterated.
    cookie_header = "; ".join(f"{c.name}={c.value}" for c in session.cookies)
    if cookie_header:
        header["Cookie"] = cookie_header
    session.headers.update(header)

    # Data endpoint.
    url = f"{base_url}/common.html"
    query = {
        "getsslvpnVgMonitorInfo": {
            "funcName": "getsslvpnVgMonitorInfo",
            "language": 1,
            "vsys": "public",
            "pageIndex": 1,
            "pageSize": 100,
            "VGName": "public",
        }
    }
    r = session.post(url, json=query, timeout=timeout)
    online_data = r.json()["getsslvpnVgMonitorInfo"]["data"]

    df = pd.DataFrame(online_data)
    df.index = df.index + 1  # number rows from 1 in the spreadsheet

    # Read the clock once so the file name and the sheet name always agree.
    now = datetime.datetime.today()
    date = now.strftime("%Y%m%d_%H%M%S")
    date2 = now.strftime("%H-%M-%S")
    df.to_excel(f"sslvpn在线情况{date}.xlsx", sheet_name=date2)
    return df
if __name__ == "__main__":
    # Placeholder connection settings; replace with the real firewall
    # address and web UI credentials before running the script.
    ip, sshuser, passwd = 'x.x.x.x', 'xxxxx', 'xxxxx'
    to_get_Data(ip, sshuser, passwd)
若前端存在验证码,可使用ddddocr库识别。该前端版本的识别成功率约为50%,因此使用for循环进行有限次数的连续登录尝试,切勿使用while循环,以防程序卡死。使用以下代码替换模拟登录部分。
# Drop-in replacement for the "simulate login" step: paste it inside the
# function body (indented one level) in place of the plain login request.
# It reads `session`, `ip`, `user` and `passwd` from the enclosing scope and
# leaves the result in `token` ('' when every attempt failed).
import ddddocr  # third-party OCR package used to read the captcha image

token = ''
ocr = ddddocr.DdddOcr()  # build the recognizer once, not on every attempt

captche_url = f'https://{ip}:8443/captche/?_dcp=1701824760292'
login_url = f"https://{ip}:8443/login"

# Headers shared by the captcha request and the login request.
base_header = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Host": f"{ip}:8443",
    "Referer": f"https://{ip}:8443/login.html",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
    "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
}

# Bounded retry: a misread captcha just costs one attempt.
for _ in range(10):
    rev = session.get(url=captche_url, headers=base_header)
    if rev.text == '':
        continue

    try:
        image_ = json.loads(rev.text)
        img = f'{image_["image"]}'
        # NOTE(review): the value may carry a "data:image/png;base64," prefix;
        # keep only the part after the last comma (base64 contains no commas).
        image_bytes = base64.b64decode(img.rpartition(",")[2])
    except (ValueError, KeyError, TypeError):
        # Unusable captcha reply: fetch a new one instead of aborting.
        continue
    captcha = ocr.classification(image_bytes).upper()

    # No Content-Length here: requests sets it from the encoded body.
    header = {
        **base_header,
        "Content-Type": "application/json",
        "Origin": f"https://{ip}:8443",
    }
    # An explicit Cookie header takes precedence over the session jar, so it
    # must carry every cookie, not just the last one iterated.
    cookies_1 = "; ".join(f"{c.name}={c.value}" for c in session.cookies)
    if cookies_1:
        header["Cookie"] = cookies_1

    json_ = {"language": "zh_CN", "username": user, "password": passwd, "captcha": captcha, "confirmAlert": False}
    response = session.post(headers=header, url=login_url, json=json_)  # login request
    if not response.text:
        continue
    try:
        data = json.loads(response.text)
    except ValueError:
        continue
    # A rejected captcha may omit the key; treat that as a failed attempt.
    token = data.get("swebToken") or '' if isinstance(data, dict) else ''
    if token != '':
        break