# xray html to xlsx数据整理脚本分享
'''
!!!注意将要整理的xray扫描的html报告放入xray_report_dir下,直接运行即可
1.设置全局目录变量,并判断全局目录是否存在,若不存在即创建;
2.遍历xray_report_dir目录下所有html文件;
3.读取遍历的所有html文件内容;
4.re正则匹配漏洞信息大致范围;
5.再利用正则匹配单个关键字数据(create_time、target、plugin、extra)并将提取的create_time的毫秒级时间戳转换成日期格式;
6.将所要填充的关键字数据添加到vuln_data列表当中;
7.创建数据框架,并将vuln_data列表中数据填充对应自定义列字段值下;
8.判断file_path目录下是否存在备份文件 *.bak,若存在则直接移除;
9.判断要写入的excel文件是否存在,若存在即重命名 *.bak后再写入excel文件中;否则直接写入;
'''
import re
import os
import glob
import datetime
import pandas as pd
# Directory that receives the generated spreadsheet (and its .bak backup).
file_path = 'result'
# Directory that is scanned for xray HTML reports.
xray_report_dir = 'xray_report_dir'

# Column order of the generated spreadsheet.
COLUMNS = ['create_time', 'target', 'PluginName/VulnType', 'Extra']

# One finding per <script class='web-vulns'> tag; group 1 is the payload
# passed to webVulns.push(...).
VULN_BLOCK_RE = re.compile(
    r"<script class='web-vulns'>webVulns.push\((.*?)\)</script>",
    re.M | re.I,
)
CREATE_TIME_RE = re.compile(r'create_time":(\d+)')
PLUGIN_RE = re.compile(r'plugin":"([^"]+)"')
TARGET_RE = re.compile(r'target":{"url":"([^"]+)"')
# NOTE(review): this stops at the first '}', so an "extra" value that
# contains a nested object is captured truncated -- confirm whether the
# full object is wanted before changing the pattern.
EXTRA_RE = re.compile(r'extra":({[^}]+})')


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return match.group(1) if match else ''


def extract_vulns(html):
    """Extract the vulnerability rows embedded in one report's HTML text.

    Args:
        html: Full text of a report file.

    Returns:
        A list with one dict per ``webVulns.push(...)`` script tag, keyed by
        'create_time', 'target', 'PluginName/VulnType' and 'Extra'.
        'create_time' is a naive local ``datetime`` built from the
        millisecond timestamp, or ``None`` when the finding has no
        timestamp. The other fields are '' when they cannot be found, so a
        partially filled finding is kept instead of aborting the run.
    """
    rows = []
    for info in VULN_BLOCK_RE.findall(html):
        millis = _first_group(CREATE_TIME_RE, info)
        # The timestamp is divided by 1000 because it is in milliseconds.
        date = (
            datetime.datetime.fromtimestamp(int(millis) / 1000.0)
            if millis else None
        )
        rows.append({
            'create_time': date,
            'target': _first_group(TARGET_RE, info),
            'PluginName/VulnType': _first_group(PLUGIN_RE, info),
            'Extra': _first_group(EXTRA_RE, info),
        })
    return rows


def main():
    """Collect every report in ``xray_report_dir`` into one xlsx file.

    Creates both working directories when missing, parses every ``*.html``
    file in ``xray_report_dir``, and writes the rows to
    ``<file_path>/xray_vulnerabilities.xlsx``. A stale ``.bak`` file is
    removed first, and an existing spreadsheet is renamed to ``.bak``
    before the new one is written.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(file_path, exist_ok=True)
    os.makedirs(xray_report_dir, exist_ok=True)

    vuln_data = []
    # Sorted so the row order does not depend on directory listing order.
    for html_file in sorted(glob.glob(os.path.join(xray_report_dir, '*.html'))):
        with open(html_file, 'r', encoding='utf-8') as f:
            print("读取文件:" + html_file)
            vuln_data.extend(extract_vulns(f.read()))

    df = pd.DataFrame(vuln_data, columns=COLUMNS)

    output = os.path.join(file_path, 'xray_vulnerabilities.xlsx')
    backup = output + '.bak'
    if os.path.exists(backup):
        os.remove(backup)
    if os.path.exists(output):
        # Plain file move; no directory creation or pruning is needed.
        os.replace(output, backup)
    df.to_excel(output, index=False)


if __name__ == '__main__':
    main()