前些天看到一个文章写的是关于网站中JS敏感信息泄露相关的(文章传送门),感觉确实存在这个问题,但是当检查起来有些费劲,尤其是多个url时,要一个一个在页面中找点进去搜索关键字,十分麻烦,所以闲着没事就做了一个对批量url进行JS敏感信息检查的PY脚本;
具体功能如下:
- 兼容dirsearch扫描所生成的文档(需要筛选出30x/200的url);
- 指定需要探测的批量url;
- 对页面内的JS(包括动态)进行敏感信息检测;
- 可以指定想要检测的关键字;
- 下载探测到的JS文档,并输出检测结果;
- 指定dirsearch文档(2.txt)
200 2KB https://demo.careyshop.cn:443/admin/0
200 2KB https://demo.careyshop.cn:443/admin/index
200 2KB https://demo.careyshop.cn:443/admin/index.html
- 指定检测关键字(key.txt)
phone
email
ip
.html
.do
.action
- 输出结果
['200 2KB https://demo.careyshop.cn:443/admin/0', '', '200 2KB https://demo.careyshop.cn:443/admin/index', '', '200 2KB https://demo.careyshop.cn:443/admin/index.html']
https://demo.careyshop.cn:443/static/admin/static/config/production.js?1.8.5
https://hm.baidu.com/hm.js?e325e60ca4cd358f2b424f5aecb8021a
https://demo.careyshop.cn:443/static/admin/js/chunk-vendors.7e8a3f92.careyshop.js
https://demo.careyshop.cn:443/static/admin/js/app.d4044e39.careyshop.js
['https://demo.careyshop.cn:443/static/admin/js/app.d4044e39.careyshop.js', 'https://demo.careyshop.cn:443/static/admin/static/config/production.js?1.8.5', 'https://demo.careyshop.cn:443/static/admin/js/chunk-vendors.7e8a3f92.careyshop.js', 'https://hm.baidu.com/hm.js?e325e60ca4cd358f2b424f5aecb8021a']
{'phone': ['2.js'], 'email': ['2.js'], 'ip': ['2.js', '3.js', '4.js'], '.html': ['3.js', '4.js'], '.do': ['2.js', '3.js', '4.js'], '.action': ['2.js']}
详细代码:
import os
import re
from selenium import webdriver
def get_js_url(url_list):
    """Render each page in a browser and collect the JS URLs it references.

    Args:
        url_list: list of page URLs to load.

    Returns:
        dict mapping each page URL to the list of ".js" links found in its
        rendered HTML (href=/src= attributes, dynamically injected ones
        included, since the page is fully rendered by the browser).
    """
    result_dist = {}
    # Drives a local Chrome; requires a matching chromedriver on PATH.
    driver = webdriver.Chrome()
    try:
        # The original pattern [href|src]=['|"]...['|"] used character
        # classes, so it matched any single letter h/r/e/f/s/c (or '|')
        # before '='; (?:href|src) matches the attribute names properly.
        attr_re = re.compile(r"""(?:href|src)=["']([^"' ]*)["']""", re.I)
        for url in url_list:
            driver.get(url)  # opens the page in the browser window
            html_text = driver.page_source
            # Fresh list per URL: the original reused ONE list for every
            # key, so each dict value accumulated all previous pages' links.
            result_dist[url] = [link for link in attr_re.findall(html_text)
                                if ".js" in link]
    finally:
        driver.quit()  # release the browser even if a navigation fails
    return result_dist
def get_js_text(url_dist):
    """Download every JS file referenced by the scanned pages.

    Each JS URL is fetched in a browser and its source saved to
    js/<hostname>/<k>.js for later keyword scanning.

    Args:
        url_dist: dict mapping a page URL to its list of JS URLs
                  (as produced by get_js_url).

    Returns:
        (js_url_list, url_head_name): the de-duplicated list of fetched JS
        URLs, and the hostname used as the output directory name ("" when
        url_dist is empty).
    """
    driver = webdriver.Chrome()
    js_url_list = []
    url_head_name = ""  # hostname of the last processed page; "" if none
    k = 0  # running index used as the saved file name
    try:
        for page_url, js_links in url_dist.items():
            # scheme://host[:port] prefix, used to absolutize relative links
            url_head = re.findall(r"""\w+://[^/]*""", page_url, re.I)
            # hostname without port digits -> per-site output directory name
            url_head_name = re.findall(
                r"""[^:0-9]+""", url_head[0].split("//")[1], re.I)[0]
            for js_url in js_links:
                if js_url.startswith("/"):
                    js_url = url_head[0] + js_url  # make relative URL absolute
                if js_url in js_url_list:
                    # Already fetched. The original used `break` here, which
                    # silently skipped the REST of this page's JS list too.
                    continue
                js_url_list.append(js_url)
                driver.get(js_url)
                js_text = driver.page_source
                print(js_url)
                # File-name part of the URL, query string stripped
                fname = js_url.split("/")[-1]
                if "?" in fname:
                    fname = fname.split("?")[0]
                # makedirs(exist_ok=True) also creates the parent "js" dir on
                # first run; the original bare mkdir + except:pass hid that
                # failure and then crashed on open().
                os.makedirs(f"js/{url_head_name}", exist_ok=True)
                with open(f'js/{url_head_name}/{k}.js', mode='w',
                          encoding='utf-8') as file_handle:
                    file_handle.write(f"\n-------{js_url}-------\n")
                    file_handle.write(f"\n-------{fname}-------\n\n")
                    file_handle.write(js_text)
                k += 1
    finally:
        driver.quit()
    js_url_list = list(set(js_url_list))
    print(js_url_list)
    return js_url_list, url_head_name
def import_url(src):
    """Extract HTTP/HTTPS URLs from a text file (e.g. dirsearch output).

    Args:
        src: path of the file to parse; each line may contain one URL
             surrounded by other text (status code, size, ...).

    Returns:
        List of every URL found, in file order (duplicates kept).
    """
    # Compile once instead of on every line (the original rebuilt the
    # pattern inside the loop).
    pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    # `with` closes the handle; the original leaked the open file.
    with open(str(src)) as f:
        txt = [line.strip() for line in f]
    print(txt)
    url_list = []
    for line in txt:
        if line != "":
            url_list += re.findall(pattern, line)
    return url_list
def file_name(file_dir):
    """Return (root, dirs, files) for the top level of *file_dir*.

    Only the first tuple yielded by os.walk is used, i.e. the directory
    itself: its path, its immediate sub-directories, and its files.
    Returns None when the directory does not exist (os.walk yields nothing).
    """
    for entry in os.walk(file_dir):
        return entry
def detect_vul(file_dir, key_src):
    """Report which downloaded JS files contain each keyword.

    Args:
        file_dir: directory holding the downloaded .js files.
        key_src: path of a file with one keyword per line.

    Returns:
        dict mapping each keyword to the list of file names (within
        file_dir) whose content contains that keyword.
    """
    # `with` closes the handles; the original leaked both file objects.
    with open(key_src, "r") as fh:
        keywords = [line.strip() for line in fh]
    # Read every JS file ONCE instead of once per keyword; the original did
    # O(keywords x files) disk reads. Binary read + lenient decode keeps the
    # original tolerance for malformed bytes.
    js_texts = {}
    for name in next(os.walk(file_dir))[2]:
        with open(f"{file_dir}/{name}", "rb") as jf:
            js_texts[name] = jf.read().decode("utf-8", "ignore")
    return {word: [name for name, text in js_texts.items() if word in text]
            for word in keywords}
if __name__=="__main__":
url_list = import_url("2.txt")
url_dist = get_js_url(url_list)
js_url_list,url_head_name = get_js_text(url_dist)
js_name_dist = detect_vul(f"js/{url_head_name}","key.txt")
print(js_name_dist)
如果浏览器启动不成功,则需要安装与本地浏览器版本相匹配的浏览器驱动(如 chromedriver),并将其放入 PATH 中。