ihoneyBakFileScan backup-file scanner (modified fork)

Advantage: it scans for backup archives whose file names are generated from the target's domain name.

Modifications: the stock tool works URL-to-dictionary (it runs the whole wordlist against one site before moving on, which gets it blocked quickly); this fork works dictionary-to-URL (similar to the test404 backup-scan ordering). Hosts that get blocked and can no longer be scanned are saved to waf_url.txt, so you can switch to another hotspot, lower the thread count, and continue scanning.
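The ordering change is easiest to see in isolation. Here is a minimal sketch (not the tool's actual dispatcher; the target and wordlist values are made up) of the dictionary-to-URL interleaving: each wordlist entry is tried across all targets before moving on to the next entry, and hosts already recorded as blocked are skipped. The scripts below get the same effect by interleaving the thread-pool submission index across targets and by checking dead_urls before each request.

# Minimal sketch of the dictionary-first ordering (illustrative values only)
targets = ['http://a.example/', 'http://b.example/']   # hypothetical targets
wordlist = ['www.zip', 'backup.rar', 'db.tar.gz']      # hypothetical entries
blocked = {'http://b.example/'}                        # e.g. loaded from waf_url.txt

for word in wordlist:        # outer loop: one dictionary entry at a time
    for base in targets:     # inner loop: each target gets at most one request per entry
        if base in blocked:  # skip hosts that have stopped responding
            continue
        print('would request', base + word)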

Usage. With the built-in backup dictionary:

python38 ihoneyBakFileScan_Modify_fx_smart.py -t 10 -f url.txt -o result_smart.txt

# -*- coding: UTF-8 -*-

import requests
import logging
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from hurry.filesize import size
from fake_headers import Headers
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import urllib.parse
from tqdm import tqdm  # progress bar library

requests.packages.urllib3.disable_warnings()

logging.basicConfig(level=logging.WARNING, format="%(message)s")

# Set of dead/blocked root URLs, used to skip further requests to those hosts
dead_urls = set()

# Check whether a URL is alive (helper; not called in the main flow)
def check_url_status(url, retries=3):
    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            if 200 <= response.status_code < 600:
                return response.status_code
        except Exception:
            pass
        time.sleep(1)  # wait 1 second before retrying
    return None

# Scan a single candidate URL
def vlun(urltarget, retries=3):
    # Skip candidates whose root URL has already been marked as dead/blocked
    if urllib.parse.urljoin(urltarget, '/') in dead_urls:
        return

    for _ in range(retries):
        try:
            if proxies:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            else:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False)
            
            if r.status_code == 200 and all(keyword not in r.headers.get('Content-Type', '') for keyword in ['html', 'image', 'xml', 'text', 'json', 'javascript']):
                tmp_rarsize = int(r.headers.get('Content-Length', 0))
                rarsize = str(size(tmp_rarsize))
                if int(rarsize[:-1]) > 0:
                    logging.warning('[ success ] {}  size:{}'.format(urltarget, rarsize))
                    with open(outputfile, 'a') as f:
                        try:
                            f.write(str(urltarget) + '  ' + 'size:' + str(rarsize) + '\n')
                        except Exception as e:
                            logging.warning(f"[ error ] Writing result failed: {e}")
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
            else:
                logging.warning('[ fail ] {}'.format(urltarget))
                return
        except Exception as e:
            logging.warning('[ fail ] {}'.format(urltarget))
        time.sleep(1)  # wait 1 second before retrying

    # All retries failed: record the root URL as dead/blocked
    root_url = urllib.parse.urljoin(urltarget, '/')
    if root_url not in dead_urls:
        dead_urls.add(root_url)
        with open('waf_url.txt', 'a') as f:
            f.write(root_url + '\n')

# Normalize a target into a full URL ending with '/'
def urlcheck(target=None, ulist=None):
    if target is not None and ulist is not None:
        if target.startswith('http://') or target.startswith('https://'):
            if target.endswith('/'):
                ulist.append(target)
            else:
                ulist.append(target + '/')
        else:
            line = 'http://' + target
            if line.endswith('/'):
                ulist.append(line)
            else:
                ulist.append(line + '/')
        return ulist

# Build the candidate URL list and dispatch scan tasks
def dispatcher(url_file=None, url=None, max_thread=20, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist = urlcheck(line, urllist)
                else:
                    break
    elif url is not None and url_file is None:
        url = str(url.strip())
        urllist = urlcheck(url, urllist)
    else:
        pass

    with open(outputfile, 'a'):
        pass

    check_urllist = []
    for u in urllist:
        cport = None
        # Strip the scheme explicitly (str.lstrip removes characters, not a prefix)
        if u.startswith('http://'):
            ucp = u[len('http://'):]
        elif u.startswith('https://'):
            ucp = u[len('https://'):]
        if '/' in ucp:
            ucp = ucp.split('/')[0]
        if ':' in ucp:
            cport = ucp.split(':')[1]
            ucp = ucp.split(':')[0]
        www1 = ucp.split('.')
        wwwlen = len(www1)
        # Concatenate the labels after the first one (www.example.com -> "examplecom")
        wwwhost = ''
        for i in range(1, wwwlen):
            wwwhost += www1[i]

        current_info_dic = deepcopy(dic)
        suffixFormat = ['.zip','.rar','.txt','.tar.gz','.tgz','.gz']
        # Candidate base names derived from the host; guard hosts that contain no dot
        domainDic = [ucp, ucp.replace('.', ''), ucp.replace('.', '_'), wwwhost, ucp.split('.', 1)[-1], www1[0]]
        if len(www1) > 1:
            domainDic.extend([(ucp.split('.', 1)[1]).replace('.', '_'), www1[1]])
        domainDic = list(set(domainDic))
        for s in suffixFormat:
            for d in domainDic:
                current_info_dic.extend([d + s])
        current_info_dic = list(set(current_info_dic))
        for info in current_info_dic:
            url = str(u) + str(info)
            check_urllist.append(url)

    urlist_len = len(urllist)
    check_urllist_len = len(check_urllist)
    if urlist_len == 0 or check_urllist_len == 0:
        print("[!] No valid URLs to scan")
        return
    # Round up so the interleaved index covers every candidate URL
    per_distance = (check_urllist_len + urlist_len - 1) // urlist_len

    p = ThreadPoolExecutor(max_thread)

    # Show scan progress with tqdm
    with tqdm(total=check_urllist_len, desc="Scanning URLs") as pbar:
        futures = []
        for index1 in range(0, per_distance):
            for index2 in range(0, urlist_len):
                index = index2 * per_distance + index1
                if index < check_urllist_len:
                    url = check_urllist[index]
                    futures.append(p.submit(vlun, url))
        
        for future in as_completed(futures):
            future.result()  # wait for the task to finish
            pbar.update(1)  # advance the progress bar

    p.shutdown()

if __name__ == '__main__':
    usageexample = '\n       Example: python3 ihoneyBakFileScan_Modify.py -t 100 -f url.txt -o result.txt\n'
    usageexample += '                '
    usageexample += 'python3 ihoneyBakFileScan_Modify.py -u https://www.example.com/ -o result.txt'

    parser = ArgumentParser(add_help=True, usage=usageexample, description='A Website Backup File Leak Scan Tool.')
    parser.add_argument('-f', '--url-file', dest="url_file", help="Example: url.txt", default="url.txt")
    parser.add_argument('-t', '--thread', dest="max_threads", nargs='?', type=int, default=1, help="Max threads")
    parser.add_argument('-u', '--url', dest='url', nargs='?', type=str, help="Example: http://www.example.com/")
    parser.add_argument('-d', '--dict-file', dest='dict_file', nargs='?', help="Example: dict.txt")
    parser.add_argument('-o', '--output-file', dest="output_file", help="Example: result.txt", default="result.txt")
    parser.add_argument('-p', '--proxy', dest="proxy", help="Example: socks5://127.0.0.1:1080")

    args = parser.parse_args()
    tmp_suffixFormat = ['.zip','.rar','.txt','.tar.gz','.tgz','.gz']
    tmp_info_dic = ['%e5%95%86%e5%9f%8e','%e5%a4%87%e4%bb%bd','%e5%ae%89%e8%a3%85%e6%96%87%e4%bb%b6','%e6%95%b0%e6%8d%ae','%e6%95%b0%e6%8d%ae%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93','%e6%95%b0%e6%8d%ae%e5%ba%93%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93%e6%96%87%e4%bb%b6','%e6%95%b4%e7%ab%99','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(1)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(2)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(3)','%e6%96%b0%e5%bb%ba%e6%96%87%e6%9c%ac%e6%96%87%e6%a1%a3','%e6%9c%8d%e5%8a%a1%e5%99%a8','%e6%a8%a1%e6%9d%bf','%e6%ba%90%e7%a0%81','%e7%a8%8b%e5%ba%8f','%e7%ab%99%e7%82%b9','%e7%bd%91%e7%ab%99','%e7%bd%91%e7%ab%99%e5%a4%87%e4%bb%bd','%e8%af%b4%e6%98%8e','__zep__/js','1','10','11','111','111111','123','123123','1234','12345','123456','127.0.0.1','1314','1980','1981','1982','1983','1984','1985','1986','1987','1988','1989','1990','1991','1992','1993','1994','1995','1996','1997','1998','1999','2','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021','2022','2023','2024','2025','2026','2027','2028','2029','2030','3','4','5','520','6','7','7z','8','9','a','aboard','access.log','add','addr','address','adm','admin','ajax','alditor','alditorimage','app','archive','asp','aspx','attach','auth','b','back','backup','backupdata','backups','bak','bb','bbs','beian','beifen','bf','bin','bin/bin','bin/bin1','bin/bin2','bin/dll','bin/dll1','bin/dll2','bin1','bin2','board','boss','browser','bz2','c','captcha','ceshi','cgi','cheditor','cheditor4','cheditor5','chximage','chxupload','ckeditor','clients','cms','code','com','common','config','connectors','contents','copy','copy05','cust','customers','d','dat','data','databack','databackup','databak','databakup','database','databasebak','datad','daumeditor','db','dbadmin','dbcon','dbmysql','dede','dedecms','default','dev','dingdan','div','dll','dntb','do','doc','download','dump','dz','e','edit','editor','email','emfo','engine','entries','error','error_log','example','ezweb','f','faisunzip','fck2','fckeditor','file','filemanager','filename','files','fileswf','fileupload','fileuploadsubmit','flash','flashfxp','form','forum','fourm','ftp','ftp1','g','gbk','gg','good','gz','h','hdocs','help','home','htdocs','htm','html','htmleditor','http','i','idcontent','idx','iis','iisweb','image','imageeditor','imagefrm','images','imageup','imageupload','img','imgupload','inc','include','index','insert','install','ir1','j','joomla','js','jsp','k','keycode','kind2','kneditor','l','lib','library','like','line','list','local','localhost','log','m','mail','manageadmin','master','material','mdb','media','members','message','min','mng','modify','multi','my','mysql','n','navereditor','new','newwebadmin','o','oa','ok','old','openwysiwyg','orders','p','paper','pass','password','photo','php','phpcms','phpmyadmin','pic','plugin','plus','pma','pop','popup','popups','popupsgecko','post','prcimageupload','productedit','q','quick','r','raineditor','rar','release','resource','root','s','s111','sales','sample','samples','scm','se2','seditordemo','seed','sell','server','shop','shu','shuju','shujuku','site','siteweb','sj','sjk','smart','smarteditor','smarteditor2','smarteditor2skin','spac','sql','sqldata','src0811','store','swf','swfupload','system','t','tar','tdi','tdmain','temp','template','test','tgz','tv','txt','typeimage0','u','ueditor','update','upfile','upfiles','upload','uploadaspx','uploader','uploadpic','uploadpopup','uploadtest','upphoto','upphoto2','user','userlist','users','v','v14','vb','vip','w','wangzhan','web','web1','webadmin','webbak','webconfig','webedit','webmysql','webroot','webservice','website','wm123','wordpress','wp','write','ww','wwroot','www','wwwroot','wx','wysiwyg','wz','x','xxx','y','ysc','z','z9v8flashfxp','z9v8ftp','zip','安装文件','备份','程序','服务器','模板','商城','数据','数据备份','数据库','数据库备份','数据库文件','说明','网站','网站备份','新建文本文档','新建文件夹','新建文件夹(1)','新建文件夹(2)','新建文件夹(3)','源码','站点','整站']
    info_dic = []
    for a in tmp_info_dic:
        for b in tmp_suffixFormat:
            info_dic.extend([a + b])

    global outputfile
    if args.output_file:
        outputfile = args.output_file
    else:
        outputfile = 'result.txt'
    global proxies
    if args.proxy:
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    else:
        proxies = ''
    header = Headers(
        headers=False
    )

    timeout = 10

    try:
        if args.dict_file:
            custom_dict = list(set([i.replace("\n", "") for i in open(str(args.dict_file), "r").readlines()]))
            info_dic.extend(custom_dict)
        if args.url:
            dispatcher(url=args.url, max_thread=args.max_threads, dic=info_dic)
        elif args.url_file:
            dispatcher(url_file=args.url_file, max_thread=args.max_threads, dic=info_dic)
        else:
            print("[!] Please specify a URL or URL file name")
    except Exception as e:
        print(e)

With your own dictionary: the second script reads dicc.txt by default, so just use your own wordlist.
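For reference, the second script's load_dict_file() treats dicc.txt as a plain wordlist: one path per line, blank lines ignored, and leading slashes stripped before each entry is joined onto the target URL. Entries like the following (illustrative, not from any bundled wordlist) all work:

admin.zip
/wwwroot.rar
backup/db.sql
.git/config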

python38 ihoneyBakFileScan_Modify_fx_dir.py -t 10 -f url.txt -o results_dir.txt

# -*- coding: UTF-8 -*-
 
import requests
import logging
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from hurry.filesize import size
from fake_headers import Headers
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import urllib.parse
from tqdm import tqdm  # progress bar library
 
requests.packages.urllib3.disable_warnings()
 
logging.basicConfig(level=logging.WARNING, format="%(message)s")
 
# Set of dead/blocked root URLs, used to skip further requests to those hosts
dead_urls = set()
 
# Probe path used to detect custom 404 pages (should not exist on the target)
custom_404_path = '/8krrotrejtwejt3563657sewWWER'
 
# Keywords that identify a typical 404 page body
common_404_keywords = ['404', 'not found', 'page not found', 'error 404', 'page does not exist']
 
# Return True if the site answers a nonexistent path with 200/301 (custom 404 behaviour)
def is_custom_404(url, retries=3):
    for _ in range(retries):
        try:
            response = requests.get(url + custom_404_path, timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            # Treat 200 or 301 for the nonexistent probe path as a custom 404 page
            if response.status_code in [200, 301]:
                return True
        except Exception:
            pass
        time.sleep(1)  # wait 1 second before retrying
    return False
 
# Detect an ordinary 404: status code 404 with typical "not found" wording in the body
def is_common_404(response):
    # Check the response status code
    if response.status_code == 404:
        # Inspect the response body
        content = response.text.lower()
        # Look for typical 404 keywords
        for keyword in common_404_keywords:
            if keyword in content:
                return True
    return False
 
# Scan a single candidate URL
def vlun(urltarget, retries=3):
    # Skip candidates whose root URL has already been marked as dead/blocked
    if urllib.parse.urljoin(urltarget, '/') in dead_urls:
        return
 
    for _ in range(retries):
        try:
            if proxies:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            else:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False)
            
            # Check whether the site serves a custom 404 page (nonexistent paths return 200/301)
            if is_custom_404(urltarget):
                logging.warning('[ fail ] {} (Custom 404 page detected)'.format(urltarget))
                return
 
            # Otherwise judge by status code and reported size
            if 199 < r.status_code < 402:
                tmp_rarsize = int(r.headers.get('Content-Length', 0))
                rarsize = str(size(tmp_rarsize))
                if int(rarsize[:-1]) > 0:
                    logging.warning('[ success ] {}  size:{}'.format(urltarget, rarsize))
                    with open(outputfile, 'a') as f:
                        try:
                            f.write(str(urltarget) + '  ' + 'size:' + str(rarsize) + '\n')
                        except Exception as e:
                            logging.warning(f"[ error ] Writing result failed: {e}")
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
            else:
                # Check whether this is an ordinary 404 page
                if is_common_404(r):
                    logging.warning('[ fail ] {} (Common 404 page detected)'.format(urltarget))
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
        except Exception as e:
            logging.warning('[ fail ] {}'.format(urltarget))
        time.sleep(1)  # wait 1 second before retrying
 
    # All retries failed: record the root URL as dead/blocked
    root_url = urllib.parse.urljoin(urltarget, '/')
    if root_url not in dead_urls:
        dead_urls.add(root_url)
        with open('waf_url.txt', 'a') as f:
            f.write(root_url + '\n')
 
# Normalize a target into a full URL ending with '/'
def urlcheck(target=None, ulist=None):
    if target is not None and ulist is not None:
        if target.startswith('http://') or target.startswith('https://'):
            if target.endswith('/'):
                ulist.append(target)
            else:
                ulist.append(target + '/')
        else:
            line = 'http://' + target
            if line.endswith('/'):
                ulist.append(line)
            else:
                ulist.append(line + '/')
        return ulist
 
# Load dictionary entries from a wordlist file (one path per line)
def load_dict_file(dict_file='dicc.txt'):
    dic = []
    with open(dict_file, 'r') as f:
        for line in f:
            stripped_line = line.strip()
            if stripped_line:
                dic.append(stripped_line.lstrip('/'))
    return dic
 
# Generate every candidate URL for a target
def generate_combinations(url, dic):
    combinations = []
    for entry in dic:
        full_url = urllib.parse.urljoin(url, entry)
        combinations.append(full_url)
    return combinations
 
def dispatcher(url_file=None, url=None, max_thread=20, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist = urlcheck(line, urllist)
                else:
                    break
    elif url is not None and url_file is None:
        url = str(url.strip())
        urllist = urlcheck(url, urllist)
    else:
        pass
 
    with open(outputfile, 'a'):
        pass
 
    check_urllist = []
    for u in urllist:
        # Skip targets whose nonexistent paths return 200/301 (custom 404 page)
        if is_custom_404(u):
            logging.warning(f'[ fail ] {u} (Custom 404 page detected)')
            continue
 
        # Generate all candidate URLs for this target
        check_urllist.extend(generate_combinations(u, dic))
 
    urlist_len = len(urllist)
    check_urllist_len = len(check_urllist)
    if urlist_len == 0 or check_urllist_len == 0:
        print("[!] No valid URLs to scan")
        return
    # Round up so the interleaved index covers every candidate URL
    per_distance = (check_urllist_len + urlist_len - 1) // urlist_len

    p = ThreadPoolExecutor(max_thread)
 
    # Show scan progress with tqdm
    with tqdm(total=check_urllist_len, desc="Scanning URLs") as pbar:
        # Submit the scan tasks
        futures = []
        for index1 in range(0, per_distance):
            for index2 in range(0, urlist_len):
                index = index2 * per_distance + index1
                if index < check_urllist_len:
                    url = check_urllist[index]
                    futures.append(p.submit(vlun, url))
        
        for future in as_completed(futures):
            future.result()  # wait for the task to finish
            pbar.update(1)  # advance the progress bar
    
    p.shutdown()
 
 
if __name__ == '__main__':
    usageexample = '\n       Example: python3 ihoneyBakFileScan_Modify.py -t 100 -f url.txt -o result.txt\n'
    usageexample += '                '
    usageexample += 'python3 ihoneyBakFileScan_Modify.py -u https://www.example.com/ -o result.txt'
 
    parser = ArgumentParser(add_help=True, usage=usageexample, description='A Website Backup File Leak Scan Tool.')
    parser.add_argument('-f', '--url-file', dest="url_file", help="Example: url.txt", default="url.txt")
    parser.add_argument('-t', '--thread', dest="max_threads", nargs='?', type=int, default=1, help="Max threads")
    parser.add_argument('-u', '--url', dest='url', nargs='?', type=str, help="Example: http://www.example.com/")
    parser.add_argument('-o', '--output-file', dest="output_file", help="Example: result.txt", default="result.txt")
    parser.add_argument('-p', '--proxy', dest="proxy", help="Example: socks5://127.0.0.1:1080")
 
    args = parser.parse_args()
 
    # Load the dictionary from the default dicc.txt file
    info_dic = load_dict_file('dicc.txt')
 
    global outputfile
    if (args.output_file):
        outputfile = args.output_file
    else:
        outputfile = 'result.txt'
    # Optional proxy settings
    global proxies
    if (args.proxy):
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    else:
        proxies = ''
    header = Headers(
        # generate any browser
    )
 
    timeout = 5  # Define the timeout value
 
    dispatcher(url_file=args.url_file, url=args.url, max_thread=args.max_threads, dic=info_dic)

Install the dependencies (pip.txt):

python38 -m pip install -r pip.txt

fake_headers==1.0.2
hurry==1.1
hurry.filesize==0.9
requests==2.31.0
tqdm
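For orientation: a hit is appended to the output file as the URL followed by its reported size, and hosts that stop responding are appended to waf_url.txt as bare root URLs. The lines look roughly like this (hosts and sizes are illustrative):

result_smart.txt / results_dir.txt:
http://www.example.com/www.zip  size:1M
http://www.example.com/database.tar.gz  size:512K

waf_url.txt:
http://www.example.com/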
