Modified ihoneyBakFileScan backup-file scanner (fork)

Advantage: the tool generates backup-file names from each target's domain name and scans for them (see the sketch below).
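
Roughly, the domainDic logic in the scripts below expands one host into a set of name candidates and appends the backup suffixes. A simplified, self-contained re-implementation of that idea (hypothetical host, shortened suffix list; not a copy of the scripts):

# Sketch of how one host is expanded into backup-file name candidates
host = 'www.example.com'                       # hypothetical target host
labels = host.split('.')
candidates = {
    host,                                      # www.example.com
    host.replace('.', ''),                     # wwwexamplecom
    host.replace('.', '_'),                    # www_example_com
    ''.join(labels[1:]),                       # examplecom
    host.split('.', 1)[-1],                    # example.com
    host.split('.', 1)[1].replace('.', '_'),   # example_com
    labels[0],                                 # www
    labels[1],                                 # example
}
for name in sorted(candidates):
    for ext in ('.zip', '.rar', '.sql', '.tar.gz'):
        print(name + ext)                      # e.g. example.com.zip, www_example_com.rar, ...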

Modifications: by default the original tool iterates URL → dictionary (one site is hammered with the whole dictionary before moving on, which quickly gets you blocked). This fork iterates dictionary → URL (similar to the test404 backup-scan ordering), so consecutive requests are spread across targets, as sketched below. Hosts that get blocked and can no longer be scanned are saved to waf_url.txt; switch to another hotspot/IP, lower the thread count, and resume scanning them.
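
A minimal sketch of the modified scheduling, with a hypothetical scan() placeholder standing in for the real request logic (vlun() in the scripts below):

# Sketch of the modified scan order: dictionary entry in the outer loop, target in the
# inner loop, so consecutive requests hit different hosts instead of hammering one site.
targets = ['http://a.example/', 'http://b.example/']   # hypothetical targets
entries = ['www.zip', 'backup.rar', 'db.sql']          # hypothetical dictionary entries
dead_roots = set()                                     # roots collected in waf_url.txt

def scan(url):
    # placeholder for the real request logic (vlun() in the scripts below)
    print('would request', url)

for entry in entries:            # dictionary -> URL order (this fork)
    for base in targets:         # the upstream tool loops URL -> dictionary instead
        if base in dead_roots:   # blocked hosts are skipped and stay in waf_url.txt
            continue
        scan(base + entry)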

Usage:

Large backup dictionary:

python38 ihoneyBakFileScan_Modify_fx_big.py -t 10 -f url.txt -o result_big.txt

# -*- coding: UTF-8 -*-

import requests
import logging
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from hurry.filesize import size
from fake_headers import Headers
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import urllib.parse
from tqdm import tqdm  # progress bar

requests.packages.urllib3.disable_warnings()

logging.basicConfig(level=logging.WARNING, format="%(message)s")

# Set of dead/blocked root URLs, used to skip further requests to those hosts
dead_urls = set()

# Check whether a URL is reachable (helper; not used by the main scan flow)
def check_url_status(url, retries=3):
    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            if 200 <= response.status_code < 600:
                return response.status_code
        except Exception:
            pass
        time.sleep(1)  # wait 1 second before retrying
    return None

# Scan a single candidate URL and record hits / blocked hosts
def vlun(urltarget, retries=3):
    # Skip candidates whose root URL has already been marked as dead/blocked
    if urllib.parse.urljoin(urltarget, '/') in dead_urls:
        return

    for _ in range(retries):
        try:
            if proxies:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            else:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False)
            
            if r.status_code == 200 and all(keyword not in r.headers.get('Content-Type', '') for keyword in ['html', 'image', 'xml', 'text', 'json', 'javascript']):
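                # Note: hurry.filesize.size() returns a short string such as '2K' or '15M';
                # the [:-1] below drops the unit letter so the value can be compared as a number.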
                tmp_rarsize = int(r.headers.get('Content-Length', 0))
                rarsize = str(size(tmp_rarsize))
                if int(rarsize[:-1]) > 0:
                    logging.warning('[ success ] {}  size:{}'.format(urltarget, rarsize))
                    with open(outputfile, 'a') as f:
                        try:
                            f.write(str(urltarget) + '  ' + 'size:' + str(rarsize) + '\n')
                        except Exception as e:
                            logging.warning(f"[ error ] Writing result failed: {e}")
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
            else:
                logging.warning('[ fail ] {}'.format(urltarget))
                return
        except Exception as e:
            logging.warning('[ fail ] {}'.format(urltarget))
        time.sleep(1)  # wait 1 second before retrying

    # All retries failed: record the root URL as dead/blocked (also written to waf_url.txt for resuming later)
    root_url = urllib.parse.urljoin(urltarget, '/')
    if root_url not in dead_urls:
        dead_urls.add(root_url)
        with open('waf_url.txt', 'a') as f:
            f.write(root_url + '\n')

# Normalize a target into a full URL ending with a slash
def urlcheck(target=None, ulist=None):
    if target is not None and ulist is not None:
        if target.startswith('http://') or target.startswith('https://'):
            if target.endswith('/'):
                ulist.append(target)
            else:
                ulist.append(target + '/')
        else:
            line = 'http://' + target
            if line.endswith('/'):
                ulist.append(line)
            else:
                ulist.append(line + '/')
        return ulist

# Build the candidate URLs for every target and dispatch the scan tasks
def dispatcher(url_file=None, url=None, max_thread=20, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist = urlcheck(line, urllist)
                else:
                    break
    elif url is not None and url_file is None:
        url = str(url.strip())
        urllist = urlcheck(url, urllist)
    else:
        pass

    with open(outputfile, 'a'):
        pass

    check_urllist = []
    for u in urllist:
        cport = None
        # Strip the scheme to get the bare host part
        if u.startswith('http://'):
            ucp = u[len('http://'):]
        elif u.startswith('https://'):
            ucp = u[len('https://'):]
        if '/' in ucp:
            ucp = ucp.split('/')[0]
        if ':' in ucp:
            cport = ucp.split(':')[1]
            ucp = ucp.split(':')[0]
            www1 = ucp.split('.')
        else:
            www1 = ucp.split('.')
        wwwlen = len(www1)
        wwwhost = ''
        for i in range(1, wwwlen):
            wwwhost += www1[i]

        current_info_dic = deepcopy(dic)
        suffixFormat = ['.7z','dede','admin','sys','_sys','system','_system','manage','_manage','manager','_manager','_dede','_admin','.backup','.bak','.gz','.jar','.rar','.sql','.sql.gz','.tar','.tar.bz2','.tar.gz','.tar.tgz','.tgz','.txt','.war','.zip']
        domainDic = [ucp, ucp.replace('.', ''), ucp.replace('.', '_'), wwwhost, ucp.split('.', 1)[-1],
                     (ucp.split('.', 1)[1]).replace('.', '_'), www1[0], www1[1]]
        domainDic = list(set(domainDic))
        for s in suffixFormat:
            for d in domainDic:
                current_info_dic.extend([d + s])
        current_info_dic = list(set(current_info_dic))
        for info in current_info_dic:
            url = str(u) + str(info)
            check_urllist.append(url)

    urlist_len = len(urllist)
    check_urllist_len = len(check_urllist)
    per_distance = int(check_urllist_len / urlist_len)

    l = []
    p = ThreadPoolExecutor(max_thread)

    # Interleave candidates across targets (dictionary -> URL order) and show progress with tqdm
    with tqdm(total=check_urllist_len, desc="Scanning URLs") as pbar:
        futures = []
        for index1 in range(0, per_distance):
            for index2 in range(0, urlist_len):
                index = index2 * per_distance + index1
                if index < check_urllist_len:
                    url = check_urllist[index]
                    futures.append(p.submit(vlun, url))
        
        for future in as_completed(futures):
            future.result()  # wait for the task to finish
            pbar.update(1)  # advance the progress bar

    p.shutdown()

if __name__ == '__main__':
    usageexample = '\n       Example: python3 ihoneyBakFileScan_Modify.py -t 100 -f url.txt -o result.txt\n'
    usageexample += '                '
    usageexample += 'python3 ihoneyBakFileScan_Modify.py -u https://www.example.com/ -o result.txt'

    parser = ArgumentParser(add_help=True, usage=usageexample, description='A Website Backup File Leak Scan Tool.')
    parser.add_argument('-f', '--url-file', dest="url_file", help="Example: url.txt", default="url.txt")
    parser.add_argument('-t', '--thread', dest="max_threads", nargs='?', type=int, default=1, help="Max threads")
    parser.add_argument('-u', '--url', dest='url', nargs='?', type=str, help="Example: http://www.example.com/")
    parser.add_argument('-d', '--dict-file', dest='dict_file', nargs='?', help="Example: dict.txt")
    parser.add_argument('-o', '--output-file', dest="output_file", help="Example: result.txt", default="result.txt")
    parser.add_argument('-p', '--proxy', dest="proxy", help="Example: socks5://127.0.0.1:1080")

    args = parser.parse_args()
    tmp_suffixFormat = ['.7z','dede','admin','sys','_sys','system','_system','manage','_manage','manager','_manager','_dede','_admin','.backup','.bak','.gz','.jar','.rar','.sql','.sql.gz','.tar','.tar.bz2','.tar.gz','.tar.tgz','.tgz','.txt','.war','.zip']
    tmp_info_dic = ['%e5%95%86%e5%9f%8e','%e5%a4%87%e4%bb%bd','%e5%ae%89%e8%a3%85%e6%96%87%e4%bb%b6','%e6%95%b0%e6%8d%ae','%e6%95%b0%e6%8d%ae%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93','%e6%95%b0%e6%8d%ae%e5%ba%93%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93%e6%96%87%e4%bb%b6','%e6%95%b4%e7%ab%99','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(1)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(2)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(3)','%e6%96%b0%e5%bb%ba%e6%96%87%e6%9c%ac%e6%96%87%e6%a1%a3','%e6%9c%8d%e5%8a%a1%e5%99%a8','%e6%a8%a1%e6%9d%bf','%e6%ba%90%e7%a0%81','%e7%a8%8b%e5%ba%8f','%e7%ab%99%e7%82%b9','%e7%bd%91%e7%ab%99','%e7%bd%91%e7%ab%99%e5%a4%87%e4%bb%bd','%e8%af%b4%e6%98%8e','1','10','11','111','111111','123','123123','1234','12345','123456','127.0.0.1','1314','1980','1981','1982','1983','1984','1985','1986','1987','1988','1989','1990','1991','1992','1993','1994','1995','1996','1997','1998','1999','2','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021','2022','2023','2024','2025','2026','2027','2028','2029','2030','3','4','5','520','6','7','7z','8','9','__zep__/js','a','aboard','access.log','add','addr','address','adm','admin','ajax','alditor','alditorimage','app','archive','asp','aspx','attach','auth','b','back','backup','backupdata','backups','bak','bb','bbs','beian','beifen','bf','bin','bin/bin','bin/bin1','bin/bin2','bin/dll','bin/dll1','bin/dll2','bin1','bin2','board','boss','browser','bz2','c','captcha','ceshi','cgi','cheditor','cheditor4','cheditor5','chximage','chxupload','ckeditor','clients','cms','code','com','common','config','connectors','contents','copy','copy05','cust','customers','d','dat','data','databack','databackup','databak','databakup','database','databasebak','datad','daumeditor','db','dbadmin','dbcon','dbmysql','dede','dedecms','default','dev','dingdan','div','dll','dntb','do','doc','download','dump','dz','e','edit','editor','email','emfo','engine','entries','error','error_log','example','ezweb','f','faisunzip','fck2','fckeditor','file','filemanager','filename','files','fileswf','fileupload','fileuploadsubmit','flash','flashfxp','form','forum','fourm','ftp','ftp1','g','gbk','gg','good','gz','h','hdocs','help','home','htdocs','htm','html','htmleditor','http','i','idcontent','idx','iis','iisweb','image','imageeditor','imagefrm','images','imageup','imageupload','img','imgupload','inc','include','index','insert','install','ir1','j','joomla','js','jsp','k','keycode','kind2','kneditor','l','lib','library','like','line','list','local','localhost','log','m','mail','manageadmin','master','material','mdb','media','members','message','min','mng','modify','multi','my','mysql','n','navereditor','new','newwebadmin','o','oa','ok','old','openwysiwyg','orders','p','paper','pass','password','photo','php','phpcms','phpmyadmin','pic','plugin','plus','pma','pop','popup','popups','popupsgecko','post','prcimageupload','productedit','q','quick','r','raineditor','rar','release','resource','root','s','s111','sales','sample','samples','scm','se2','seditordemo','seed','sell','server','shop','shu','shuju','shujuku','site','siteweb','sj','sjk','smart','smarteditor','smarteditor2','smarteditor2skin','spac','sql','sqldata','src0811','store','swf','swfupload','system','t','tar','tdi','tdmain','temp','template','test','tgz','tv','txt','typeimage0','u','ueditor','update','upfile','upfiles','upload','uploadaspx','uploader',
        'uploadpic','uploadpopup','uploadtest','upphoto','upphoto2','user','userlist','users','v','v14','vb','vip','w','wangzhan','web','web1','webadmin','webbak','webconfig','webedit','webmysql','webroot','webservice','website','wm123','wordpress','wp','write','ww','wwroot','www','wwwroot','wx','wysiwyg','wz','x','xxx','y','ysc','z','z9v8flashfxp','z9v8ftp','zip','商城','备份','安装文件','数据','数据备份','数据库','数据库备份','数据库文件','整站','新建文件夹','新建文件夹(1)','新建文件夹(2)','新建文件夹(3)','新建文本文档','服务器','模板','源码','程序','站点','网站','网站备份','说明','1','10','11','111','111111','123','123123','1234','12345','123456','127.0.0.1','1314','1980','1981','1982','1983','1984','1985','1986','1987','1988','1989','1990','1991','1992','1993','1994','1995','1996','1997','1998','1999','2','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021','2022','2023','2024','2025','2026','2027','2028','2029','2030','3','4','5','520','6','7','7z','8','9','__zep__/js','A','Aboard','Access.log','Add','Addr','Address','Adm','Admin','Ajax','Alditor','Alditorimage','App','Archive','Asp','Aspx','Attach','Auth','B','Back','Backup','Backupdata','Backups','Bak','Bb','Bbs','Beian','Beifen','Bf','Bin','Bin/bin','Bin/bin1','Bin/bin2','Bin/dll','Bin/dll1','Bin/dll2','Bin1','Bin2','Board','Boss','Browser','Bz2','C','Captcha','Ceshi','Cgi','Cheditor','Cheditor4','Cheditor5','Chximage','Chxupload','Ckeditor','Clients','Cms','Code','Com','Common','Config','Connectors','Contents','Copy','Copy05','Cust','Customers','D','Dat','Data','Databack','Databackup','Databak','Databakup','Database','Databasebak','Datad','Daumeditor','Db','Dbadmin','Dbcon','Dbmysql','Dede','Dedecms','Default','Dev','Dingdan','Div','Dll','Dntb','Do','Doc','Download','Dump','Dz','E','Edit','Editor','Email','Emfo','Engine','Entries','Error','Error_log','Example','Ezweb','F','Faisunzip','Fck2','Fckeditor','File','Filemanager','Filename','Files','Fileswf','Fileupload','Fileuploadsubmit','Flash','Flashfxp','Form','Forum','Fourm','Ftp','Ftp1','G','Gbk','Gg','Good','Gz','H','Hdocs','Help','Home','Htdocs','Htm','Html','Htmleditor','Http','I','Idcontent','Idx','Iis','Iisweb','Image','Imageeditor','Imagefrm','Images','Imageup','Imageupload','Img','Imgupload','Inc','Include','Index','Insert','Install','Ir1','J','Joomla','Js','Jsp','K','Keycode','Kind2','Kneditor','L','Lib','Library','Like','Line','List','Local','Localhost','Log','M','Mail','Manageadmin','Master','Material','Mdb','Media','Members','Message','Min','Mng','Modify','Multi','My','Mysql','N','Navereditor','New','Newwebadmin','O','Oa','Ok','Old','Openwysiwyg','Orders','P','Paper','Pass','Password','Photo','Php','Phpcms','Phpmyadmin','Pic','Plugin','Plus','Pma','Pop','Popup','Popups','Popupsgecko','Post','Prcimageupload','Productedit','Q','Quick','R','Raineditor','Rar','Release','Resource','Root','S','S111','Sales','Sample','Samples','Scm','Se2','Seditordemo','Seed','Sell','Server','Shop','Shu','Shuju','Shujuku','Site','Siteweb','Sj','Sjk','Smart','Smarteditor','Smarteditor2','Smarteditor2skin','Spac','Sql','Sqldata','Src0811','Store','Swf','Swfupload','System','T','Tar','Tdi','Tdmain','Temp','Template','Test','Tgz','Tv','Txt','Typeimage0','U','Ueditor','Update','Upfile','Upfiles','Upload','Uploadaspx','Uploader','Uploadpic','Uploadpopup','Uploadtest','Upphoto','Upphoto2','User','Userlist','Users','V','V14','Vb','Vip','W','Wangzhan','Web','Web1','Webadmin','Webbak','Webconfig','Webedit','Webmysql','Webroot','Webservice','Website','Wm123','Wordpress','Wp','Write','Ww',
        'Wwroot','Www','Wwwroot','Wx','Wysiwyg','Wz','X','Xxx','Y','Ysc','Z','Z9v8flashfxp','Z9v8ftp','Zip','7Z','__ZEP__/JS','A','ABOARD','ACCESS.LOG','ADD','ADDR','ADDRESS','ADM','ADMIN','AJAX','ALDITOR','ALDITORIMAGE','APP','ARCHIVE','ASP','ASPX','ATTACH','AUTH','B','BACK','BACKUP','BACKUPDATA','BACKUPS','BAK','BB','BBS','BEIAN','BEIFEN','BF','BIN','BIN/BIN','BIN/BIN1','BIN/BIN2','BIN/DLL','BIN/DLL1','BIN/DLL2','BIN1','BIN2','BOARD','BOSS','BROWSER','BZ2','C','CAPTCHA','CESHI','CGI','CHEDITOR','CHEDITOR4','CHEDITOR5','CHXIMAGE','CHXUPLOAD','CKEDITOR','CLIENTS','CMS','CODE','COM','COMMON','CONFIG','CONNECTORS','CONTENTS','COPY','COPY05','CUST','CUSTOMERS','D','DAT','DATA','DATABACK','DATABACKUP','DATABAK','DATABAKUP','DATABASE','DATABASEBAK','DATAD','DAUMEDITOR','DB','DBADMIN','DBCON','DBMYSQL','DEDE','DEDECMS','DEFAULT','DEV','DINGDAN','DIV','DLL','DNTB','DO','DOC','DOWNLOAD','DUMP','DZ','E','EDIT','EDITOR','EMAIL','EMFO','ENGINE','ENTRIES','ERROR','ERROR_LOG','EXAMPLE','EZWEB','F','FAISUNZIP','FCK2','FCKEDITOR','FILE','FILEMANAGER','FILENAME','FILES','FILESWF','FILEUPLOAD','FILEUPLOADSUBMIT','FLASH','FLASHFXP','FORM','FORUM','FOURM','FTP','FTP1','G','GBK','GG','GOOD','GZ','H','HDOCS','HELP','HOME','HTDOCS','HTM','HTML','HTMLEDITOR','HTTP','I','IDCONTENT','IDX','IIS','IISWEB','IMAGE','IMAGEEDITOR','IMAGEFRM','IMAGES','IMAGEUP','IMAGEUPLOAD','IMG','IMGUPLOAD','INC','INCLUDE','INDEX','INSERT','INSTALL','IR1','J','JOOMLA','JS','JSP','K','KEYCODE','KIND2','KNEDITOR','L','LIB','LIBRARY','LIKE','LINE','LIST','LOCAL','LOCALHOST','LOG','M','MAIL','MANAGEADMIN','MASTER','MATERIAL','MDB','MEDIA','MEMBERS','MESSAGE','MIN','MNG','MODIFY','MULTI','MY','MYSQL','N','NAVEREDITOR','NEW','NEWWEBADMIN','O','OA','OK','OLD','OPENWYSIWYG','ORDERS','P','PAPER','PASS','PASSWORD','PHOTO','PHP','PHPCMS','PHPMYADMIN','PIC','PLUGIN','PLUS','PMA','POP','POPUP','POPUPS','POPUPSGECKO','POST','PRCIMAGEUPLOAD','PRODUCTEDIT','Q','QUICK','R','RAINEDITOR','RAR','RELEASE','RESOURCE','ROOT','S','S111','SALES','SAMPLE','SAMPLES','SCM','SE2','SEDITORDEMO','SEED','SELL','SERVER','SHOP','SHU','SHUJU','SHUJUKU','SITE','SITEWEB','SJ','SJK','SMART','SMARTEDITOR','SMARTEDITOR2','SMARTEDITOR2SKIN','SPAC','SQL','SQLDATA','SRC0811','STORE','SWF','SWFUPLOAD','SYSTEM','T','TAR','TDI','TDMAIN','TEMP','TEMPLATE','TEST','TGZ','TV','TXT','TYPEIMAGE0','U','UEDITOR','UPDATE','UPFILE','UPFILES','UPLOAD','UPLOADASPX','UPLOADER','UPLOADPIC','UPLOADPOPUP','UPLOADTEST','UPPHOTO','UPPHOTO2','USER','USERLIST','USERS','V','V14','VB','VIP','W','WANGZHAN','WEB','WEB1','WEBADMIN','WEBBAK','WEBCONFIG','WEBEDIT','WEBMYSQL','WEBROOT','WEBSERVICE','WEBSITE','WM123','WORDPRESS','WP','WRITE','WW','WWROOT','WWW','WWWROOT','WX','WYSIWYG','WZ','X','XXX','Y','YSC','Z','Z9V8FLASHFXP','Z9V8FTP','ZIP']
    info_dic = []
    for a in tmp_info_dic:
        for b in tmp_suffixFormat:
            info_dic.extend([a + b])

    global outputfile
    if args.output_file:
        outputfile = args.output_file
    else:
        outputfile = 'result.txt'
    global proxies
    if args.proxy:
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    else:
        proxies = ''
    header = Headers(
        headers=False
    )

    timeout = 10

    try:
        if args.dict_file:
            custom_dict = list(set([i.replace("\n", "") for i in open(str(args.dict_file), "r").readlines()]))
            info_dic.extend(custom_dict)
        if args.url:
            dispatcher(url=args.url, max_thread=args.max_threads, dic=info_dic)
        elif args.url_file:
            dispatcher(url_file=args.url_file, max_thread=args.max_threads, dic=info_dic)
        else:
            print("[!] Please specify a URL or URL file name")
    except Exception as e:
        print(e)

Small backup dictionary:

python38 ihoneyBakFileScan_Modify_fx_smart.py -t 10 -f url.txt -o result_smart.txt

# -*- coding: UTF-8 -*-

import requests
import logging
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from hurry.filesize import size
from fake_headers import Headers
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import urllib.parse
from tqdm import tqdm  # progress bar

requests.packages.urllib3.disable_warnings()

logging.basicConfig(level=logging.WARNING, format="%(message)s")

# Set of dead/blocked root URLs, used to skip further requests to those hosts
dead_urls = set()

# Check whether a URL is reachable (helper; not used by the main scan flow)
def check_url_status(url, retries=3):
    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            if 200 <= response.status_code < 600:
                return response.status_code
        except Exception:
            pass
        time.sleep(1)  # wait 1 second before retrying
    return None

# Scan a single candidate URL and record hits / blocked hosts
def vlun(urltarget, retries=3):
    # Skip candidates whose root URL has already been marked as dead/blocked
    if urllib.parse.urljoin(urltarget, '/') in dead_urls:
        return

    for _ in range(retries):
        try:
            if proxies:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            else:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False)
            
            if r.status_code == 200 and all(keyword not in r.headers.get('Content-Type', '') for keyword in ['html', 'image', 'xml', 'text', 'json', 'javascript']):
                tmp_rarsize = int(r.headers.get('Content-Length', 0))
                rarsize = str(size(tmp_rarsize))
                if int(rarsize[:-1]) > 0:
                    logging.warning('[ success ] {}  size:{}'.format(urltarget, rarsize))
                    with open(outputfile, 'a') as f:
                        try:
                            f.write(str(urltarget) + '  ' + 'size:' + str(rarsize) + '\n')
                        except Exception as e:
                            logging.warning(f"[ error ] Writing result failed: {e}")
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
            else:
                logging.warning('[ fail ] {}'.format(urltarget))
                return
        except Exception as e:
            logging.warning('[ fail ] {}'.format(urltarget))
        time.sleep(1)  # wait 1 second before retrying

    # All retries failed: record the root URL as dead/blocked (also written to waf_url.txt for resuming later)
    root_url = urllib.parse.urljoin(urltarget, '/')
    if root_url not in dead_urls:
        dead_urls.add(root_url)
        with open('waf_url.txt', 'a') as f:
            f.write(root_url + '\n')

# Normalize a target into a full URL ending with a slash
def urlcheck(target=None, ulist=None):
    if target is not None and ulist is not None:
        if target.startswith('http://') or target.startswith('https://'):
            if target.endswith('/'):
                ulist.append(target)
            else:
                ulist.append(target + '/')
        else:
            line = 'http://' + target
            if line.endswith('/'):
                ulist.append(line)
            else:
                ulist.append(line + '/')
        return ulist

# Build the candidate URLs for every target and dispatch the scan tasks
def dispatcher(url_file=None, url=None, max_thread=20, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist = urlcheck(line, urllist)
                else:
                    break
    elif url is not None and url_file is None:
        url = str(url.strip())
        urllist = urlcheck(url, urllist)
    else:
        pass

    with open(outputfile, 'a'):
        pass

    check_urllist = []
    for u in urllist:
        cport = None
        # Strip the scheme to get the bare host part
        if u.startswith('http://'):
            ucp = u[len('http://'):]
        elif u.startswith('https://'):
            ucp = u[len('https://'):]
        if '/' in ucp:
            ucp = ucp.split('/')[0]
        if ':' in ucp:
            cport = ucp.split(':')[1]
            ucp = ucp.split(':')[0]
            www1 = ucp.split('.')
        else:
            www1 = ucp.split('.')
        wwwlen = len(www1)
        wwwhost = ''
        for i in range(1, wwwlen):
            wwwhost += www1[i]

        current_info_dic = deepcopy(dic)
        suffixFormat = ['.zip','.rar','.txt','.tar.gz','.tgz','.gz']
        domainDic = [ucp, ucp.replace('.', ''), ucp.replace('.', '_'), wwwhost, ucp.split('.', 1)[-1],
                     (ucp.split('.', 1)[1]).replace('.', '_'), www1[0], www1[1]]
        domainDic = list(set(domainDic))
        for s in suffixFormat:
            for d in domainDic:
                current_info_dic.extend([d + s])
        current_info_dic = list(set(current_info_dic))
        for info in current_info_dic:
            url = str(u) + str(info)
            check_urllist.append(url)

    urlist_len = len(urllist)
    check_urllist_len = len(check_urllist)
    per_distance = int(check_urllist_len / urlist_len)

    l = []
    p = ThreadPoolExecutor(max_thread)

    # Interleave candidates across targets (dictionary -> URL order) and show progress with tqdm
    with tqdm(total=check_urllist_len, desc="Scanning URLs") as pbar:
        futures = []
        for index1 in range(0, per_distance):
            for index2 in range(0, urlist_len):
                index = index2 * per_distance + index1
                if index < check_urllist_len:
                    url = check_urllist[index]
                    futures.append(p.submit(vlun, url))
        
        for future in as_completed(futures):
            future.result()  # wait for the task to finish
            pbar.update(1)  # advance the progress bar

    p.shutdown()

if __name__ == '__main__':
    usageexample = '\n       Example: python3 ihoneyBakFileScan_Modify.py -t 100 -f url.txt -o result.txt\n'
    usageexample += '                '
    usageexample += 'python3 ihoneyBakFileScan_Modify.py -u https://www.example.com/ -o result.txt'

    parser = ArgumentParser(add_help=True, usage=usageexample, description='A Website Backup File Leak Scan Tool.')
    parser.add_argument('-f', '--url-file', dest="url_file", help="Example: url.txt", default="url.txt")
    parser.add_argument('-t', '--thread', dest="max_threads", nargs='?', type=int, default=1, help="Max threads")
    parser.add_argument('-u', '--url', dest='url', nargs='?', type=str, help="Example: http://www.example.com/")
    parser.add_argument('-d', '--dict-file', dest='dict_file', nargs='?', help="Example: dict.txt")
    parser.add_argument('-o', '--output-file', dest="output_file", help="Example: result.txt", default="result.txt")
    parser.add_argument('-p', '--proxy', dest="proxy", help="Example: socks5://127.0.0.1:1080")

    args = parser.parse_args()
    tmp_suffixFormat = ['.zip','.rar','.txt','.tar.gz','.tgz','.gz']
    tmp_info_dic = ['%e5%95%86%e5%9f%8e','%e5%a4%87%e4%bb%bd','%e5%ae%89%e8%a3%85%e6%96%87%e4%bb%b6','%e6%95%b0%e6%8d%ae','%e6%95%b0%e6%8d%ae%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93','%e6%95%b0%e6%8d%ae%e5%ba%93%e5%a4%87%e4%bb%bd','%e6%95%b0%e6%8d%ae%e5%ba%93%e6%96%87%e4%bb%b6','%e6%95%b4%e7%ab%99','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(1)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(2)','%e6%96%b0%e5%bb%ba%e6%96%87%e4%bb%b6%e5%a4%b9(3)','%e6%96%b0%e5%bb%ba%e6%96%87%e6%9c%ac%e6%96%87%e6%a1%a3','%e6%9c%8d%e5%8a%a1%e5%99%a8','%e6%a8%a1%e6%9d%bf','%e6%ba%90%e7%a0%81','%e7%a8%8b%e5%ba%8f','%e7%ab%99%e7%82%b9','%e7%bd%91%e7%ab%99','%e7%bd%91%e7%ab%99%e5%a4%87%e4%bb%bd','%e8%af%b4%e6%98%8e','__zep__/js','1','10','11','111','111111','123','123123','1234','12345','123456','127.0.0.1','1314','1980','1981','1982','1983','1984','1985','1986','1987','1988','1989','1990','1991','1992','1993','1994','1995','1996','1997','1998','1999','2','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021','2022','2023','2024','2025','2026','2027','2028','2029','2030','3','4','5','520','6','7','7z','8','9','a','aboard','access.log','add','addr','address','adm','admin','ajax','alditor','alditorimage','app','archive','asp','aspx','attach','auth','b','back','backup','backupdata','backups','bak','bb','bbs','beian','beifen','bf','bin','bin/bin','bin/bin1','bin/bin2','bin/dll','bin/dll1','bin/dll2','bin1','bin2','board','boss','browser','bz2','c','captcha','ceshi','cgi','cheditor','cheditor4','cheditor5','chximage','chxupload','ckeditor','clients','cms','code','com','common','config','connectors','contents','copy','copy05','cust','customers','d','dat','data','databack','databackup','databak','databakup','database','databasebak','datad','daumeditor','db','dbadmin','dbcon','dbmysql','dede','dedecms','default','dev','dingdan','div','dll','dntb','do','doc','download','dump','dz','e','edit','editor','email','emfo','engine','entries','error','error_log','example','ezweb','f','faisunzip','fck2','fckeditor','file','filemanager','filename','files','fileswf','fileupload','fileuploadsubmit','flash','flashfxp','form','forum','fourm','ftp','ftp1','g','gbk','gg','good','gz','h','hdocs','help','home','htdocs','htm','html','htmleditor','http','i','idcontent','idx','iis','iisweb','image','imageeditor','imagefrm','images','imageup','imageupload','img','imgupload','inc','include','index','insert','install','ir1','j','joomla','js','jsp','k','keycode','kind2','kneditor','l','lib','library','like','line','list','local','localhost','log','m','mail','manageadmin','master','material','mdb','media','members','message','min','mng','modify','multi','my','mysql','n','navereditor','new','newwebadmin','o','oa','ok','old','openwysiwyg','orders','p','paper','pass','password','photo','php','phpcms','phpmyadmin','pic','plugin','plus','pma','pop','popup','popups','popupsgecko','post','prcimageupload','productedit','q','quick','r','raineditor','rar','release','resource','root','s','s111','sales','sample','samples','scm','se2','seditordemo','seed','sell','server','shop','shu','shuju','shujuku','site','siteweb','sj','sjk','smart','smarteditor','smarteditor2','smarteditor2skin','spac','sql','sqldata','src0811','store','swf','swfupload','system','t','tar','tdi','tdmain','temp','template','test','tgz','tv','txt','typeimage0','u','ueditor','update','upfile','upfiles','upload','uploadaspx','uploader',
        'uploadpic','uploadpopup','uploadtest','upphoto','upphoto2','user','userlist','users','v','v14','vb','vip','w','wangzhan','web','web1','webadmin','webbak','webconfig','webedit','webmysql','webroot','webservice','website','wm123','wordpress','wp','write','ww','wwroot','www','wwwroot','wx','wysiwyg','wz','x','xxx','y','ysc','z','z9v8flashfxp','z9v8ftp','zip','安装文件','备份','程序','服务器','模板','商城','数据','数据备份','数据库','数据库备份','数据库文件','说明','网站','网站备份','新建文本文档','新建文件夹','新建文件夹(1)','新建文件夹(2)','新建文件夹(3)','源码','站点','整站']
    info_dic = []
    for a in tmp_info_dic:
        for b in tmp_suffixFormat:
            info_dic.extend([a + b])

    global outputfile
    if args.output_file:
        outputfile = args.output_file
    else:
        outputfile = 'result.txt'
    global proxies
    if args.proxy:
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    else:
        proxies = ''
    header = Headers(
        headers=False
    )

    timeout = 10

    try:
        if args.dict_file:
            custom_dict = list(set([i.replace("\n", "") for i in open(str(args.dict_file), "r").readlines()]))
            info_dic.extend(custom_dict)
        if args.url:
            dispatcher(url=args.url, max_thread=args.max_threads, dic=info_dic)
        elif args.url_file:
            dispatcher(url_file=args.url_file, max_thread=args.max_threads, dic=info_dic)
        else:
            print("[!] Please specify a URL or URL file name")
    except Exception as e:
        print(e)

Your own dictionary: the dicc.txt variant simply uses whatever dictionary you provide (a sample is shown below).
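
For reference, dicc.txt is just one path per line; a purely hypothetical example (leading slashes are stripped by load_dict_file, and each entry is joined onto the target URL):

admin.zip
backup.rar
db/db.sql
wwwroot.tar.gz
.svn/entries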

python38 ihoneyBakFileScan_Modify_fx_dir.py -t 10 -f url.txt -o results_dir.txt

# -*- coding: UTF-8 -*-

import requests
import logging
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from hurry.filesize import size
from fake_headers import Headers
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import urllib.parse
from tqdm import tqdm  # progress bar

requests.packages.urllib3.disable_warnings()

logging.basicConfig(level=logging.WARNING, format="%(message)s")

# Set of dead/blocked root URLs, used to skip further requests to those hosts
dead_urls = set()

# Path that should not exist on any site; used to detect "soft 404" sites that answer 200/301 for everything
custom_404_path = '/8krrotrejtwejt3563657sewWWER'

# Keywords that typically appear on generic 404 pages
common_404_keywords = ['404', 'not found', 'page not found', 'error 404', 'page does not exist']

# Detect a custom ("soft") 404 page: request the nonexistent path and see how the site answers
def is_custom_404(url, retries=3):
    for _ in range(retries):
        try:
            response = requests.get(url + custom_404_path, timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            # A 200 or 301 for a path that cannot exist means the site serves a custom 404 page
            if response.status_code in [200, 301]:
                return True
        except Exception:
            pass
        time.sleep(1)  # wait 1 second before retrying
    return False

# Check whether a response looks like an ordinary 404 page
def is_common_404(response):
    # Check the status code first
    if response.status_code == 404:
        # Inspect the response body
        content = response.text.lower()
        # Look for typical 404 keywords in the body
        for keyword in common_404_keywords:
            if keyword in content:
                return True
    return False

# Scan a single candidate URL and record hits / blocked hosts
def vlun(urltarget, retries=3):
    # Skip candidates whose root URL has already been marked as dead/blocked
    if urllib.parse.urljoin(urltarget, '/') in dead_urls:
        return

    for _ in range(retries):
        try:
            if proxies:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False, proxies=proxies)
            else:
                r = requests.get(url=urltarget, headers=header.generate(), timeout=timeout, allow_redirects=False, stream=True, verify=False)
            
            # Skip this candidate if the site serves a custom 404 page (any hit would be a false positive)
            if is_custom_404(urltarget):
                logging.warning('[ fail ] {} (Custom 404 page detected)'.format(urltarget))
                return

            # Otherwise judge success/failure from the status code and headers
            if r.status_code == 200 and all(keyword not in r.headers.get('Content-Type', '') for keyword in ['html', 'image', 'xml', 'text', 'json', 'javascript']):
                tmp_rarsize = int(r.headers.get('Content-Length', 0))
                rarsize = str(size(tmp_rarsize))
                if int(rarsize[:-1]) > 0:
                    logging.warning('[ success ] {}  size:{}'.format(urltarget, rarsize))
                    with open(outputfile, 'a') as f:
                        try:
                            f.write(str(urltarget) + '  ' + 'size:' + str(rarsize) + '\n')
                        except Exception as e:
                            logging.warning(f"[ error ] Writing result failed: {e}")
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
            else:
                # Check whether this is an ordinary 404 page
                if is_common_404(r):
                    logging.warning('[ fail ] {} (Common 404 page detected)'.format(urltarget))
                else:
                    logging.warning('[ fail ] {}'.format(urltarget))
                return
        except Exception as e:
            logging.warning('[ fail ] {}'.format(urltarget))
        time.sleep(1)  # wait 1 second before retrying

    # All retries failed: record the root URL as dead/blocked (also written to waf_url.txt for resuming later)
    root_url = urllib.parse.urljoin(urltarget, '/')
    if root_url not in dead_urls:
        dead_urls.add(root_url)
        with open('waf_url.txt', 'a') as f:
            f.write(root_url + '\n')

# Normalize a target into a full URL ending with a slash
def urlcheck(target=None, ulist=None):
    if target is not None and ulist is not None:
        if target.startswith('http://') or target.startswith('https://'):
            if target.endswith('/'):
                ulist.append(target)
            else:
                ulist.append(target + '/')
        else:
            line = 'http://' + target
            if line.endswith('/'):
                ulist.append(line)
            else:
                ulist.append(line + '/')
        return ulist

# Load dictionary entries from a file (one path per line)
def load_dict_file(dict_file='dicc.txt'):
    dic = []
    with open(dict_file, 'r') as f:
        for line in f:
            stripped_line = line.strip()
            if stripped_line:
                dic.append(stripped_line.lstrip('/'))
    return dic

# Build every candidate URL for one target
def generate_combinations(url, dic):
    combinations = []
    for entry in dic:
        full_url = urllib.parse.urljoin(url, entry)
        combinations.append(full_url)
    return combinations

def dispatcher(url_file=None, url=None, max_thread=20, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist = urlcheck(line, urllist)
                else:
                    break
    elif url is not None and url_file is None:
        url = str(url.strip())
        urllist = urlcheck(url, urllist)
    else:
        pass

    with open(outputfile, 'a'):
        pass

    check_urllist = []
    for u in urllist:
        # First check whether the root URL serves a custom 404 page; if so, skip this target entirely
        if is_custom_404(u):
            logging.warning(f'[ fail ] {u} (Custom 404 page detected)')
            continue

        # Generate all URLs that need to be scanned for this target
        check_urllist.extend(generate_combinations(u, dic))

    urlist_len = len(urllist)
    check_urllist_len = len(check_urllist)
    per_distance = int(check_urllist_len / urlist_len)

    l = []
    p = ThreadPoolExecutor(max_thread)

    # Interleave candidates across targets (dictionary -> URL order) and show progress with tqdm
    with tqdm(total=check_urllist_len, desc="Scanning URLs") as pbar:
        # Submit the scan tasks
        futures = []
        for index1 in range(0, per_distance):
            for index2 in range(0, urlist_len):
                index = index2 * per_distance + index1
                if index < check_urllist_len:
                    url = check_urllist[index]
                    futures.append(p.submit(vlun, url))
        
        for future in as_completed(futures):
            future.result()  # wait for the task to finish
            pbar.update(1)  # advance the progress bar
    
    p.shutdown()


if __name__ == '__main__':
    usageexample = '\n       Example: python3 ihoneyBakFileScan_Modify.py -t 100 -f url.txt -o result.txt\n'
    usageexample += '                '
    usageexample += 'python3 ihoneyBakFileScan_Modify.py -u https://www.example.com/ -o result.txt'

    parser = ArgumentParser(add_help=True, usage=usageexample, description='A Website Backup File Leak Scan Tool.')
    parser.add_argument('-f', '--url-file', dest="url_file", help="Example: url.txt", default="url.txt")
    parser.add_argument('-t', '--thread', dest="max_threads", nargs='?', type=int, default=1, help="Max threads")
    parser.add_argument('-u', '--url', dest='url', nargs='?', type=str, help="Example: http://www.example.com/")
    parser.add_argument('-o', '--output-file', dest="output_file", help="Example: result.txt", default="result.txt")
    parser.add_argument('-p', '--proxy', dest="proxy", help="Example: socks5://127.0.0.1:1080")

    args = parser.parse_args()

    # Load the dictionary from the default dicc.txt file
    info_dic = load_dict_file('dicc.txt')

    global outputfile
    if (args.output_file):
        outputfile = args.output_file
    else:
        outputfile = 'result.txt'
    # Optional proxy settings
    global proxies
    if (args.proxy):
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    else:
        proxies = ''
    header = Headers(
        # generate any browser
    )

    timeout = 5  # Define the timeout value

    # dispatcher() expects exactly one of url / url_file, so pick whichever was supplied
    if args.url:
        dispatcher(url=args.url, max_thread=args.max_threads, dic=info_dic)
    else:
        dispatcher(url_file=args.url_file, max_thread=args.max_threads, dic=info_dic)

Install the dependencies (contents of pip.txt shown below):

python38 -m pip install -r pip.txt

fake_headers==1.0.2
hurry==1.1
hurry.filesize==0.9
requests==2.31.0
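
If you want to sanity-check the two less common dependencies before running the scanners, a quick snippet like the following shows what the scripts rely on: hurry.filesize for the "size:" strings written to result.txt and fake_headers for the random request headers (output values in the comments are illustrative):

from hurry.filesize import size
from fake_headers import Headers

print(size(0))            # '0B'  -> treated as a failed hit by the scripts
print(size(3 * 1024))     # '3K'  -> logged as '[ success ] ... size:3K'
headers = Headers(headers=False).generate()
print(headers['User-Agent'])  # a random User-Agent, regenerated for every request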