Python安全开发之子域名扫描器（含详细注释）

核心代码

Python 代码如下：

import socket
import argparse
import threading
from concurrent.futures import ThreadPoolExecutor
import time

# Global lock used to serialize worker-thread access to found_subdomains
found_subdomain_lock = threading.Lock()
# Global list collecting the result line of every subdomain that resolved
found_subdomains = []

def check_subdomain(domain, subdomain):
    """Resolve ``subdomain.domain`` via DNS and record it when it exists.

    Args:
        domain (str): Base domain, e.g. "baidu.com".
        subdomain (str): Subdomain prefix to test, e.g. "www" or "mail".

    On a successful lookup the formatted result line is printed and appended
    to the module-level ``found_subdomains`` list. A ``socket.gaierror``
    (the name did not resolve) is ignored silently because it is the expected
    outcome for most wordlist entries; any other exception is reported on
    stdout. Nothing is returned.
    """
    # Build the fully qualified name, e.g. "www" + "baidu.com" -> "www.baidu.com".
    full_domain = f"{subdomain}.{domain}"
    try:
        ip_address = socket.gethostbyname(full_domain)
        # Left-align the name in a 30-character column so results line up.
        result = f"[+] Found: {full_domain:<30} -> {ip_address}"
        # Hold the lock for the print as well as the append: this function
        # runs on several worker threads, and unsynchronized print() calls
        # can interleave their text and newlines on stdout.
        with found_subdomain_lock:
            print(result)
            found_subdomains.append(result)
    except socket.gaierror:
        # DNS resolution failed; treat the subdomain as non-existent.
        pass
    except Exception as e:
        # Report anything unexpected, serialized with the other output.
        with found_subdomain_lock:
            print(f"[!] 扫描子域名 {full_domain} 时发生错误：{e}")

def main():
    """Parse CLI arguments, load the wordlist and scan subdomains concurrently.

    Workflow:
        1. Parse ``-d/--domain``, ``-w/--wordlist``, ``-t/--threads`` and
           ``-o/--output`` from the command line.
        2. Read the wordlist, keeping one stripped, non-empty entry per line.
        3. Run ``check_subdomain`` for every entry on a thread pool.
        4. Print a summary and, when ``--output`` was given and something was
           found, write the sorted result lines to that file.

    Exits through ``parser.error`` when the thread count is below 1, and
    returns early (after printing a message) when the wordlist cannot be read.
    """
    parser = argparse.ArgumentParser(description="子域名扫描")
    parser.add_argument("-d", "--domain", required=True, help="目标域名")
    parser.add_argument("-w", "--wordlist", required=True, help="字典文件")
    parser.add_argument("-t", "--threads", type=int, default=10, help="线程数")
    parser.add_argument("-o", "--output", help="输出文件")
    args = parser.parse_args()

    domain = args.domain
    wordlist_path = args.wordlist
    num_threads = args.threads

    # ThreadPoolExecutor raises ValueError for max_workers <= 0; reject such
    # values up front with a normal usage error instead of a traceback.
    if num_threads < 1:
        parser.error("线程数必须大于0")

    # Print the scan configuration banner.
    print("-"*50)
    print(f"[+] 域名：{domain}")
    print(f"[+] 字典文件：{wordlist_path}")
    print(f"[+] 线程数：{num_threads}")
    print(f"[+]时间：{time.strftime('%Y-%m-%d %H:%M:%S')}")
    print("-"*50)

    try:
        # Explicit encoding so the wordlist is decoded the same way on every
        # platform rather than with the locale default.
        with open(wordlist_path, "r", encoding="utf-8") as wordlist_file:
            # Keep every line that is non-empty after stripping whitespace.
            subdomains = [line.strip() for line in wordlist_file if line.strip()]
            print(f"[+] 待扫描的子域名数：{len(subdomains)}")
    except FileNotFoundError:
        print(f"[!] 字典文件 {wordlist_path} 不存在")
        return
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, a directory path, or a file that is not UTF-8.
        print(f"[!] 无法读取字典文件 {wordlist_path}：{e}")
        return

    # Leaving the with-block waits for all submitted lookups to finish.
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        for sub in subdomains:
            executor.submit(check_subdomain, domain, sub)

    print("-"*50)
    print("[+] 扫描完成")
    if found_subdomains:
        print(f"[+] 扫描完成，找到 {len(found_subdomains)} 个子域名")

        if args.output:
            # Sort the result lines so the saved file is easier to read.
            full_result = sorted(found_subdomains)
            print(f"[+] 保存结果到 {args.output}")
            with open(args.output, "w", encoding="utf-8") as f:
                f.writelines(f"{result}\n" for result in full_result)
            print("[-] 结果保存完毕")
    else:
        print("[-] 未找到任何子域名")
    # Closing separator is printed for both outcomes; it used to appear only
    # when nothing was found.
    print("-"*50)

# Run the scanner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

1、列表推导式 [line.strip() for line in wordlist_file if line.strip()] 的详细解释：

这是一个高效的Python惯用法，用于从文件中读取所有非空行
line.strip() 去除每行首尾的空白字符（包括换行符）
if line.strip() 过滤掉空行（去除空白后为空的行）
整体相当于一个过滤和转换的组合操作

2、线程安全：使用 threading.Lock 确保多个线程不会同时修改 found_subdomains 列表

3、异常处理：区分了 socket.gaierror（常见的DNS解析失败）和其他异常

4、并发处理：使用 ThreadPoolExecutor 实现多线程并发扫描，提高扫描效率