Selenium 防检测策略的方法汇总:
合理设置延迟:请求间添加随机延迟 (2-10秒)
限制爬取频率:控制每小时/每天的请求量
轮换用户代理:准备至少10个不同的User-Agent
使用住宅代理:优先选择高质量的住宅代理IP
处理验证码:集成2Captcha或Anti-Captcha服务
定期更新工具:保持 Selenium 和浏览器驱动为最新版本
1. 基础防检测配置
python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def get_stealth_driver():
    """Create a Chrome WebDriver configured with basic anti-detection options.

    The driver is started with a set of Chrome switches and experimental
    options, a user agent picked at random from a small built-in list, and a
    script registered through the CDP command
    ``Page.addScriptToEvaluateOnNewDocument`` that redefines
    ``navigator.webdriver`` to return ``undefined``.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    import random

    # Placeholder user-agent strings; the pool is sampled once per driver.
    candidate_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)...",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)...",
    ]

    # Command-line switches, in the order they are handed to Chrome.
    chrome_switches = (
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "--disable-dev-shm-usage",
        "--no-sandbox",
    )

    # Experimental options that drop the automation switch and extension.
    experimental = {
        "excludeSwitches": ["enable-automation"],
        "useAutomationExtension": False,
    }

    chrome_options = Options()
    for switch in chrome_switches:
        chrome_options.add_argument(switch)
    for option_name, option_value in experimental.items():
        chrome_options.add_experimental_option(option_name, option_value)
    chrome_options.add_argument(f"user-agent={random.choice(candidate_agents)}")

    browser = webdriver.Chrome(options=chrome_options)

    # Script that makes navigator.webdriver read as undefined.
    hide_webdriver_js = (
        "\n"
        "Object.defineProperty(navigator, 'webdriver', {\n"
        "get: () => undefined\n"
        "})\n"
    )
    browser.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": hide_webdriver_js},
    )
    return browser
2. 高级防检测技术
2.1 使用 undetected-chromedriver
python
import undetected_chromedriver as uc
def get_undetected_driver(version_main=114, headless=False):
    """Start an undetected-chromedriver Chrome session with a random window size.

    Args:
        version_main: Major version of the installed Chrome browser that the
            driver should target. Defaults to 114, the value that used to be
            hard-coded here; pass the major version of your own Chrome when
            it differs.
        headless: Forwarded unchanged to ``uc.Chrome``. Defaults to ``False``.

    Returns:
        The ``uc.Chrome`` instance created with the configured options.
    """
    import random

    options = uc.ChromeOptions()

    # General browser options.
    options.add_argument("--disable-popup-blocking")
    options.add_argument("--disable-notifications")

    # Pick a random window size for each session.
    width = random.randint(1000, 1400)
    height = random.randint(700, 900)
    options.add_argument(f"--window-size={width},{height}")

    return uc.Chrome(
        options=options,
        version_main=version_main,
        headless=headless,
        use_subprocess=True,
    )
2.2 模拟人类行为模式
python
from selenium.webdriver.common.action_chains import ActionChains
import time
import random
def human_like_behavior(driver, element=None):
    """Simulate human-like interaction: pointer move, pauses and a scroll.

    A pointer move is queued first, then the function sleeps, scrolls the
    window down by a random amount, sleeps again, and finally performs the
    queued pointer move.

    Args:
        driver: WebDriver used to build the action chain and run the scroll
            script.
        element: Optional target for the pointer move. When falsy, the
            pointer is moved by a random offset of 0-500 on each axis
            instead.
    """
    pointer = ActionChains(driver)

    # Queue the pointer move; nothing is sent to the browser yet.
    if not element:
        dx = random.randint(0, 500)
        dy = random.randint(0, 500)
        pointer.move_by_offset(dx, dy)
    else:
        pointer.move_to_element(element)

    # Random pause before scrolling.
    first_pause = random.uniform(0.5, 2.5)
    time.sleep(first_pause)

    # Scroll down by a random number of pixels.
    pixels = random.randint(200, 800)
    driver.execute_script(f"window.scrollBy(0, {pixels})")

    second_pause = random.uniform(0.3, 1.8)
    time.sleep(second_pause)

    # The queued pointer move is only dispatched here, after the scroll.
    pointer.perform()
3. 完整防检测爬取流程
python
def stealth_scrape(url):
    """Load *url* in an undetected Chrome session and print the page title.

    The function starts a driver via ``get_undetected_driver()``, opens the
    page, waits a random 2-5 seconds, runs ``human_like_behavior`` and prints
    the title. Any exception is reported on stdout instead of being raised.
    The browser is closed afterwards whenever it was started.

    Args:
        url: Address of the page to load.
    """
    # Bind the name before the try block: if driver start-up itself fails,
    # the finally clause must not touch an unbound local.
    driver = None
    try:
        # Start the undetected-chromedriver session.
        driver = get_undetected_driver()

        # Open the target URL.
        driver.get(url)

        # Random wait before interacting with the page.
        time.sleep(random.uniform(2, 5))

        # Simulate human browsing behaviour.
        human_like_behavior(driver)

        # Actual scraping work; as an example, read the page title.
        title = driver.title
        print(f"成功获取页面标题: {title}")
        # Further scraping logic goes here.
    except Exception as e:
        print(f"爬取过程中发生错误: {str(e)}")
    finally:
        # Only quit a driver that was actually created.
        if driver is not None:
            driver.quit()
# Usage example; guarded so that importing this module does not launch a browser.
if __name__ == "__main__":
    stealth_scrape("https://example.com")
4. 额外防护措施
4.1 代理IP轮换
python
# Placeholder proxy endpoints in "host:port" form; replace with your own.
proxies = [
    "123.45.67.89:8080",
    "98.76.54.32:3128",
]
def get_proxy_driver():
    """Start an undetected-chromedriver session routed through a random proxy.

    One entry of the module-level ``proxies`` list is chosen at random and
    passed to Chrome as ``--proxy-server=http://<entry>``.

    Returns:
        The ``uc.Chrome`` instance created with that proxy option.
    """
    chrome_options = uc.ChromeOptions()
    chosen_proxy = random.choice(proxies)
    proxy_switch = f"--proxy-server=http://{chosen_proxy}"
    chrome_options.add_argument(proxy_switch)
    browser = uc.Chrome(options=chrome_options)
    return browser
4.2 指纹混淆
python
def modify_fingerprint(
    driver,
    width=1920,
    height=1080,
    timezone_id="America/New_York",
    webgl_vendor="NVIDIA Corporation",
):
    """Override screen size, timezone and WebGL vendor reported by the browser.

    The screen and WebGL overrides are injected with ``execute_script``; the
    timezone is set through the CDP command ``Emulation.setTimezoneOverride``.
    The defaults reproduce the values that used to be hard-coded here.

    Args:
        driver: WebDriver exposing ``execute_script`` and ``execute_cdp_cmd``.
        width: Value returned by ``screen.width``.
        height: Value returned by ``screen.height``.
        timezone_id: Timezone identifier sent as ``timezoneId``.
        webgl_vendor: String returned by ``getParameter`` for parameter 37445.

    Raises:
        ValueError: If ``width`` or ``height`` cannot be converted to ``int``.
    """
    import json

    # Coerce to int so only numeric text is ever interpolated into the script.
    width = int(width)
    height = int(height)

    # Override the screen resolution.
    driver.execute_script(
        f"Object.defineProperty(screen, 'width', {{get: () => {width}}});"
        f"Object.defineProperty(screen, 'height', {{get: () => {height}}});"
    )

    # Override the timezone.
    driver.execute_cdp_cmd(
        "Emulation.setTimezoneOverride",
        {"timezoneId": timezone_id},
    )

    # Override the WebGL vendor. json.dumps yields a correctly escaped string
    # literal, so quotes or backslashes in the vendor cannot break the script.
    # 37445 is UNMASKED_VENDOR_WEBGL from the WEBGL_debug_renderer_info extension.
    vendor_literal = json.dumps(webgl_vendor)
    driver.execute_script(
        "const getParameter = WebGLRenderingContext.prototype.getParameter;"
        "WebGLRenderingContext.prototype.getParameter = function(parameter) {"
        f" if (parameter === 37445) {{ return {vendor_literal}; }}"
        " return getParameter.call(this, parameter);"
        "};"
    )
5. 检测与验证
python
def test_stealth(driver):
    """Open each bot-detection test page and save a screenshot of it.

    For every page the driver navigates to the URL, waits three seconds,
    writes a PNG named after the last path segment of the URL and prints the
    file name.

    Args:
        driver: WebDriver used for navigation and for taking screenshots.
    """
    detection_pages = (
        "https://bot.sannysoft.com",
        "https://arh.antoinevastel.com/bots/areyouheadless",
    )
    for page in detection_pages:
        driver.get(page)
        time.sleep(3)

        # The text after the final "/" identifies the page in the file name.
        last_segment = page.split('/')[-1]
        screenshot_name = f"stealth_test_{last_segment}.png"
        driver.save_screenshot(screenshot_name)
        print(f"测试结果已保存: {screenshot_name}")