Playwright 反检测自动化脚本详解

代码功能概述

本文展示了一个使用 Playwright 进行网页自动化操作的 Python 脚本,并针对网站的自动化检测做了规避处理。该脚本模拟人类操作行为访问百度首页,自动输入查询词并提交搜索,同时通过禁用 Chromium 的 AutomationControlled 特性、隐藏 navigator.webdriver 属性以及在操作之间加入随机延时,来降低被反爬虫机制识别的概率。

环境准备

安装命令(bash):
pip install playwright
# beautifulsoup4 provides the bs4 module imported by the cookie-saving version of the script
pip install beautifulsoup4
python -m playwright install chromium

完整代码(带详细注释)

Python 代码:
from playwright.sync_api import sync_playwright
import time
import random

def human_delay(a=0.5, b=1.5):
    """Block for a random duration drawn uniformly between a and b seconds.

    Used between page actions so the interaction timing is not constant.
    """
    pause_seconds = random.uniform(a, b)
    time.sleep(pause_seconds)

with sync_playwright() as p:
    # Launch a visible Chromium window with the AutomationControlled Blink
    # feature disabled (the "automation" flag sites commonly check for).
    browser = p.chromium.launch(
        headless=False,
        args=["--disable-blink-features=AutomationControlled"],
    )
    try:
        page = browser.new_page()

        # Injected before the page's own scripts run: make
        # navigator.webdriver read as undefined.
        page.add_init_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )

        page.goto("https://baidu.com")
        page.fill("#chat-textarea", "ChatGPT")
        human_delay()

        page.click("#chat-submit-button")
        # Give the results time to render before reading the page; without
        # this the body text may still be the pre-search page. Waiting for a
        # specific result selector would be more reliable than a fixed delay.
        page.wait_for_timeout(2000)

        text = page.inner_text("body")
        print(text)
        input("按回车关闭浏览器...")
    finally:
        # Close the browser even when one of the steps above raises.
        browser.close()

保存 Cookie 的版本(使用持久化上下文)

Python 代码:
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import time
import random

def human_delay(a=0.5, b=1.5):
    """Block for a random duration drawn uniformly between a and b seconds.

    Used between page actions so the interaction timing is not constant.
    """
    pause_seconds = random.uniform(a, b)
    time.sleep(pause_seconds)

with sync_playwright() as p:
    # A persistent context stores the browser profile (cookies included) in
    # user_data_dir, so state survives between runs. user_data_dir is a
    # parameter of launch_persistent_context, not of a plain launch().
    context = p.chromium.launch_persistent_context(
        user_data_dir="my_profile",
        headless=False,
        args=["--disable-blink-features=AutomationControlled"],
    )
    try:
        page = context.new_page()

        # Injected before the page's own scripts run: make
        # navigator.webdriver read as undefined.
        page.add_init_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )

        page.goto("https://baidu.com")
        page.fill("#chat-textarea", "ChatGPT")
        human_delay()
        page.wait_for_timeout(2000)  # or wait for a specific selector instead

        page.click("#chat-submit-button")
        page.wait_for_timeout(2000)  # or wait for a specific result selector

        html = page.content()
        soup = BeautifulSoup(html, "html.parser")

        # soup.title is None when the document has no <title>; print an
        # empty line in that case instead of raising AttributeError.
        title_tag = soup.title
        print(title_tag.text if title_tag is not None else "")
        print(soup.get_text())

        with open("baidu.html", "w", encoding="utf-8") as f:
            f.write(html)

        input("按回车关闭浏览器...")
    finally:
        # Always close the context so the profile is written back, even when
        # a step above raised. Closing stays best-effort (the window may
        # already have been closed by hand), but the failure is reported
        # rather than silently discarded.
        try:
            context.close()
        except Exception as e:
            print(f"Failed to close browser context cleanly: {e}")