浏览器高级配置
设置浏览器无头模式(以 Headless Chrome 为例;Firefox 可改用 webdriver.FirefoxOptions() 并添加 '-headless' 参数):
python
# Entry point that exposes the browser-specific driver and options classes.
from selenium import webdriver
# Collect Chrome launch settings; they are handed to the driver below.
options = webdriver.ChromeOptions()
# Start Chrome without a visible window, using the "new" headless mode.
options.add_argument('--headless=new')
# Launch Chrome with the options configured above.
driver = webdriver.Chrome(options=options)
自定义用户代理和窗口大小(需在创建 driver 之前添加到 options 中才会生效):
python
# Extra Chrome command-line switches: a custom User-Agent string and a
# fixed 1920x1080 window. Added in the same order as before.
for chrome_flag in ('--user-agent=CustomAgent', '--window-size=1920,1080'):
    options.add_argument(chrome_flag)
元素高级定位
XPath高级定位示例(包含动态属性处理):
python
# `By` provides the locator-strategy constants (By.XPATH, By.ID, ...) used by
# find_element; it has to be imported before the first locator call.
from selenium.webdriver.common.by import By

# Match any element whose text contains the given fragment.
element = driver.find_element(By.XPATH, "//*[contains(text(),'动态文本')]")
# Combine several conditions: class equals 'list' AND numeric data-id above 100.
element = driver.find_element(By.XPATH, "//div[@class='list' and @data-id>100]")
# Axis lookup: the first <div> sibling that follows the element with id 'target'.
sibling = driver.find_element(By.XPATH, "//div[@id='target']/following-sibling::div[1]")
CSS选择器高级用法:
python
# Substring match on an attribute value: any element whose class contains 'partial'.
partial_class_selector = "[class*='partial']"
element = driver.find_element(By.CSS_SELECTOR, partial_class_selector)
# Child selection: the first <li> that is a direct child of <ul class="items">.
first_item_selector = "ul.items > li:first-child"
element = driver.find_element(By.CSS_SELECTOR, first_item_selector)
复杂交互操作
处理动态加载内容(显式等待结合EC条件):
python
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Poll for up to 10 seconds until the element with this id is present in the
# DOM, then keep a reference to it.
dynamic_locator = (By.ID, "dynamicElement")
element_is_present = EC.presence_of_element_located(dynamic_locator)
element = WebDriverWait(driver, 10).until(element_is_present)
鼠标动作链(复杂拖拽操作):
python
from selenium.webdriver.common.action_chains import ActionChains
# Locate both ends of the drag first.
source = driver.find_element(By.ID, "source")
target = driver.find_element(By.ID, "target")
# Queue the drag-and-drop gesture, then execute the queued actions.
drag_chain = ActionChains(driver)
drag_chain.drag_and_drop(source, target)
drag_chain.perform()
文件处理
文件上传(无需GUI交互):
python
# Typing a path into the <input type="file"> element selects the file
# directly, so no native file-picker dialog has to be driven.
upload_selector = "input[type='file']"
upload_path = "/absolute/path/to/file.txt"
file_input = driver.find_element(By.CSS_SELECTOR, upload_selector)
file_input.send_keys(upload_path)
文件下载配置(Chrome示例):
python
# Fresh options object carrying Chrome download preferences.
options = webdriver.ChromeOptions()
prefs = {}
# Directory that downloaded files are written to.
prefs["download.default_directory"] = "/path/to/downloads"
# Do not ask where to save each file.
prefs["download.prompt_for_download"] = False
options.add_experimental_option("prefs", prefs)
性能优化
禁用图片加载提升速度:
python
# Speed up page loads by turning image loading off through a Chrome
# preference; the value 2 is the setting used here to disable images.
chrome_options = webdriver.ChromeOptions()
prefs = dict([("profile.managed_default_content_settings.images", 2)])
chrome_options.add_experimental_option("prefs", prefs)
网络请求拦截(Chrome DevTools协议):
python
# Chrome DevTools Protocol: enable the Network domain, then block every
# request whose URL matches one of the patterns.
blocked_url_patterns = ['*.png', '*.gif']
driver.execute_cdp_cmd('Network.enable', {})
driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': blocked_url_patterns})
多窗口/框架处理
窗口切换控制:
python
# Remember the window we start in so we can return to it afterwards.
main_window = driver.current_window_handle
# Find the handle of the other open window: the first handle that is not the
# starting window. (Previously this name was used without being defined.)
new_window_handle = next(
    (handle for handle in driver.window_handles if handle != main_window),
    None,
)
if new_window_handle is None:
    # Nothing to switch to; fail loudly instead of passing None to Selenium.
    raise RuntimeError("no additional window is open to switch to")
driver.switch_to.window(new_window_handle)
# When the work in the new window is done, switch back to the main window.
driver.switch_to.window(main_window)
嵌套iframe处理:
python
# Enter the outer frame, addressed by the string "frameName".
driver.switch_to.frame("frameName")
driver.switch_to.frame(0) # switch by index, relative to the frame entered above
driver.switch_to.default_content() # return to the top-level document
高级验证机制
页面完全加载检测:
python
def _document_is_complete(browser):
    """Return True once the page reports document.readyState == "complete"."""
    ready_state = browser.execute_script("return document.readyState")
    return ready_state == "complete"


# Poll for up to 30 seconds until the document has finished loading.
WebDriverWait(driver, 30).until(_document_is_complete)
AJAX请求完成检测:
python
# Poll for up to 10 seconds until jQuery reports no in-flight AJAX requests.
# The `typeof` guard matters: on a page that has not loaded jQuery, a bare
# `jQuery.active` throws a ReferenceError inside the browser, which surfaces
# as a Selenium exception and aborts the wait instead of letting it finish.
# A page without jQuery has no jQuery requests pending, so it counts as idle.
WebDriverWait(driver, 10).until(
    lambda d: d.execute_script(
        "return typeof jQuery === 'undefined' || jQuery.active === 0"
    )
)
异常处理
智能重试机制:
python
from selenium.common.exceptions import StaleElementReferenceException

# Retry a click when the DOM is re-rendered between locating and clicking.
retries = 3
for attempt in range(retries):
    try:
        # Re-locate on every attempt: once a reference is stale it never
        # becomes valid again, so retrying the same object would fail
        # identically each time.
        element = driver.find_element(By.ID, "dynamicElement")
        element.click()
        break
    except StaleElementReferenceException:
        # Out of attempts: let the original exception propagate to the caller.
        if attempt == retries - 1:
            raise
分布式测试
Selenium Grid配置:
python
from selenium.webdriver.remote.webdriver import WebDriver

# Address of the Selenium Grid hub that dispatches sessions to its nodes.
hub_url = "http://hub_host:4444/wd/hub"
# Describe the requested browser with an options object. The
# `desired_capabilities` keyword is no longer accepted by the Selenium 4
# Python bindings, and the legacy "platform" key is replaced by the W3C
# "platformName" capability. ChromeOptions already implies browserName "chrome".
grid_options = webdriver.ChromeOptions()
grid_options.set_capability("platformName", "linux")
# Open a remote session on the grid instead of launching a local browser.
driver = WebDriver(command_executor=hub_url, options=grid_options)