Python 在 Web 领域的自动化应用
Python 在 Web 自动化方面有着广泛的应用,以下是主要的应用场景和工具:
1. 网页测试自动化
Selenium - 功能测试
python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def test_login():
    """Log in on the demo site and verify the dashboard page appears.

    Raises:
        selenium.common.exceptions.TimeoutException: if the dashboard
            element does not appear within 10 seconds after submitting.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://example.com/login")

        # Fill in the credentials.
        username = driver.find_element(By.ID, "username")
        password = driver.find_element(By.ID, "password")
        username.send_keys("testuser")
        password.send_keys("password123")

        # Submit the login form.
        login_btn = driver.find_element(By.XPATH, "//button[@type='submit']")
        login_btn.click()

        # Wait for the post-login page; raises on timeout, failing the test.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "dashboard"))
        )
        print("登录测试成功!")
    finally:
        # Always release the browser, even when the test fails.
        driver.quit()


# Guard the example invocation so importing this module does not launch
# a browser (the original called test_login() at import time).
if __name__ == "__main__":
    test_login()
Playwright - 现代浏览器自动化
python
from playwright.sync_api import sync_playwright


def test_with_playwright():
    """Open a page in Chromium, save a screenshot, and print its title."""
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        page = browser.new_page()

        # Navigate to the target page.
        page.goto("https://example.com")

        # Capture a screenshot to disk.
        page.screenshot(path="screenshot.png")

        # Read and report the document title.
        title = page.title()
        print(f"页面标题: {title}")

        browser.close()
2. 网页数据抓取
BeautifulSoup + Requests
python
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_website(url):
    """Scrape news articles from *url* and save them to scraped_data.csv.

    Args:
        url: Page whose ``<article class="news-item">`` entries are extracted.

    Returns:
        pandas.DataFrame with columns 'title', 'link', 'date'.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    data = []
    for article in soup.find_all('article', class_='news-item'):
        title_el = article.find('h2')
        link_el = article.find('a')
        time_el = article.find('time')
        # Skip malformed items rather than crashing on a missing tag
        # (the original raised AttributeError/KeyError here).
        if title_el is None or link_el is None or time_el is None:
            continue
        data.append({
            'title': title_el.text.strip(),
            'link': link_el.get('href'),
            'date': time_el.get('datetime'),
        })

    # Persist and return the scraped rows.
    df = pd.DataFrame(data)
    df.to_csv('scraped_data.csv', index=False)
    return df


# Guard the example so importing this module does not trigger a request.
if __name__ == "__main__":
    scrape_website('https://news.example.com')
Scrapy - 专业爬虫框架
python
# Install:        pip install scrapy
# Create project: scrapy startproject myproject
# Example spider
import scrapy


class NewsSpider(scrapy.Spider):
    """Crawl the news site, yielding one item per article and following pagination."""

    name = 'news'
    start_urls = ['https://news.example.com']

    def parse(self, response):
        # Emit one record per listed article.
        for article in response.css('article.news-item'):
            yield {
                'title': article.css('h2::text').get(),
                'link': article.css('a::attr(href)').get(),
                'date': article.css('time::attr(datetime)').get(),
            }

        # Recurse into the next results page, if any.
        next_page = response.css('a.next-page::attr(href)').get()
        if next_page:
            yield response.follow(next_page, self.parse)
3. API 测试自动化
Requests + Pytest
python
import requests
import pytest


class TestAPI:
    """Smoke tests for the example REST API."""

    BASE_URL = "https://api.example.com"

    def test_get_users(self):
        # Listing users should succeed and return a JSON array.
        resp = requests.get(f"{self.BASE_URL}/users")
        assert resp.status_code == 200
        assert isinstance(resp.json(), list)

    def test_create_user(self):
        # Creating a user should return 201 and echo the payload back.
        payload = {"name": "John", "email": "john@example.com"}
        resp = requests.post(f"{self.BASE_URL}/users", json=payload)
        assert resp.status_code == 201
        assert resp.json()["name"] == "John"


# Run with: pytest test_api.py -v
4. 性能测试自动化
Locust - 负载测试
python
from locust import HttpUser, task, between


class WebsiteUser(HttpUser):
    """Simulated visitor for load testing: browses pages and logs in."""

    # Pause 1-5 seconds between tasks to mimic human pacing.
    wait_time = between(1, 5)

    @task
    def view_homepage(self):
        self.client.get("/")

    @task(3)  # weighted 3x: product views dominate the traffic mix
    def view_products(self):
        self.client.get("/products")

    @task
    def login(self):
        self.client.post("/login", {
            "username": "testuser",
            "password": "testpass",
        })


# Run with: locust -f locustfile.py
5. 部署自动化
Fabric - 远程部署
python
from fabric import Connection, task


@task
def deploy(c):
    """Deploy the latest application code to the production server.

    Pulls the current branch, installs dependencies, runs database
    migrations, and restarts the service.

    Args:
        c: invocation context supplied by the Fabric task runner
           (unused here; a dedicated connection is opened below).
    """
    with Connection('user@server.com') as conn:
        # Run all project commands from the app directory via Fabric's
        # cd() context manager instead of repeating "cd /var/www/app &&".
        with conn.cd('/var/www/app'):
            conn.run('git pull')                         # fetch latest code
            conn.run('pip install -r requirements.txt')  # sync dependencies
            conn.run('python manage.py migrate')         # apply DB migrations
        conn.run('sudo systemctl restart myapp')         # restart the service
        print("部署完成!")
6. 监控自动化
网站可用性监控
python
import requests
import time
import smtplib
from email.mime.text import MIMEText


def monitor_website(url, check_interval=300, max_checks=None):
    """Poll *url* and send an alert email whenever it looks unhealthy.

    A check counts as unhealthy when the response status is not 200 or
    the request fails (timeout, DNS error, connection refused, ...).

    Args:
        url: Address to check.
        check_interval: Seconds to sleep between checks (default 300).
        max_checks: Stop after this many checks; ``None`` (the default)
            preserves the original run-forever behavior.
    """
    checks = 0
    while max_checks is None or checks < max_checks:
        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                send_alert(f"网站 {url} 返回状态码: {response.status_code}")
        except requests.RequestException as e:
            # Covers timeouts, DNS failures, refused connections, etc.
            send_alert(f"网站 {url} 无法访问: {str(e)}")
        checks += 1
        time.sleep(check_interval)
def send_alert(message):
    """Email *message* to the site administrator via authenticated SMTP."""
    msg = MIMEText(message)
    msg['Subject'] = '网站监控警报'
    msg['From'] = 'monitor@example.com'
    msg['To'] = 'admin@example.com'

    # STARTTLS-upgraded session; the context manager closes the connection.
    # NOTE(review): credentials are hard-coded — move to env/config in real use.
    with smtplib.SMTP('smtp.example.com', 587) as smtp:
        smtp.starttls()
        smtp.login('user', 'password')
        smtp.send_message(msg)
7. 表单自动填写
自动化表单提交
python
from selenium import webdriver
from selenium.webdriver.common.by import By


def auto_fill_form(url, form_data):
    """Fill and submit a form on *url* with the given field values.

    Args:
        url: Page containing the form.
        form_data: Mapping of input ``name`` attribute -> value to type.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a named
            field or the submit button is missing from the page.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Populate every field, clearing any pre-filled value first.
        for field_name, value in form_data.items():
            element = driver.find_element(By.NAME, field_name)
            element.clear()
            element.send_keys(value)

        # Submit the form.
        submit_btn = driver.find_element(By.XPATH, "//button[@type='submit']")
        submit_btn.click()
        print("表单提交成功!")
    finally:
        # Close the browser whether or not submission succeeded.
        driver.quit()


# Guard the example so importing this module does not open a browser
# (the original ran auto_fill_form(...) at import time).
if __name__ == "__main__":
    form_data = {
        'username': 'testuser',
        'email': 'test@example.com',
        'message': '这是一条测试消息',
    }
    auto_fill_form('https://example.com/contact', form_data)
最佳实践
- 使用 headless 模式:节省资源,提高速度
- 添加延迟:避免被识别为机器人
- 错误处理:完善的异常处理机制
- 日志记录:记录自动化过程
- 遵守 robots.txt:尊重网站规则
- 使用代理:防止IP被封
这些工具和技术可以帮助你在 Web 领域实现各种自动化任务,从测试到部署,从数据抓取到监控。