Introduction: Why Asynchronous Programming?
Imagine you need to download data from 10 websites. Done the traditional way, the code waits for the first site to respond before requesting the second, which is painfully slow. Asynchronous programming lets you issue all the requests at once and handle each response as it arrives.
```python
# Synchronous vs. asynchronous: an intuitive comparison
import time

def sync_download():
    """Synchronous download - one site at a time"""
    websites = ["site1", "site2", "site3"]
    for site in websites:
        time.sleep(1)  # simulate network latency
        print(f"Download finished: {site}")

# The synchronous version takes about 3 seconds.
# An asynchronous version can finish in a little over 1 second.
```
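The block above only shows the synchronous half. For contrast, here is a minimal async sketch of the same workload; the three 1-second waits overlap instead of adding up:

```python
# A minimal async counterpart to sync_download (sketch)
import asyncio

async def async_download():
    """Asynchronous download - all sites at once"""
    websites = ["site1", "site2", "site3"]

    async def fetch(site):
        await asyncio.sleep(1)  # simulate network latency
        print(f"Download finished: {site}")

    await asyncio.gather(*(fetch(site) for site in websites))

# asyncio.run(async_download())  # finishes in ~1 second
```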
1. Async Basics: async/await
1.1 Your First Async Program
```python
import asyncio
import time

print("=== Async Programming Basics ===")

# Define an async function
async def say_hello(name, delay):
    """Say hello asynchronously"""
    await asyncio.sleep(delay)  # wait without blocking the event loop
    print(f"Hello, {name}! (waited {delay}s)")
    return f"Hello {name}"

# Drive the async function
async def main():
    print("Start time:", time.strftime("%H:%M:%S"))
    # Run a single async task
    result = await say_hello("Zhang San", 2)
    print(f"Return value: {result}")
    print("End time:", time.strftime("%H:%M:%S"))

# The Python 3.7+ way to run a coroutine
print("Running a single async task:")
asyncio.run(main())

print("\n" + "="*50)
```
1.2 Running Multiple Tasks Concurrently
```python
async def download_file(filename, download_time):
    """Simulate downloading a file"""
    print(f"Starting download: {filename}")
    await asyncio.sleep(download_time)
    print(f"Download finished: {filename} (took {download_time}s)")
    return f"{filename}_content"

async def concurrent_downloads():
    """Download several files concurrently"""
    print("Starting concurrent downloads:")
    start_time = time.time()

    # Create the tasks
    task1 = asyncio.create_task(download_file("file1.txt", 3))
    task2 = asyncio.create_task(download_file("file2.txt", 2))
    task3 = asyncio.create_task(download_file("file3.txt", 1))

    # Wait for all of them to finish
    results = await asyncio.gather(task1, task2, task3)

    end_time = time.time()
    print(f"All files downloaded! Total time: {end_time - start_time:.2f}s")
    print(f"Download results: {results}")
    return results

print("Running multiple tasks concurrently:")
asyncio.run(concurrent_downloads())

print("\n" + "="*50)
```
2. Async in Practice: Network Requests
2.1 Asynchronous HTTP Requests
```python
import aiohttp
import asyncio

async def fetch_url(session, url, timeout=5):
    """Fetch a URL asynchronously"""
    try:
        # aiohttp expects a ClientTimeout object, not a bare number
        client_timeout = aiohttp.ClientTimeout(total=timeout)
        async with session.get(url, timeout=client_timeout) as response:
            if response.status == 200:
                content = await response.text()
                return {
                    'url': url,
                    'status': response.status,
                    'content_length': len(content),
                    'success': True
                }
            else:
                return {
                    'url': url,
                    'status': response.status,
                    'success': False
                }
    except asyncio.TimeoutError:
        return {
            'url': url,
            'status': 'timeout',
            'success': False
        }
    except Exception as e:
        return {
            'url': url,
            'status': 'error',
            'error': str(e),
            'success': False
        }

async def fetch_multiple_urls(urls):
    """Fetch several URLs concurrently"""
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
        return results

# Test helper
async def test_http_requests():
    """Exercise the HTTP helpers"""
    # A few test URLs (public testing APIs)
    test_urls = [
        "https://httpbin.org/delay/1",   # 1-second delay
        "https://httpbin.org/delay/2",   # 2-second delay
        "https://httpbin.org/status/200",
        "https://httpbin.org/status/404",
        "https://httpbin.org/ip",
        "https://httpbin.org/user-agent"
    ]

    print("Starting concurrent HTTP requests...")
    start_time = time.time()
    results = await fetch_multiple_urls(test_urls)
    end_time = time.time()
    print(f"Requests finished! Total time: {end_time - start_time:.2f}s")

    # Show the results
    print("\nRequest results:")
    for i, result in enumerate(results, 1):
        if result['success']:
            print(f"{i}. {result['url']} - OK, status: {result['status']}, "
                  f"content length: {result['content_length']}")
        else:
            print(f"{i}. {result['url']} - failed, status: {result['status']}")
    return results

print("Async HTTP request demo:")
# asyncio.run(test_http_requests())
print("(Note: these requests need real URLs, so only the code structure is shown here.)")

print("\n" + "="*50)
```
2.2 Limiting Concurrency
```python
class RateLimiter:
    """Concurrency limiter built on a semaphore"""

    def __init__(self, max_concurrent):
        self.semaphore = asyncio.Semaphore(max_concurrent)

    async def limited_fetch(self, session, url):
        """Fetch while holding a semaphore slot"""
        async with self.semaphore:
            return await fetch_url(session, url)

async def rate_limited_requests(urls, max_concurrent=3):
    """Issue requests with a cap on concurrency"""
    rate_limiter = RateLimiter(max_concurrent)

    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            task = asyncio.create_task(
                rate_limiter.limited_fetch(session, url)
            )
            tasks.append(task)

        # Report progress as tasks finish
        print(f"Processing {len(urls)} URLs with concurrency {max_concurrent}")
        results = []
        for i, task in enumerate(asyncio.as_completed(tasks), 1):
            result = await task
            results.append(result)
            print(f"Progress: {i}/{len(urls)} - {result['url']}")
        return results

print("Rate limiting demo:")
print("Capping the number of in-flight requests avoids putting too much pressure on the server.")

print("\n" + "="*50)
```
3. Asynchronous File Operations
3.1 Reading and Writing Files Asynchronously
```python
import aiofiles
import os

async def async_write_file(filename, content):
    """Write a file asynchronously"""
    try:
        async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
            await f.write(content)
        print(f"File written: {filename}")
        return True
    except Exception as e:
        print(f"Write failed for {filename}: {e}")
        return False

async def async_read_file(filename):
    """Read a file asynchronously"""
    try:
        async with aiofiles.open(filename, 'r', encoding='utf-8') as f:
            content = await f.read()
        print(f"File read: {filename}, length: {len(content)}")
        return content
    except Exception as e:
        print(f"Read failed for {filename}: {e}")
        return None

async def process_multiple_files():
    """Process several files concurrently"""
    # Create some test files
    files_data = {
        "test1.txt": "Contents of the first test file\nHello World!",
        "test2.txt": "The second test file\nAsync programming in Python is fun",
        "test3.txt": "The third file\n" + "data line\n" * 10
    }

    print("Starting the async file demo...")

    # Write the files concurrently
    write_tasks = []
    for filename, content in files_data.items():
        task = asyncio.create_task(async_write_file(filename, content))
        write_tasks.append(task)
    write_results = await asyncio.gather(*write_tasks)
    print(f"Writes done, succeeded: {sum(write_results)}/{len(write_results)}")

    # Read them back concurrently
    read_tasks = []
    for filename in files_data.keys():
        task = asyncio.create_task(async_read_file(filename))
        read_tasks.append(task)
    read_results = await asyncio.gather(*read_tasks)

    # Show a summary of what was read
    print("\nFile content previews:")
    for filename, content in zip(files_data.keys(), read_results):
        if content:
            preview = content[:50].replace('\n', ' ') + "..."
            print(f"{filename}: {preview}")

    # Clean up the test files
    print("\nRemoving test files...")
    for filename in files_data.keys():
        try:
            os.remove(filename)
            print(f"Removed: {filename}")
        except OSError:
            pass

print("Async file operations demo:")
# asyncio.run(process_multiple_files())

print("\n" + "="*50)
```
4. Async and Databases
4.1 Asynchronous Database Operations
```python
# SQLite here for simplicity; in production you would more often use an
# async MySQL/PostgreSQL driver
import aiosqlite

async def async_database_demo():
    """Async database operations demo"""
    print("Starting the async database demo...")

    # Open an in-memory database
    async with aiosqlite.connect(':memory:') as db:
        # Create the table
        await db.execute('''
            CREATE TABLE users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                email TEXT UNIQUE NOT NULL,
                age INTEGER
            )
        ''')
        await db.commit()
        print("Table created")

        # Rows to insert
        users = [
            ('Zhang San', 'zhangsan@example.com', 25),
            ('Li Si', 'lisi@example.com', 30),
            ('Wang Wu', 'wangwu@example.com', 28),
            ('Zhao Liu', 'zhaoliu@example.com', 35)
        ]

        # Insert concurrently (aiosqlite serializes these on a single
        # worker thread, but the event loop stays free)
        insert_tasks = []
        for name, email, age in users:
            task = asyncio.create_task(
                db.execute(
                    "INSERT INTO users (name, email, age) VALUES (?, ?, ?)",
                    (name, email, age)
                )
            )
            insert_tasks.append(task)
        await asyncio.gather(*insert_tasks)
        await db.commit()
        print(f"Inserted {len(users)} rows")

        # Query the data
        async with db.execute("SELECT * FROM users ORDER BY age") as cursor:
            rows = await cursor.fetchall()
            print("\nUsers (ordered by age):")
            for row in rows:
                print(f"  ID: {row[0]}, name: {row[1]}, email: {row[2]}, age: {row[3]}")

        # Aggregate query
        print("\nStatistics:")
        async with db.execute("SELECT COUNT(*), AVG(age) FROM users") as cursor:
            count, avg_age = await cursor.fetchone()
            print(f"  Total users: {count}")
            print(f"  Average age: {avg_age:.1f}")

        # Transaction demo
        print("\nTransaction demo:")
        try:
            await db.execute("BEGIN TRANSACTION")
            # Update
            await db.execute("UPDATE users SET age = age + 1 WHERE age < 30")
            # Simulate an operation that may fail
            should_fail = False
            if should_fail:
                raise Exception("simulated transaction failure")
            await db.execute("DELETE FROM users WHERE age > 40")
            await db.execute("COMMIT")
            print("  Transaction committed")
        except Exception as e:
            await db.execute("ROLLBACK")
            print(f"  Transaction rolled back: {e}")

    print("\nDemo finished!")

print("Async database demo:")
# asyncio.run(async_database_demo())

print("\n" + "="*50)
```
5. Async Web Frameworks: FastAPI in Practice
5.1 Building an Async API Quickly
```python
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from typing import List, Optional
import asyncio

# Create the FastAPI app
app = FastAPI(title="Async API Demo", version="1.0.0")

# Data models
class Item(BaseModel):
    name: str
    description: Optional[str] = None
    price: float
    stock: int = 0

class Order(BaseModel):
    item_id: int
    quantity: int
    customer_name: str

# Fake in-memory "database"
fake_items_db = {
    1: Item(name="Laptop", description="High-end gaming laptop", price=6999.99, stock=10),
    2: Item(name="Smartphone", description="Latest 5G phone", price=3999.99, stock=50),
    3: Item(name="Tablet", description="Thin and portable", price=2999.99, stock=30)
}
orders_db = []

# Background task (simulates slow work)
async def process_order_background(order_id: int, order_data: Order):
    """Process an order in the background"""
    print(f"[background] Processing order {order_id}")
    await asyncio.sleep(3)  # simulate slow processing
    print(f"[background] Order {order_id} done: {order_data.customer_name} "
          f"bought {order_data.quantity} item(s)")

# API endpoints
@app.get("/")
async def root():
    """Root path"""
    return {"message": "Welcome to the async API", "status": "running"}

@app.get("/items/", response_model=List[Item])
async def list_items(skip: int = 0, limit: int = 10):
    """List items (with pagination)"""
    await asyncio.sleep(0.5)  # simulate database latency
    items = list(fake_items_db.values())[skip:skip + limit]
    return items

@app.get("/items/{item_id}", response_model=Item)
async def get_item(item_id: int):
    """Fetch a single item"""
    if item_id not in fake_items_db:
        raise HTTPException(status_code=404, detail="Item not found")
    # Simulate a more involved async lookup
    await asyncio.sleep(0.3)
    return fake_items_db[item_id]

@app.post("/orders/")
async def create_order(order: Order, background_tasks: BackgroundTasks):
    """Create an order (hands slow work to a background task)"""
    if order.item_id not in fake_items_db:
        raise HTTPException(status_code=404, detail="Item does not exist")

    item = fake_items_db[order.item_id]
    if order.quantity > item.stock:
        raise HTTPException(status_code=400, detail="Insufficient stock")

    # Update the stock
    item.stock -= order.quantity

    # Generate an order ID
    order_id = len(orders_db) + 1

    # Store the order (.dict() is Pydantic v1; use .model_dump() on v2)
    orders_db.append({
        "id": order_id,
        **order.dict(),
        "total_price": item.price * order.quantity,
        "status": "processing"
    })

    # Queue the background task
    background_tasks.add_task(process_order_background, order_id, order)

    return {
        "order_id": order_id,
        "message": "Order received and is being processed",
        "total": item.price * order.quantity
    }

@app.get("/orders/{order_id}")
async def get_order_status(order_id: int):
    """Look up an order's status"""
    if order_id < 1 or order_id > len(orders_db):
        raise HTTPException(status_code=404, detail="Order not found")
    await asyncio.sleep(0.2)  # simulate query latency
    return orders_db[order_id - 1]

# Run the server (put this code in an actual file to run it)
if __name__ == "__main__":
    import uvicorn
    print("Starting the FastAPI server...")
    print("Open http://localhost:8000/docs for the API docs")
    uvicorn.run(app, host="0.0.0.0", port=8000)

print("FastAPI async web framework demo:")
print("1. Defined the Item and Order data models")
print("2. Created endpoints to list items, fetch one, and place orders")
print("3. Used BackgroundTasks for slow operations")
print("4. Ready for async database access (in-memory storage in this example)")

print("\n" + "="*50)
```
6. Performance Optimization and Error Handling
6.1 Monitoring Async Performance
```python
import asyncio
import time
from contextlib import asynccontextmanager

class AsyncTimer:
    """Async timing context manager"""

    def __init__(self, name):
        self.name = name
        self.start_time = None
        self.end_time = None

    async def __aenter__(self):
        self.start_time = time.time()
        print(f"[timer start] {self.name}")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.end_time = time.time()
        duration = self.end_time - self.start_time
        print(f"[timer stop] {self.name}: {duration:.4f}s")
        if exc_type:
            print(f"[error] {self.name}: {exc_val}")

async def monitored_operation():
    """An operation wrapped in the timer"""
    async with AsyncTimer("monitored_operation"):
        # Some async work
        await asyncio.sleep(1)

        # An operation that fails roughly half the time
        if time.time() % 2 > 1:
            raise ValueError("random error")

        # More work
        await asyncio.sleep(0.5)
        return "operation succeeded"

async def error_handling_demo():
    """Error handling demo"""
    print("=== Async Error Handling ===")

    # 1. Plain try/except
    try:
        result = await monitored_operation()
        print(f"Result: {result}")
    except ValueError as e:
        print(f"Caught an error: {e}")

    print("\n2. Error handling with asyncio.gather:")
    async def task_with_error(n):
        await asyncio.sleep(0.1)
        if n == 2:
            raise ValueError(f"task {n} fails on purpose")
        return f"task {n} succeeded"

    # Option 1: let everything finish and collect the exceptions
    tasks = [task_with_error(i) for i in range(5)]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    print("All task results:")
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"  task {i}: failed - {result}")
        else:
            print(f"  task {i}: ok - {result}")

    print("\n3. Error handling with asyncio.wait:")
    tasks = [asyncio.create_task(task_with_error(i)) for i in range(5)]

    # Wait for every task to complete
    done, pending = await asyncio.wait(tasks, return_when=asyncio.ALL_COMPLETED)

    print("Completed tasks:")
    for task in done:
        try:
            result = task.result()
            print(f"  ok: {result}")
        except Exception as e:
            print(f"  failed: {e}")

async def performance_optimization():
    """Performance tips"""
    print("\n=== Performance Tips ===")

    # 1. Don't await serially when the work can overlap
    print("1. Avoid unnecessary serial awaits:")

    async def inefficient():
        """Slow: each iteration awaits before the next one starts"""
        result = []
        for i in range(10):
            await asyncio.sleep(0.01)
            result.append(i)
        return result

    async def efficient():
        """Fast: start everything first, then await"""
        # Kick off all the sleeps at once
        tasks = []
        for i in range(10):
            task = asyncio.create_task(asyncio.sleep(0.01))
            tasks.append((task, i))

        # The tasks run concurrently, so this loop finishes quickly
        results = []
        for task, i in tasks:
            await task
            results.append(i)
        return results

    # 2. Use asyncio.Queue for flow control
    print("\n2. Using asyncio.Queue:")

    async def worker(name, queue):
        """Worker coroutine"""
        while True:
            item = await queue.get()
            if item is None:  # shutdown signal
                break
            print(f"{name} handling: {item}")
            await asyncio.sleep(0.1)
            queue.task_done()

    async def queue_demo():
        """Queue demo"""
        queue = asyncio.Queue(maxsize=5)

        # Start the workers
        workers = [
            asyncio.create_task(worker(f"Worker-{i}", queue))
            for i in range(3)
        ]

        # Produce items
        for i in range(10):
            await queue.put(f"Item-{i}")
            print(f"Produced: Item-{i}")

        # Wait until every item has been handled
        await queue.join()

        # Stop the workers
        for _ in workers:
            await queue.put(None)
        await asyncio.gather(*workers)

    await queue_demo()

print("Performance and error handling:")
asyncio.run(error_handling_demo())
# asyncio.run(performance_optimization())

print("\n" + "="*50)
```
7. Hands-On Project: An Async Crawler System
```python
import asyncio
import aiohttp
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import re

class AsyncWebCrawler:
    """Asynchronous web crawler"""

    def __init__(self, start_url, max_depth=2, max_concurrent=10):
        self.start_url = start_url
        self.max_depth = max_depth
        self.max_concurrent = max_concurrent
        self.visited_urls = set()
        self.results = []
        self.semaphore = asyncio.Semaphore(max_concurrent)

    def normalize_url(self, url, base_url):
        """Normalize a URL"""
        if url.startswith('#'):
            return None
        if url.startswith('javascript:'):
            return None

        # Resolve to an absolute URL
        absolute_url = urljoin(base_url, url)

        # Drop the fragment identifier
        parsed = urlparse(absolute_url)
        return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

    async def fetch_page(self, session, url):
        """Fetch a page"""
        async with self.semaphore:
            try:
                timeout = aiohttp.ClientTimeout(total=10)
                async with session.get(url, timeout=timeout) as response:
                    if response.status == 200:
                        content_type = response.headers.get('content-type', '')
                        if 'text/html' in content_type:
                            html = await response.text()
                            return {
                                'url': url,
                                'html': html,
                                'status': 'success'
                            }
                        else:
                            return {
                                'url': url,
                                'status': 'not_html',
                                'content_type': content_type
                            }
                    else:
                        return {
                            'url': url,
                            'status': 'error',
                            'status_code': response.status
                        }
            except Exception as e:
                return {
                    'url': url,
                    'status': 'exception',
                    'error': str(e)
                }

    def extract_links(self, html, base_url):
        """Extract links from HTML"""
        soup = BeautifulSoup(html, 'html.parser')
        links = []

        for link in soup.find_all('a', href=True):
            url = self.normalize_url(link['href'], base_url)
            if url and url not in self.visited_urls:
                links.append(url)

        # Grab the title
        title = soup.title.string if soup.title else "untitled"

        # Grab the body text (simplified)
        text = soup.get_text()
        text = re.sub(r'\s+', ' ', text).strip()

        return links, title, text[:200]  # first 200 characters only

    async def crawl(self, session, url, depth=0):
        """Crawl recursively"""
        if depth > self.max_depth:
            return
        if url in self.visited_urls:
            return

        self.visited_urls.add(url)
        print(f"[depth {depth}] crawling: {url}")

        # Fetch the page
        result = await self.fetch_page(session, url)

        if result['status'] == 'success':
            html = result['html']
            links, title, preview = self.extract_links(html, url)

            # Record the result
            self.results.append({
                'url': url,
                'title': title,
                'preview': preview,
                'depth': depth,
                'link_count': len(links)
            })

            # Recurse into the links
            if depth < self.max_depth and links:
                tasks = []
                for link in links:
                    if link not in self.visited_urls:
                        task = asyncio.create_task(
                            self.crawl(session, link, depth + 1)
                        )
                        tasks.append(task)
                if tasks:
                    await asyncio.gather(*tasks)

    async def run(self):
        """Run the crawler"""
        print(f"Starting crawl: {self.start_url}")
        print(f"Max depth: {self.max_depth}, max concurrency: {self.max_concurrent}")

        async with aiohttp.ClientSession() as session:
            await self.crawl(session, self.start_url)

        print("\nCrawl finished!")
        print(f"Visited {len(self.visited_urls)} URLs")
        print(f"Collected {len(self.results)} pages")
        return self.results

# Usage example
async def crawler_demo():
    """Crawler demo"""
    # Note: respect robots.txt and don't put pressure on sites you don't own
    crawler = AsyncWebCrawler(
        start_url="https://httpbin.org/html",  # a test page
        max_depth=1,
        max_concurrent=5
    )
    results = await crawler.run()

    # Show the results
    print("\nCrawl results:")
    for i, result in enumerate(results[:5], 1):  # first 5 only
        print(f"{i}. {result['title']}")
        print(f"   URL: {result['url']}")
        print(f"   depth: {result['depth']}, links: {result['link_count']}")
        print(f"   preview: {result['preview']}")
        print()

print("Async crawler demo:")
# asyncio.run(crawler_demo())
print("This crawler fetches many pages concurrently to gather data efficiently.")
print("Note: in real use, follow each site's rules and add delays to avoid getting blocked.")

print("\n" + "="*50)
```
Summary: Best Practices for Async Programming
Scenarios that suit async:
- ✅ I/O-bound work (network requests, file I/O, database queries)
- ✅ High-concurrency services (web servers, API services)
- ✅ Real-time applications (chat rooms, live notifications)
- ✅ Crawlers and data collection
Scenarios that don't suit async:
- ❌ CPU-bound work (scientific computing, image processing); if you must, offload it, as in the sketch below
- ❌ Simple scripts with no concurrency to exploit
- ❌ Tasks with strict ordering requirements
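Even in an async app, CPU-bound work sometimes shows up. The standard escape hatch is to hand it to a process pool so the event loop stays responsive. A minimal sketch, with crunch standing in for the heavy function:

```python
# Sketch: offload CPU-bound work so the event loop stays responsive
import asyncio
from concurrent.futures import ProcessPoolExecutor

def crunch(n: int) -> int:
    """Stand-in for a heavy, pure-Python computation"""
    return sum(i * i for i in range(n))

async def main():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as pool:
        # Runs in a worker process; the loop is free meanwhile
        result = await loop.run_in_executor(pool, crunch, 10_000_000)
    print(f"crunch result: {result}")

# Process pools need the __main__ guard on spawn-based platforms:
# if __name__ == "__main__":
#     asyncio.run(main())
```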