Python 异步编程完全指南(四):高级技巧与性能优化
前言
掌握基础后,本篇将带你学习异步编程的高级技巧,帮你写出更高效、更优雅的异步代码。
一、并发控制:信号量 (Semaphore)
1.1 为什么需要控制并发?
不控制并发的问题:
┌─────────────────────────────────────────────────────────────┐
│ 同时发起 1000 个请求 │
│ │ │
│ ▼ │
│ • 服务器被打爆 → 请求大量失败 │
│ • 内存暴涨 → 程序崩溃 │
│ • IP 被封禁 → 无法继续访问 │
└─────────────────────────────────────────────────────────────┘
1.2 信号量基础用法
python
import asyncio
async def worker(sem: asyncio.Semaphore, task_id: int):
    """Run one unit of work while holding a slot of *sem*."""
    async with sem:  # acquire on entry, release on exit — even on error
        print(f"任务 {task_id} 开始执行")
        await asyncio.sleep(1)
        print(f"任务 {task_id} 执行完成")
        return task_id

async def main():
    """Launch ten workers but allow at most three to run at the same time."""
    gate = asyncio.Semaphore(3)
    jobs = [worker(gate, n) for n in range(10)]
    results = await asyncio.gather(*jobs)
    print(f"所有任务完成:{results}")

asyncio.run(main())
输出观察:任务是 3 个一组执行的,而不是 10 个同时开始。
1.3 BoundedSemaphore 更安全
python
import asyncio
async def main():
    """Demonstrate BoundedSemaphore: it forbids more releases than acquires."""
    limiter = asyncio.BoundedSemaphore(2)
    # Take both slots, then hand both back.
    for _ in range(2):
        await limiter.acquire()
    for _ in range(2):
        limiter.release()
    # One extra limiter.release() here would raise ValueError — that
    # over-release check is what makes Bounded safer than plain Semaphore.
    print("正常完成")

asyncio.run(main())
1.4 实用封装:限流装饰器
python
import asyncio
from functools import wraps
def rate_limit(max_concurrent: int):
    """Decorator limiting how many calls of the wrapped coroutine run at once.

    The semaphore is created lazily on the first call, inside a running
    event loop.  The original created it at decoration (import) time, which
    bound it to the wrong/no loop on Python < 3.10 and allocated it even if
    the decorated function was never called.
    """
    semaphore = None

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            nonlocal semaphore
            if semaphore is None:  # first call: an event loop is running now
                semaphore = asyncio.Semaphore(max_concurrent)
            async with semaphore:  # hold one slot for the whole call
                return await func(*args, **kwargs)

        return wrapper

    return decorator
# Usage example: all calls share one semaphore created by the decorator
@rate_limit(5)  # at most 5 concurrent calls
async def fetch_url(url: str):
    """Simulate fetching *url* (1 s of latency)."""
    await asyncio.sleep(1)
    return f"fetched: {url}"

async def main():
    """Fire 20 rate-limited requests and report how many completed."""
    urls = [f"http://example.com/{i}" for i in range(20)]
    tasks = [fetch_url(url) for url in urls]
    results = await asyncio.gather(*tasks)
    print(f"完成 {len(results)} 个请求")

asyncio.run(main())
二、超时控制
2.1 wait_for - 基础超时
python
import asyncio
async def slow_operation():
    """An operation that takes far longer than the caller is willing to wait."""
    await asyncio.sleep(10)
    return "完成"

async def main():
    """Abort slow_operation() after a 2-second budget using wait_for."""
    try:
        result = await asyncio.wait_for(slow_operation(), timeout=2.0)
    except asyncio.TimeoutError:
        print("操作超时!")
    else:
        print(f"结果:{result}")

asyncio.run(main())
2.2 timeout 上下文管理器 (Python 3.11+)
python
import asyncio
async def main():
    """Cancel an over-long await with asyncio.timeout (Python 3.11+)."""
    timed_out = False
    try:
        async with asyncio.timeout(2.0):
            await asyncio.sleep(10)
            print("这行不会执行")
    except TimeoutError:
        timed_out = True
    if timed_out:
        print("超时了!")
    # Execution resumes normally after the handled timeout.
    print("程序继续运行")

asyncio.run(main())
2.3 timeout_at - 指定截止时间
python
import asyncio
async def main():
    """timeout_at takes an absolute deadline on the event-loop clock."""
    # Deadline = "now on the loop clock" + 2 seconds (a point in time,
    # not a duration as with asyncio.timeout).
    deadline = asyncio.get_running_loop().time() + 2.0
    try:
        async with asyncio.timeout_at(deadline):
            await asyncio.sleep(10)
    except TimeoutError:
        print("到达截止时间!")

asyncio.run(main())
2.4 shield - 保护任务不被取消
python
import asyncio
async def important_task():
    """Work that must run to completion even if a waiter gives up early."""
    print("重要任务开始")
    await asyncio.sleep(3)
    print("重要任务完成")
    return "重要结果"

async def main():
    """Wait briefly for the task; shield() keeps the timeout from cancelling it."""
    task = asyncio.create_task(important_task())
    try:
        result = await asyncio.wait_for(asyncio.shield(task), timeout=1.0)
    except asyncio.TimeoutError:
        # Only the shield wrapper was cancelled — the task itself lives on,
        # so we can still await its result afterwards.
        print("等待超时,但任务继续在后台执行")
        result = await task
    print(f"最终结果:{result}")

asyncio.run(main())
三、优雅取消任务
3.1 基础取消
python
import asyncio
async def long_running_task():
    """Loop forever; on cancellation, run cleanup and then re-raise."""
    try:
        while True:
            print("工作中...")
            await asyncio.sleep(1)
    except asyncio.CancelledError:
        print("收到取消信号,执行清理...")
        await asyncio.sleep(0.5)  # simulated cleanup work
        print("清理完成")
        raise  # propagate so the awaiting side sees the task as cancelled

async def main():
    """Let the task run for 3 seconds, then cancel it and await the outcome."""
    task = asyncio.create_task(long_running_task())
    await asyncio.sleep(3)
    print("准备取消任务...")
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("任务已成功取消")

asyncio.run(main())
3.2 取消多个任务
python
import asyncio

async def worker(id: int):
    """Print a heartbeat forever; on cancellation, log it and re-raise."""
    try:
        while True:
            print(f"Worker-{id} 运行中")
            await asyncio.sleep(1)
    except asyncio.CancelledError:
        print(f"Worker-{id} 被取消")
        raise  # re-raise so gather() records this task as cancelled

async def main():
    """Start several named tasks, then cancel all of them after 2 seconds."""
    # Create multiple named tasks
    tasks = [
        asyncio.create_task(worker(i), name=f"worker-{i}")
        for i in range(3)
    ]
    # Let them run for 2 seconds
    await asyncio.sleep(2)
    # Request cancellation of every task
    print("\n开始取消所有任务...")
    for task in tasks:
        task.cancel()
    # return_exceptions=True turns each CancelledError into a list entry
    # instead of letting it propagate out of gather()
    results = await asyncio.gather(*tasks, return_exceptions=True)
    print(f"\n取消结果:{len([r for r in results if isinstance(r, asyncio.CancelledError)])} 个任务已取消")

asyncio.run(main())
3.3 取消超时处理
python
import asyncio
async def cancellable_work(timeout: float):
    """Do periodic work until cancelled, self-cancelling after *timeout* seconds.

    Raises asyncio.CancelledError to the caller whether the cancellation came
    from the internal deadline or from outside.
    """
    task = asyncio.current_task()
    cancel_task = None  # defined up-front so the finally block is always safe
    try:
        async def auto_cancel():
            # Watchdog: cancel this worker once the deadline passes.
            await asyncio.sleep(timeout)
            task.cancel()

        cancel_task = asyncio.create_task(auto_cancel())
        # The actual (endless) work loop.
        while True:
            print("工作中...")
            await asyncio.sleep(0.5)
    except asyncio.CancelledError:
        print("工作被取消(超时或手动)")
        raise
    finally:
        # The original referenced cancel_task unconditionally and could hit
        # UnboundLocalError if cancellation landed before create_task() ran.
        if cancel_task is not None:
            cancel_task.cancel()
async def main():
    """Drive cancellable_work and observe the cancellation propagating out."""
    try:
        await cancellable_work(timeout=2.0)
    except asyncio.CancelledError:
        print("主函数捕获到取消")

asyncio.run(main())
四、异步上下文管理器
4.1 基础实现
python
import asyncio
class AsyncDatabase:
    """Toy async database connection demonstrating __aenter__/__aexit__."""

    def __init__(self, name: str):
        self.name = name
        self.connected = False  # flipped by the context-manager protocol

    async def __aenter__(self):
        """Open the connection (simulated with a short sleep); return self."""
        print(f"正在连接数据库 {self.name}...")
        await asyncio.sleep(0.5)
        self.connected = True
        print(f"数据库 {self.name} 已连接")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the connection; returning False lets exceptions propagate."""
        print(f"正在关闭数据库 {self.name}...")
        await asyncio.sleep(0.3)
        self.connected = False
        print(f"数据库 {self.name} 已关闭")
        return False

    async def query(self, sql: str):
        """Run *sql* against an open connection; raise if not connected."""
        if not self.connected:
            raise RuntimeError("数据库未连接")
        await asyncio.sleep(0.1)
        return f"查询结果:{sql}"
async def main():
    """Query inside `async with` so connect/close happen automatically."""
    async with AsyncDatabase("mydb") as db:
        result = await db.query("SELECT * FROM users")
        print(result)
    print("所有资源已清理")

asyncio.run(main())
4.2 使用 contextlib
python
import asyncio
from contextlib import asynccontextmanager
@asynccontextmanager
async def async_timer(name: str):
    """Async context manager reporting the wall-clock time of its body."""
    import time

    started = time.time()
    print(f"[{name}] 开始计时")
    try:
        yield  # run the body of the `async with` block
    finally:
        # Always reported, even if the body raised.
        print(f"[{name}] 耗时:{time.time() - started:.3f}s")
@asynccontextmanager
async def acquire_lock(lock: asyncio.Lock, timeout: float = None):
"""带超时的锁获取"""
acquired = False
try:
if timeout:
acquired = await asyncio.wait_for(lock.acquire(), timeout)
else:
await lock.acquire()
acquired = True
yield acquired
finally:
if acquired:
lock.release()
async def main():
    """Exercise the timer and the timeout-guarded lock helpers."""
    # Time a 1-second piece of work
    async with async_timer("数据处理"):
        await asyncio.sleep(1)
    # Acquire a lock, giving up after 5 seconds
    lock = asyncio.Lock()
    async with acquire_lock(lock, timeout=5.0) as acquired:
        if acquired:
            print("成功获取锁")
            await asyncio.sleep(0.5)

asyncio.run(main())
五、异步迭代器与生成器
5.1 异步迭代器
python
import asyncio
class AsyncRange:
    """range()-like asynchronous iterator with a configurable per-item delay."""

    def __init__(self, start: int, end: int, delay: float = 0.1):
        self.start = start
        self.end = end
        self.delay = delay
        self.current = start  # cursor advanced by each __anext__ call

    def __aiter__(self):
        # The object is its own iterator.
        return self

    async def __anext__(self):
        if self.current >= self.end:
            raise StopAsyncIteration
        await asyncio.sleep(self.delay)  # pretend each item needs async work
        value, self.current = self.current, self.current + 1
        return value
async def main():
    """Consume AsyncRange with `async for`."""
    print("异步迭代器示例:")
    async for num in AsyncRange(0, 5):
        print(f" 收到:{num}")

asyncio.run(main())
5.2 异步生成器
python
import asyncio
async def async_data_stream(n: int):
    """Asynchronously yield *n* fake data records.

    Each record carries an id, a payload string and the event-loop timestamp.
    """
    for i in range(n):
        await asyncio.sleep(0.1)  # simulated per-item fetch latency
        yield {
            "id": i,
            "data": f"item_{i}",
            # get_running_loop() replaces the deprecated get_event_loop()
            # call pattern — inside a coroutine the loop is always running.
            "timestamp": asyncio.get_running_loop().time(),
        }
async def async_filter(stream, predicate):
    """Yield only the items of *stream* for which predicate(item) is true."""
    async for element in stream:
        if not predicate(element):
            continue
        yield element
async def main():
    """Build a small pipeline: generate -> filter -> consume."""
    print("异步生成器示例:")
    # 10-item source stream
    stream = async_data_stream(10)
    # keep only even ids
    filtered = async_filter(stream, lambda x: x["id"] % 2 == 0)
    # drain the pipeline
    async for item in filtered:
        print(f" {item}")

asyncio.run(main())
5.3 异步推导式
python
import asyncio
async def fetch_item(id: int):
    """Pretend to fetch record *id* and return its payload."""
    await asyncio.sleep(0.1)
    return dict(id=id, value=id * 10)
async def main():
    """Contrast sequential awaiting in a comprehension with gather()."""
    # Async list comprehension — each await finishes before the next starts
    items = [await fetch_item(i) for i in range(5)]
    print(f"列表推导:{items}")
    # NOTE: the comprehension above runs SEQUENTIALLY!
    # For concurrency, create the coroutines first and gather them:
    tasks = [fetch_item(i) for i in range(5)]
    items = await asyncio.gather(*tasks)
    print(f"并发获取:{items}")

asyncio.run(main())
六、性能优化技巧
6.1 连接池复用
python
import asyncio
import aiohttp
# ❌ Bad: a brand-new session (and connection pool) per request
async def bad_fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()

# ✅ Good: one shared session reuses connections across requests
async def good_fetch(session: aiohttp.ClientSession, url: str):
    async with session.get(url) as resp:
        return await resp.text()

async def main():
    """Fetch 100 URLs through a single shared ClientSession."""
    urls = [f"http://example.com/{i}" for i in range(100)]
    # One session, reused for every request
    async with aiohttp.ClientSession() as session:
        tasks = [good_fetch(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
        print(f"完成 {len(results)} 个请求")
    # NOTE(review): unlike the other examples this snippet never calls
    # asyncio.run(main()) — presumably omitted to avoid real network I/O.
6.2 批量处理
python
import asyncio
from typing import List
async def process_item(item):
    """Process one item (simulated 10 ms of async work); returns item * 2."""
    await asyncio.sleep(0.01)
    return item * 2

async def process_batch(items: List, batch_size: int = 100):
    """Process *items* in concurrent chunks of *batch_size*, preserving order."""
    results = []
    for offset in range(0, len(items), batch_size):
        chunk = items[offset:offset + batch_size]
        # Each chunk runs concurrently; chunks run one after another.
        processed = await asyncio.gather(*(process_item(it) for it in chunk))
        results.extend(processed)
        print(f"已处理:{len(results)}/{len(items)}")
    return results

async def main():
    """Run 1000 items through the batcher, 100 at a time."""
    items = list(range(1000))
    results = await process_batch(items, batch_size=100)
    print(f"处理完成:{len(results)} 项")

asyncio.run(main())
6.3 避免不必要的 await
python
import asyncio
# (some_sync_function / process_data / fetch_from_network below are
# illustrative placeholders, not defined in this article)
# ❌ Unnecessary awaits
async def bad_example():
    result = await asyncio.sleep(0)  # pointless wait — sleep(0) only yields control
    data = await some_sync_function()  # awaiting a plain sync call is wrong: it
                                       # raises TypeError unless an awaitable is returned
    return data

# ✅ Await only what is truly asynchronous
async def good_example():
    # synchronous work runs directly
    data = process_data()
    # only genuine async operations are awaited
    result = await fetch_from_network()
    return result
6.4 使用 run_in_executor 处理 CPU 密集型任务
python
import asyncio
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
def cpu_intensive_task(n):
    """CPU-bound work: sum of squares of 0..n-1 (pure computation, no awaits)."""
    return sum(i * i for i in range(n))

async def main():
    """Off-load CPU-bound work so it does not block the event loop."""
    loop = asyncio.get_running_loop()
    # Thread pool: fine for blocking I/O; limited benefit for CPU work (GIL).
    with ThreadPoolExecutor() as pool:
        result = await loop.run_in_executor(pool, cpu_intensive_task, 1000000)
        print(f"线程池结果:{result}")
    # Process pool: true parallelism across cores for CPU-bound work.
    with ProcessPoolExecutor() as pool:
        result = await loop.run_in_executor(pool, cpu_intensive_task, 1000000)
        print(f"进程池结果:{result}")

# The __main__ guard is REQUIRED here: with the "spawn" start method
# (Windows, macOS default) ProcessPoolExecutor re-imports this module in
# every worker process, and an unguarded asyncio.run(main()) would then
# re-execute recursively and crash.
if __name__ == "__main__":
    asyncio.run(main())
6.5 性能对比表
| 优化项 | 优化前 | 优化后 | 提升 |
|---|---|---|---|
| 连接复用 | 每请求新建连接 | 复用连接池 | 3-5x |
| 批量处理 | 逐个处理 | 批量并发 | 10x+ |
| CPU 任务 | 阻塞事件循环 | run_in_executor | 不阻塞 |
| 合理并发 | 无限制 | Semaphore 控制 | 更稳定 |
七、本篇小结
本篇我们学习了:
- ✅ 信号量控制:精确控制并发数量
- ✅ 超时控制:wait_for、timeout、shield
- ✅ 优雅取消:正确处理 CancelledError
- ✅ 异步上下文管理器:资源的自动管理
- ✅ 异步迭代器与生成器:流式数据处理
- ✅ 性能优化:连接池、批量处理、executor
高级技巧速查
python
# 1. Semaphore rate-limiting
sem = asyncio.Semaphore(10)
async with sem:
    await task()
# 2. Timeout control
result = await asyncio.wait_for(task(), timeout=5.0)
# 3. Shielding a task from cancellation
await asyncio.shield(important_task())
# 4. Async context manager
async with resource:
    await do_work()
# 5. Async iteration
async for item in async_generator():
    process(item)
# 6. Off-loading CPU-bound work
await loop.run_in_executor(pool, cpu_task)
下篇预告
在最后一篇 避坑指南篇 中,我们将学习:
- 6 大常见陷阱及解决方案
- 异步生态:常用库推荐
- 学习路线图与资源推荐
如果这篇文章对你有帮助,欢迎点赞、收藏、关注!有问题欢迎评论区讨论。
---