【并发编程】Python异步编程实战:从协程到异步框架
前言
随着互联网应用规模的不断扩大,高并发已经成为现代后端开发的核心挑战之一。在传统的同步编程模型中,线程是实现并发的常用方式,但线程的创建和切换开销较大,且多线程编程容易引发各种复杂的同步问题。Python的异步编程提供了一种更轻量级的并发方案,能够在单线程内实现高并发,大幅提升I/O密集型应用的性能。
异步编程在Web开发、数据爬取、API调用、实时通信等场景有着广泛的应用。Python生态系统中的异步框架如asyncio、aiohttp、FastAPI等,为开发者提供了强大的异步编程支持。掌握异步编程,是成为现代Python后端开发者的必备技能。
本文将从协程的基础概念讲起,深入讲解asyncio的核心原理和实战应用,帮助读者建立完整的异步编程知识体系。
一、协程基础概念
1.1 什么是协程
协程(Coroutine)是一种比线程更轻量的执行单元,它可以在单线程内实现多个执行流之间的切换。与线程由操作系统调度不同,协程的调度完全由程序员控制,这意味着我们可以精确地决定何时切换、为何切换。
协程的核心优势在于:协程切换不需要内核态与用户态之间的切换,开销极小;同一事件循环中的协程在单线程内交替执行,只会在显式的挂起点(如await)发生切换,因此很多场景下无需加锁(但如果对共享状态的修改跨越了多个await,仍可能出现竞态,此时需要使用asyncio.Lock等同步原语);协程让异步代码看起来像同步代码,更容易理解和维护。
python
# Coroutine example: a minimal producer/consumer pair built on a generator
def producer():
    """Yield the integers 0 through 4, printing each one before handing it over."""
    value = 0
    while value < 5:
        print(f"生产: {value}")
        yield value  # pause here until the consumer asks for the next item
        value += 1


def consumer():
    """Pull five items from the module-level ``coroutine`` generator.

    Returns:
        The list of items received, in the order they were produced.
    """
    collected = []
    for _ in range(5):
        # Each next() resumes the producer until its next yield.
        current = next(coroutine)
        collected.append(current)
        print(f"消费: {current}")
    return collected


# Create the generator object; nothing runs until the first next() call.
coroutine = producer()
# Drain the generator through the consumer.
result = consumer()
print(f"最终结果: {result}")
1.2 生成器与协程的关系
在Python中,协程最初是基于生成器的语法实现的。生成器通过yield语句可以暂停执行,而协程则利用这一特性实现协作式多任务。
Python 2.5引入了send()方法,允许向生成器发送值,这使得生成器具备了协程的雏形。Python 3.4引入了asyncio模块,Python 3.5引入了async/await语法,协程才成为真正的语言特性。
python
# Legacy-style coroutine driven with send()
def coro():
    """Generator-based coroutine that echoes back what it is sent.

    Yields "初始值" first, then "处理: <first sent value>", then "结束".
    Every value received through send() is printed.
    """
    print("协程开始")
    first_msg = yield "初始值"  # the yield expression evaluates to the sent value
    print(f"收到值: {first_msg}")
    second_msg = yield f"处理: {first_msg}"
    print(f"收到值: {second_msg}")
    yield "结束"


c = coro()
# A fresh generator must be primed with next() (or send(None)) before it
# can accept a real value.
result1 = next(c)
print(f"第1个yield值: {result1}")
result2 = c.send("你好")
print(f"第2个yield值: {result2}")
result3 = c.send("再见")
print(f"第3个yield值: {result3}")
1.3 async/await语法
Python 3.5引入了async/await语法,使得协程的表达更加清晰和直观。
python
# Defining coroutines with async/await
import asyncio  # the snippet uses asyncio.sleep/asyncio.run, so it must import it


async def fetch_data():
    """Simulate fetching data asynchronously.

    Returns:
        A dict with a "data" payload and a "status" code.
    """
    print("开始获取数据...")
    await asyncio.sleep(1)  # stands in for an I/O operation
    print("数据获取完成")
    return {"data": "result", "status": 200}


async def process_data(data):
    """Simulate processing data asynchronously.

    Args:
        data: The string to process.

    Returns:
        ``data`` converted to upper case.
    """
    print("开始处理数据...")
    await asyncio.sleep(0.5)
    print("数据处理完成")
    return data.upper()


async def main():
    """Entry coroutine: fetch, then process, then print the result."""
    # The two awaits run one after the other; no concurrency is involved here.
    data = await fetch_data()
    result = await process_data(data["data"])
    print(f"最终结果: {result}")


# Run the entry coroutine when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
二、asyncio核心原理
2.1 事件循环机制
事件循环是异步编程的核心,它负责调度和执行协程任务。事件循环的工作原理是:不断从任务队列中取出任务执行,当遇到await表达式时,挂起当前任务,转而执行其他任务;当被挂起的任务完成后,将其重新放回任务队列等待执行。
python
# 事件循环的基本使用
import asyncio
def hello():
    """Plain synchronous function, shown for contrast with the coroutine below."""
    print("Hello")


async def async_hello():
    """Print a greeting, yield to the event loop once, then print again."""
    print("Async Hello")
    await asyncio.sleep(0)  # sleep(0) simply hands control back to the loop
    print("Async Hello again")


# Option 1: asyncio.run (recommended)
asyncio.run(async_hello())

# Option 2: create and manage an event loop by hand
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(async_hello())
finally:
    loop.close()


# Option 3: use the loop that is already running
async def with_existing_loop():
    """Schedule async_hello() on the currently running loop and wait for it.

    A coroutine only executes while a loop is running, so
    get_running_loop() cannot fail here. Calling run_until_complete() on
    that loop would raise RuntimeError because it is already running, so
    the work is scheduled as a task and awaited instead.
    """
    running_loop = asyncio.get_running_loop()
    await running_loop.create_task(async_hello())
2.2 Task与Future
Future是一个表示异步操作结果的对象,类似于一个占位符,最终会被填充为操作的实际结果。Task是Future的子类,专门用于包装协程。
python
import asyncio
from asyncio import Future, Task
async def my_coro():
    """Sleep for one second and return "Done"."""
    await asyncio.sleep(1)
    return "Done"


async def main():
    """Demonstrate awaiting a bare Future and then a Task."""
    # Create the Future through the running loop rather than calling
    # Future() directly, so it is bound to the loop that will resolve it.
    future = asyncio.get_running_loop().create_future()

    async def set_future():
        """Resolve the Future after a two-second delay."""
        await asyncio.sleep(2)
        future.set_result("Future completed!")

    # Keep a reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected mid-run.
    setter = asyncio.create_task(set_future())

    # Wait for the Future to be resolved.
    result = await future
    print(f"Future result: {result}")
    await setter  # setter has done its work; awaiting surfaces any error

    # Wrap a coroutine in a Task.
    task = asyncio.create_task(my_coro())
    print(f"Task: {task}")
    print(f"Task done? {task.done()}")

    # Wait for the Task to finish and collect its return value.
    result = await task
    print(f"Task result: {result}")


if __name__ == "__main__":
    asyncio.run(main())
2.3 await表达式的工作原理
await表达式会暂停当前协程的执行,等待另一个协程完成并返回结果。在等待期间,事件循环可以执行其他协程。
python
import asyncio
import time
async def operation(name, duration):
    """Simulate an asynchronous operation that takes ``duration`` seconds.

    Args:
        name: Label used in the progress messages and the return value.
        duration: Number of seconds to sleep.

    Returns:
        The string "<name> result".
    """
    print(f"{name} 开始 (预计 {duration}s)")
    began = time.time()
    await asyncio.sleep(duration)  # suspends only this coroutine
    spent = time.time() - began
    print(f"{name} 完成 (实际 {spent:.2f}s)")
    return f"{name} result"


async def sequential_execution():
    """Run three operations one after another and report the total time."""
    began = time.time()
    # An await inside a list comprehension still runs the items one by one.
    outcomes = [await operation(f"操作{index}", 2) for index in (1, 2, 3)]
    spent = time.time() - began
    print(f"顺序执行总耗时: {spent:.2f}s")
    return outcomes


async def concurrent_execution():
    """Run three operations concurrently and report the total time."""
    began = time.time()
    # gather() schedules all three at once and returns results in call order.
    pending = [operation(f"操作{index}", 2) for index in (1, 2, 3)]
    outcomes = await asyncio.gather(*pending)
    spent = time.time() - began
    print(f"并发执行总耗时: {spent:.2f}s")
    return list(outcomes)


async def main():
    """Run the sequential demo followed by the concurrent demo."""
    separator = "=" * 50
    print(separator)
    print("顺序执行:")
    await sequential_execution()
    print(separator)
    print("并发执行:")
    await concurrent_execution()


asyncio.run(main())
# Expected output (timings are approximate):
#   顺序执行总耗时: 6.02s  (2 + 2 + 2)
#   并发执行总耗时: 2.01s  (the three operations overlap)
三、异步上下文管理
3.1 异步上下文管理器
异步上下文管理器使用__aenter__和__aexit__方法,支持在async with语句中使用。
python
import asyncio
class AsyncTimer:
    """Async context manager that prints how long its body took.

    Args:
        name: Label included in the start and end messages.
    """

    def __init__(self, name):
        self.name = name
        # Loop-clock timestamp recorded on entry; None until then.
        self.start_time = None

    async def __aenter__(self):
        """Record the start time, print the start message and return self."""
        # Inside a coroutine the running loop is the right clock source.
        self.start_time = asyncio.get_running_loop().time()
        print(f"[{self.name}] 开始")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Print the elapsed time; never suppresses exceptions."""
        elapsed = asyncio.get_running_loop().time() - self.start_time
        print(f"[{self.name}] 结束,耗时: {elapsed:.2f}s")
        # Returning True here would swallow an exception raised in the body.
        return False


async def main():
    """Time two sleeps of different lengths."""
    async with AsyncTimer("任务1"):
        await asyncio.sleep(1)
    async with AsyncTimer("任务2"):
        await asyncio.sleep(0.5)


if __name__ == "__main__":
    asyncio.run(main())
3.2 异步迭代器
异步迭代器使用__aiter__和__anext__方法,支持async for语句。
python
import asyncio
class AsyncCounter:
    """Async iterator that counts from 0 up to (but not including) max_value."""

    def __init__(self, max_value):
        self.max_value = max_value
        self.current = 0

    def __aiter__(self):
        """Return the iterator itself, as required by ``async for``."""
        return self

    async def __anext__(self):
        """Return the next count after a short pause, or stop the iteration."""
        if self.current < self.max_value:
            await asyncio.sleep(0.1)  # stands in for an asynchronous operation
            emitted = self.current
            self.current = emitted + 1
            return emitted
        raise StopAsyncIteration


async def main():
    """Consume the class-based async iterator."""
    async for number in AsyncCounter(5):
        print(f"计数: {number}")


asyncio.run(main())


# Async generator: the same idea with far less boilerplate
async def async_range(start, end, step=1):
    """Asynchronously yield start, start+step, ... while the value is below end."""
    position = start
    while position < end:
        await asyncio.sleep(0.1)
        yield position
        position += step


async def main2():
    """Consume the async generator."""
    async for number in async_range(0, 5):
        print(f"异步生成器: {number}")


asyncio.run(main2())
四、asyncio并发模式
4.1 并发执行多个任务
asyncio.gather是最常用的并发执行多个协程的方式,它等待所有任务完成并返回结果列表。
python
import asyncio
import random
async def fetch_url(url):
    """Pretend to fetch ``url``: wait a random 0.5-2.0 s and report success."""
    await asyncio.sleep(random.uniform(0.5, 2.0))
    return f"{url} - OK"


async def main():
    """Fetch several URLs concurrently and print each result."""
    urls = [
        "https://example.com/api/users",
        "https://example.com/api/posts",
        "https://example.com/api/comments",
        "https://example.com/api/tags",
    ]
    # gather() runs every request at once and keeps results in input order.
    requests = (fetch_url(address) for address in urls)
    results = await asyncio.gather(*requests)
    for address, outcome in zip(urls, results):
        print(f"{address}: {outcome}")


asyncio.run(main())
4.2 任务组(TaskGroup)
Python 3.11引入了TaskGroup,提供了更优雅的任务管理方式。
python
import asyncio
async def task_with_error(task_id):
"""可能出错的任务"""
await asyncio.sleep(task_id * 0.5)
if task_id == 2:
raise ValueError(f"Task {task_id} failed!")
return f"Task {task_id} completed"
async def main():
# 使用TaskGroup
async with asyncio.TaskGroup() as tg:
# 创建任务
tasks = []
for i in range(4):
task = tg.create_task(task_with_error(i))
tasks.append(task)
# TaskGroup会自动等待所有任务完成
# 如果任一任务抛出异常,其他任务会被取消
# 访问结果
for task in tasks:
if task.result() is not None:
print(f"Result: {task.result()}")
asyncio.run(main())
4.3 信号量控制并发数
使用信号量可以限制同时执行的任务数量,防止资源耗尽。
python
import asyncio
import time
async def limited_task(task_id, semaphore):
    """Run a one-second job while holding a slot of ``semaphore``."""
    async with semaphore:  # waits here until a slot is free
        print(f"Task {task_id} 开始")
        await asyncio.sleep(1)
        print(f"Task {task_id} 结束")
    return f"Task {task_id} done"


async def main():
    """Launch ten jobs but let at most three run at the same time."""
    semaphore = asyncio.Semaphore(3)
    began = time.time()
    # All ten coroutines are scheduled together; the semaphore does the throttling.
    jobs = (limited_task(number, semaphore) for number in range(10))
    results = await asyncio.gather(*jobs)
    elapsed = time.time() - began
    # Ten one-second jobs, three at a time: ceil(10 / 3) = 4 rounds, about 4 s.
    print(f"总耗时: {elapsed:.2f}s")
    print(f"结果: {results}")


asyncio.run(main())
4.4 队列处理
asyncio.Queue提供了异步队列,用于生产者和消费者模式。
python
import asyncio
import random
async def producer(queue, producer_id):
    """Put three labelled items on ``queue``, pausing briefly after each."""
    for sequence in range(3):
        item = f"P{producer_id}-Item{sequence}"
        await queue.put(item)
        print(f"生产者 {producer_id} 生产: {item}")
        await asyncio.sleep(random.uniform(0.1, 0.5))


async def consumer(queue, consumer_id):
    """Process items from ``queue`` until none arrives for two seconds."""
    while True:
        try:
            item = await asyncio.wait_for(queue.get(), timeout=2.0)
        except asyncio.TimeoutError:
            print(f"消费者 {consumer_id} 等待超时,退出")
            break
        print(f"消费者 {consumer_id} 消费: {item}")
        await asyncio.sleep(1)  # simulated processing time
        queue.task_done()  # lets queue.join() know this item is finished


async def main():
    """Wire two producers and three consumers to one bounded queue."""
    queue = asyncio.Queue(maxsize=10)
    producer_tasks = [
        asyncio.create_task(producer(queue, number)) for number in range(2)
    ]
    consumer_tasks = [
        asyncio.create_task(consumer(queue, number)) for number in range(3)
    ]
    # First let every producer finish...
    await asyncio.gather(*producer_tasks)
    # ...then wait until each queued item has been marked done.
    await queue.join()
    # Consumers would otherwise idle until their timeout; cancel them now.
    for worker in consumer_tasks:
        worker.cancel()
    await asyncio.gather(*consumer_tasks, return_exceptions=True)


asyncio.run(main())
五、异步HTTP客户端实战
5.1 使用aiohttp
aiohttp是Python最流行的异步HTTP客户端库。
python
import asyncio
import time  # used by main(); imported up front instead of after its use

import aiohttp


async def fetch_with_aiohttp(url, session):
    """GET ``url`` with the given session.

    Returns:
        The response body as text when the status is 200, otherwise None.
    """
    async with session.get(url) as response:
        if response.status == 200:
            return await response.text()
        return None


async def concurrent_requests(urls):
    """Request every URL concurrently over one shared session.

    Returns:
        A list aligned with ``urls``; each entry is the body text, None, or
        the exception raised for that URL (return_exceptions=True).
    """
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_with_aiohttp(url, session) for url in urls]
        return await asyncio.gather(*tasks, return_exceptions=True)


async def fetch_with_headers():
    """GET httpbin's /headers endpoint with custom request headers.

    Returns:
        The decoded JSON body when the status is 200, otherwise None.
    """
    headers = {
        "User-Agent": "AsyncBot/1.0",
        "Accept": "application/json",
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://httpbin.org/headers",
            headers=headers,
        ) as response:
            if response.status == 200:
                return await response.json()
            return None  # explicit: non-200 responses yield no data


async def post_request():
    """POST a JSON payload to httpbin and return the decoded JSON reply."""
    payload = {"username": "test", "password": "secret"}
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://httpbin.org/post",
            json=payload,
        ) as response:
            return await response.json()


async def main():
    """Run the three request demos in turn."""
    urls = [
        "https://httpbin.org/delay/1",
        "https://httpbin.org/delay/2",
        "https://httpbin.org/delay/1",
    ]
    print("并发请求:")
    start = time.time()
    results = await concurrent_requests(urls)
    print(f"耗时: {time.time() - start:.2f}s")
    print(f"结果数量: {len(results)}")
    print("\n带Headers请求:")
    headers_result = await fetch_with_headers()
    print(headers_result)
    print("\nPOST请求:")
    post_result = await post_request()
    print(post_result.get("json", {}))


if __name__ == "__main__":
    asyncio.run(main())
5.2 连接池和超时控制
python
import asyncio
import aiohttp
async def fetch_with_pool():
    """Fetch one URL through a session whose connector caps connections.

    Returns:
        The decoded JSON body of the response.
    """
    # The connector is created with limit=10 and limit_per_host=5.
    connector = aiohttp.TCPConnector(limit=10, limit_per_host=5)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get("https://httpbin.org/get") as response:
            return await response.json()


async def fetch_with_timeout():
    """Fetch a slow endpoint under a session-wide timeout.

    Returns:
        The response text, or None when the request times out.
    """
    timeout = aiohttp.ClientTimeout(total=30, connect=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        try:
            async with session.get("https://httpbin.org/delay/5") as response:
                return await response.text()
        except asyncio.TimeoutError:
            print("请求超时!")
            return None


async def retry_request(url, max_retries=3):
    """GET ``url`` as JSON, retrying on errors with exponential backoff.

    Args:
        url: Address to request.
        max_retries: Maximum number of attempts.

    Returns:
        The decoded JSON body of the first response whose status is below 500.

    Raises:
        aiohttp.ClientError, asyncio.TimeoutError: Re-raised when the last
            attempt fails. A 5xx status counts as a failure.
    """
    # One session serves every attempt, so retries share its connection
    # pool instead of building a new session each time.
    async with aiohttp.ClientSession() as session:
        for attempt in range(max_retries):
            try:
                async with session.get(url) as response:
                    if response.status < 500:
                        return await response.json()
                    # Raised inside the try so that 5xx responses are retried.
                    raise aiohttp.ClientError(f"HTTP {response.status}")
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                print(f"尝试 {attempt + 1} 失败: {e}")
                if attempt == max_retries - 1:
                    raise
                await asyncio.sleep(2 ** attempt)  # back off 1 s, 2 s, 4 s, ...


async def main():
    """Run the connection-pool demo and the timeout demo."""
    result = await fetch_with_pool()
    print("连接池请求完成:", result.get("url"))
    result = await fetch_with_timeout()
    print("超时请求:", result)


if __name__ == "__main__":
    asyncio.run(main())
六、异步Web框架实战
6.1 FastAPI异步API开发
FastAPI是现代Python异步Web框架的代表作,性能优秀且易用。
python
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
from pydantic import BaseModel, EmailStr
from typing import Optional, List
import asyncio
import time
app = FastAPI(title="异步API示例", version="1.0.0")


# Pydantic models describing request and response bodies
class UserCreate(BaseModel):
    """Request body for creating a user."""

    username: str
    email: EmailStr
    password: str


class User(BaseModel):
    """User representation returned by the API."""

    id: int
    username: str
    email: str
    is_active: bool = True


class OrderItem(BaseModel):
    """One line of an order."""

    product_id: int
    quantity: int
    price: float


class OrderCreate(BaseModel):
    """Request body for creating an order."""

    user_id: int
    items: List[OrderItem]


# In-memory stand-ins for database tables
users_db = []
orders_db = []


# Async dependency
async def get_db():
    """Return the in-memory tables after a short simulated delay."""
    await asyncio.sleep(0.1)
    return {"users": users_db, "orders": orders_db}


@app.get("/")
async def root():
    """Landing endpoint."""
    return {"message": "FastAPI 异步API示例"}


@app.get("/users/{user_id}", response_model=User)
async def get_user(user_id: int):
    """Return the user with the given id, or respond 404."""
    await asyncio.sleep(0.05)  # simulated query latency
    record = next((row for row in users_db if row["id"] == user_id), None)
    if record is None:
        raise HTTPException(status_code=404, detail="用户不存在")
    return User(**record)


@app.post("/users/", response_model=User, status_code=201)
async def create_user(user: UserCreate):
    """Create a user; respond 400 when the username is already taken."""
    for existing in users_db:
        if existing["username"] == user.username:
            raise HTTPException(status_code=400, detail="用户名已存在")
    record = {
        "id": len(users_db) + 1,
        "username": user.username,
        "email": user.email,
        "is_active": True,
    }
    users_db.append(record)
    return User(**record)


@app.get("/users/", response_model=List[User])
async def list_users(skip: int = 0, limit: int = 10):
    """Return one page of users, selected by ``skip`` and ``limit``."""
    await asyncio.sleep(0.05)
    page = users_db[skip:skip + limit]
    return [User(**row) for row in page]


@app.post("/orders/", status_code=201)
async def create_order(order: OrderCreate, db=Depends(get_db)):
    """Create a pending order; respond 404 when the user is unknown."""
    known_user_ids = [row["id"] for row in db["users"]]
    if order.user_id not in known_user_ids:
        raise HTTPException(status_code=404, detail="用户不存在")
    # Order total is the sum of quantity * price over all lines.
    total = sum(item.quantity * item.price for item in order.items)
    record = {
        "id": len(db["orders"]) + 1,
        "user_id": order.user_id,
        "items": [item.dict() for item in order.items],
        "total": total,
        "status": "pending",
    }
    db["orders"].append(record)
    return record


@app.get("/orders/{order_id}")
async def get_order(order_id: int, db=Depends(get_db)):
    """Return the order with the given id, or respond 404."""
    record = next((row for row in db["orders"] if row["id"] == order_id), None)
    if record is None:
        raise HTTPException(status_code=404, detail="订单不存在")
    return record


@app.delete("/orders/{order_id}")
async def cancel_order(order_id: int, db=Depends(get_db)):
    """Cancel a pending order; 404 if missing, 400 if it is not pending."""
    record = next((row for row in db["orders"] if row["id"] == order_id), None)
    if record is None:
        raise HTTPException(status_code=404, detail="订单不存在")
    if record["status"] != "pending":
        raise HTTPException(status_code=400, detail="只能取消待处理订单")
    record["status"] = "cancelled"
    return {"message": "订单已取消"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
6.2 异步后台任务
python
from fastapi import FastAPI, BackgroundTasks
from pydantic import BaseModel
import asyncio
app = FastAPI()


class EmailRequest(BaseModel):
    """Request body describing one e-mail."""

    to: str
    subject: str
    body: str


def send_email_sync(email: EmailRequest):
    """Simulate sending an e-mail with a blocking five-second wait."""
    from time import sleep

    print(f"发送邮件到 {email.to}...")
    sleep(5)  # simulated slow, blocking work
    print(f"邮件已发送: {email.subject}")


async def send_email_async(email: EmailRequest):
    """Simulate sending an e-mail with a non-blocking five-second wait."""
    print(f"异步发送邮件到 {email.to}...")
    await asyncio.sleep(5)  # simulated I/O
    print(f"邮件已发送: {email.subject}")


@app.post("/send-email-sync")
def endpoint_sync(email: EmailRequest, background_tasks: BackgroundTasks):
    """Queue the blocking sender as a background task and reply at once."""
    background_tasks.add_task(send_email_sync, email)
    return {"message": "邮件发送任务已添加"}


@app.post("/send-email-async")
async def endpoint_async(email: EmailRequest):
    """Await the async sender, so the reply is sent only after it finishes."""
    await send_email_async(email)
    return {"message": "邮件已发送"}


@app.post("/send-email-bg")
async def endpoint_bg(email: EmailRequest, background_tasks: BackgroundTasks):
    """Mixed mode: queue the async sender as a background task."""
    background_tasks.add_task(send_email_async, email)
    return {"message": "邮件发送任务已在后台添加"}
七、异步编程最佳实践
7.1 避免阻塞调用
在异步代码中,应避免使用同步阻塞调用,如time.sleep()、requests.get()等。
python
import asyncio
import time
# Wrong: calling time.sleep inside a coroutine
async def bad_example():
    """Anti-pattern: the blocking sleep stalls the whole event loop."""
    print("开始")
    time.sleep(5)  # blocks the entire event loop!
    print("结束")


# Right: use asyncio.sleep
async def good_example():
    """Correct version: only this coroutine is suspended."""
    print("开始")
    await asyncio.sleep(5)  # other tasks keep running meanwhile
    print("结束")


# When blocking I/O cannot be avoided, move it to a thread pool
async def run_blocking_in_executor():
    """Run a blocking function in the default executor and return its result.

    Returns:
        The string returned by the blocking function.
    """

    def blocking_io():
        """Stand-in for blocking work: sleeps three seconds."""
        import time

        time.sleep(3)
        return "Blocking result"

    # get_running_loop() is the supported way to obtain the loop from
    # inside a coroutine; get_event_loop() is discouraged there.
    loop = asyncio.get_running_loop()
    # Passing None selects the loop's default executor.
    return await loop.run_in_executor(None, blocking_io)
7.2 异常处理
python
import asyncio
async def may_fail(should_fail=True):
    """Simulate a task that may fail after a one-second delay.

    Args:
        should_fail: When True (the default) the task raises; when False
            it completes normally. This replaces a hard-coded ``if True``
            that left the success branch unreachable.

    Returns:
        "Success" when ``should_fail`` is False.

    Raises:
        ValueError: When ``should_fail`` is True.
    """
    await asyncio.sleep(1)
    if should_fail:
        raise ValueError("Something went wrong!")
    return "Success"


async def main():
    """Show two ways of handling a failing coroutine."""
    # Approach 1: try/except around the await
    try:
        result = await may_fail()
        print(f"结果: {result}")
    except ValueError as e:
        print(f"捕获异常: {e}")

    # Approach 2: gather() with return_exceptions=True puts the exception
    # object in the result list instead of raising it.
    results = await asyncio.gather(
        asyncio.sleep(1, result=1),
        may_fail(),
        asyncio.sleep(1, result=3),
        return_exceptions=True,
    )
    print(f"gather结果: {results}")  # [1, ValueError(...), 3]


if __name__ == "__main__":
    asyncio.run(main())
7.3 取消任务
python
import asyncio
async def long_running_task(task_id):
    """Run ten one-second steps, announcing cancellation if it is interrupted.

    Returns:
        A completion message when all ten steps finish.

    Raises:
        asyncio.CancelledError: Re-raised after logging, so the caller sees
            the task as cancelled.
    """
    try:
        for step in range(10):
            print(f"任务 {task_id}: 第 {step} 步")
            await asyncio.sleep(1)
    except asyncio.CancelledError:
        print(f"任务 {task_id} 被取消")
        raise
    return f"任务 {task_id} 完成"


async def main():
    """Start the task, cancel it after five seconds and confirm the cancellation."""
    worker = asyncio.create_task(long_running_task(1))
    await asyncio.sleep(5)  # let the task make some progress first
    worker.cancel()
    try:
        await worker
    except asyncio.CancelledError:
        print("任务已被成功取消")


asyncio.run(main())
八、综合实战案例
8.1 异步数据爬虫
python
import asyncio
import aiohttp
from dataclasses import dataclass
from typing import List, Optional
import re
@dataclass
class Article:
    """Parsed article record."""

    title: str
    url: str
    author: Optional[str]
    publish_date: Optional[str]
    content: Optional[str]


class AsyncCrawler:
    """Concurrent page crawler sharing one aiohttp session.

    Use it as an async context manager: the session is opened on entry and
    closed on exit.

    Args:
        max_concurrent: Size of the semaphore that guards page fetches.
    """

    def __init__(self, max_concurrent: int = 5):
        self.max_concurrent = max_concurrent
        self.semaphore = asyncio.Semaphore(max_concurrent)
        # Created in __aenter__; None outside the context manager.
        self.session: Optional[aiohttp.ClientSession] = None
        # Accumulates articles across crawl() calls.
        self.results: List[Article] = []

    async def __aenter__(self):
        """Open the HTTP session and return the crawler."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    async def fetch_page(self, url: str) -> Optional[str]:
        """Fetch one page.

        Returns:
            The page text for a 200 response, otherwise None.

        Raises:
            RuntimeError: If called before the session has been opened.
        """
        if self.session is None:
            raise RuntimeError("AsyncCrawler must be used with 'async with'")
        async with self.semaphore:
            try:
                # Pass a ClientTimeout object; a bare number is the legacy form.
                async with self.session.get(
                    url, timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    if response.status == 200:
                        return await response.text()
                    # Report the skipped page instead of dropping it silently.
                    print(f"获取失败 {url}: HTTP {response.status}")
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                # Only network-level failures are tolerated; programming
                # errors propagate so that crawl() can report them.
                print(f"获取失败 {url}: {e}")
        return None

    async def parse_article(self, url: str) -> Optional[Article]:
        """Fetch ``url`` and build an Article from its first <h1> element.

        Returns:
            The Article, or None when the page could not be fetched.
        """
        html = await self.fetch_page(url)
        if not html:
            return None
        # Simplified parsing: only the title is extracted.
        title_match = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.DOTALL)
        title = title_match.group(1).strip() if title_match else "Unknown"
        return Article(
            title=title,
            url=url,
            author=None,
            publish_date=None,
            content=None,
        )

    async def crawl(self, urls: List[str]) -> List[Article]:
        """Crawl every URL concurrently.

        Returns:
            ``self.results`` with the newly parsed articles appended.
        """
        urls = list(urls)
        outcomes = await asyncio.gather(
            *(self.parse_article(url) for url in urls),
            return_exceptions=True,
        )
        for url, outcome in zip(urls, outcomes):
            if isinstance(outcome, Article):
                self.results.append(outcome)
            elif isinstance(outcome, BaseException):
                # Surface failures that gather() captured for us.
                print(f"解析失败 {url}: {outcome!r}")
        return self.results


async def main():
    """Crawl three sample URLs with at most three concurrent fetches."""
    urls = [
        "https://example.com/article1",
        "https://example.com/article2",
        "https://example.com/article3",
    ]
    async with AsyncCrawler(max_concurrent=3) as crawler:
        articles = await crawler.crawl(urls)
        print(f"成功爬取 {len(articles)} 篇文章")


if __name__ == "__main__":
    asyncio.run(main())
九、总结
Python异步编程是现代后端开发的重要技能。通过本文的学习,我们掌握了:
- 协程基础:协程是比线程更轻量的执行单元,async/await是协程的现代语法
- 事件循环:事件循环是异步编程的核心,负责调度协程任务
- asyncio API:包括gather、TaskGroup、Semaphore、Queue等
- 异步上下文管理:支持async with和async for协议
- 异步HTTP:使用aiohttp实现高性能HTTP请求
- 异步Web框架:使用FastAPI构建高性能异步API
- 最佳实践:避免阻塞调用、正确处理异常和取消任务
异步编程虽然增加了代码复杂度,但能显著提升I/O密集型应用的性能。建议在实际项目中多加练习,逐步掌握这一强大的技术。