1. Coroutines: The Basic Unit of Asynchronous Programming
Understanding coroutine objects
A coroutine is a function that can be suspended and resumed, defined with `async def`:
```python
import asyncio

# A basic coroutine
async def simple_coroutine(name: str, delay: float) -> str:
    """A simple coroutine."""
    print(f"{name}: started, waiting {delay} seconds")
    await asyncio.sleep(delay)  # non-blocking wait
    print(f"{name}: finished")
    return f"{name}-result"

# The states of a coroutine
async def coroutine_lifecycle():
    """Demonstrate the lifecycle of a coroutine."""
    coro = simple_coroutine("test", 1.0)
    print(f"Coroutine type: {type(coro)}")
    print(f"Is a coroutine object: {asyncio.iscoroutine(coro)}")
    print(f"Is a coroutine function: {asyncio.iscoroutinefunction(simple_coroutine)}")

    # State of the coroutine object
    print(f"Initial state: {coro.cr_running}")  # False, not running yet

    # Run the coroutine
    task = asyncio.create_task(coro)
    print(f"After creating the task: {task._state}")  # 'PENDING' (internal attribute, shown for illustration)
    await task
    print(f"After completion: {task._state}")  # 'FINISHED'

# Driving a coroutine by hand
async def manual_coroutine_control():
    """Drive a coroutine manually (not recommended in production)."""
    coro = simple_coroutine("manual", 0.5)
    try:
        # coro.send(None) starts the coroutine and runs it to its first suspension point.
        # Note: awaiting asyncio.sleep() needs a running event loop, so this raises here.
        coro.send(None)
    except StopIteration as e:
        print(f"Coroutine return value: {e.value}")
    except Exception as e:
        print(f"Coroutine raised: {e}")

# Run
asyncio.run(coroutine_lifecycle())
```
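A common pitfall follows directly from the lifecycle shown above: calling a coroutine function only creates a coroutine object, and nothing runs until that object is awaited or wrapped in a task. A minimal sketch of the symptom and the fix (the helper `work` is a made-up name for illustration):

```python
import asyncio

async def work() -> str:
    await asyncio.sleep(0.1)
    return "done"

async def main():
    work()                 # creates a coroutine object but never runs it;
                           # Python later warns "RuntimeWarning: coroutine 'work' was never awaited"
    result = await work()  # awaiting actually runs it to completion
    print(result)

asyncio.run(main())
```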
2. Tasks: Managing Concurrent Execution
Creating and cancelling tasks
```python
import asyncio
from asyncio import CancelledError

async def cancellable_task(name: str, duration: float) -> str:
    """A task that can be cancelled."""
    try:
        print(f"{name}: starting long-running work")
        steps = int(duration * 10)
        for i in range(steps):
            await asyncio.sleep(0.1)
            print(f"{name}: progress {i + 1}/{steps}")
        return f"{name}: finished"
    except CancelledError:
        print(f"{name}: cancelled")
        raise  # re-raise so the caller sees the cancellation
    finally:
        print(f"{name}: cleaning up resources")

async def task_management():
    """Task management example."""
    # Create several tasks
    task1 = asyncio.create_task(cancellable_task("task1", 2.0))
    task2 = asyncio.create_task(cancellable_task("task2", 3.0))
    task3 = asyncio.create_task(cancellable_task("task3", 1.0))

    # Cancel task2 after a while
    await asyncio.sleep(1.5)
    if not task2.done():
        task2.cancel()
        print("task2 cancelled")

    # Collect results
    done, pending = await asyncio.wait(
        [task1, task2, task3],
        timeout=2.0,
        return_when=asyncio.FIRST_COMPLETED,
    )
    print(f"Completed tasks: {len(done)}")
    print(f"Pending tasks: {len(pending)}")

    # Process results
    for task in done:
        if task.cancelled():
            print("Task was cancelled")
        elif task.exception():
            print(f"Task raised: {task.exception()}")
        else:
            print(f"Task result: {task.result()}")

    # Cancel whatever is left
    for task in pending:
        task.cancel()

async def timeout_handling():
    """Timeout handling."""
    async def slow_operation():
        await asyncio.sleep(5)
        return "slow operation finished"

    try:
        # Option 1: asyncio.wait_for
        result = await asyncio.wait_for(slow_operation(), timeout=2.0)
        print(result)
    except asyncio.TimeoutError:
        print("operation timed out")

    # Option 2: asyncio.shield (protects the awaited task from cancellation)
    task = asyncio.create_task(slow_operation())
    try:
        result = await asyncio.wait_for(asyncio.shield(task), timeout=2.0)
    except asyncio.TimeoutError:
        print("waiting timed out, but the background task keeps running")

    # Check on the background task later
    await asyncio.sleep(3.5)  # a bit longer than the remaining 3 seconds
    if task.done():
        print(f"Background task result: {task.result()}")

# Run
asyncio.run(timeout_handling())
# asyncio.run(task_management())
```
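On Python 3.11 and later there is a third option worth knowing: the `asyncio.timeout()` asynchronous context manager, which applies a deadline to everything awaited inside the block without wrapping each call in `wait_for`. A minimal sketch, assuming Python 3.11+:

```python
import asyncio

async def slow_operation() -> str:
    await asyncio.sleep(5)
    return "slow operation finished"

async def main():
    try:
        # Everything awaited inside the block must finish within 2 seconds
        async with asyncio.timeout(2.0):
            result = await slow_operation()
            print(result)
    except TimeoutError:
        print("operation timed out")

asyncio.run(main())
```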
3. Asynchronous Context Managers and Iterators
`async with` and `async for`
```python
import asyncio
from typing import AsyncIterator, List

class AsyncConnectionPool:
    """An asynchronous connection pool."""

    def __init__(self, pool_size: int = 5):
        self.pool_size = pool_size
        self._connections: List[str] = []
        self._semaphore = asyncio.Semaphore(pool_size)

    async def __aenter__(self):
        print("initializing the connection pool")
        # Simulate opening connections
        for i in range(self.pool_size):
            await asyncio.sleep(0.1)
            self._connections.append(f"connection-{i + 1}")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        print("tearing down the connection pool")
        # Simulate closing connections
        for conn in self._connections:
            await asyncio.sleep(0.05)
            print(f"closing {conn}")
        self._connections.clear()

    async def acquire(self) -> str:
        """Acquire a connection."""
        await self._semaphore.acquire()
        return self._connections.pop()

    def release(self, conn: str):
        """Release a connection back to the pool."""
        self._connections.append(conn)
        self._semaphore.release()

    async def execute_query(self, query: str) -> str:
        """Execute a query."""
        conn = await self.acquire()
        try:
            await asyncio.sleep(0.5)  # simulate the query
            return f"{conn}: result of '{query}'"
        finally:
            self.release(conn)

class AsyncDataStream:
    """An asynchronous data stream."""

    def __init__(self, data: List[str], delay: float = 0.1):
        self.data = data
        self.delay = delay
        self.index = 0

    def __aiter__(self) -> AsyncIterator[str]:
        return self

    async def __anext__(self) -> str:
        if self.index >= len(self.data):
            raise StopAsyncIteration
        item = self.data[self.index]
        self.index += 1
        # Simulate fetching the data asynchronously
        await asyncio.sleep(self.delay)
        return f"processed: {item}"

async def async_context_example():
    """Asynchronous context manager example."""
    async with AsyncConnectionPool(3) as pool:
        tasks = []
        for i in range(10):
            query = f"SELECT * FROM table{i}"
            task = asyncio.create_task(pool.execute_query(query))
            tasks.append(task)
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for result in results:
            if isinstance(result, Exception):
                print(f"query failed: {result}")
            else:
                print(result)

async def async_iter_example():
    """Asynchronous iterator example."""
    data = ["A", "B", "C", "D", "E"]
    stream = AsyncDataStream(data, delay=0.2)

    # Option 1: async for
    async for item in stream:
        print(item)

    # Option 2: anext()
    stream2 = AsyncDataStream(data)
    try:
        while True:
            item = await anext(stream2)  # Python 3.10+
            print(f"fetched manually: {item}")
    except StopAsyncIteration:
        print("iteration finished")

    # Option 3: async comprehension
    stream3 = AsyncDataStream(data)
    processed = [item async for item in stream3]
    print(f"async comprehension result: {processed}")

# Run
asyncio.run(async_iter_example())
# asyncio.run(async_context_example())
```
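When the iteration logic is as simple as it is here, a full class with `__aiter__`/`__anext__` is more ceremony than necessary: an async generator (an `async def` that uses `yield`) gives the same `async for` behaviour in a few lines. A minimal sketch of an equivalent to AsyncDataStream:

```python
import asyncio
from typing import AsyncIterator, List

async def data_stream(data: List[str], delay: float = 0.1) -> AsyncIterator[str]:
    """Async generator: yields one processed item at a time."""
    for item in data:
        await asyncio.sleep(delay)  # simulate fetching the data asynchronously
        yield f"processed: {item}"

async def main():
    async for item in data_stream(["A", "B", "C"], delay=0.2):
        print(item)

asyncio.run(main())
```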
4. Semaphores and Advanced Concurrency Control
Limiting the level of concurrency
```python
import asyncio
import random
import time
from asyncio import Semaphore, Lock

async def worker(semaphore: Semaphore, name: str, work_time: float):
    """A worker whose concurrency is bounded by a semaphore."""
    async with semaphore:
        print(f"{name}: acquired the semaphore, starting work")
        await asyncio.sleep(work_time)
        print(f"{name}: finished work, releasing the semaphore")
        return f"{name}-done"

async def semaphore_example():
    """Semaphore example: at most 3 tasks run concurrently."""
    semaphore = Semaphore(3)
    tasks = []
    for i in range(10):
        work_time = random.uniform(0.5, 2.0)
        task = asyncio.create_task(worker(semaphore, f"Worker-{i}", work_time))
        tasks.append(task)
    await asyncio.gather(*tasks)

class AsyncRateLimiter:
    """An asynchronous rate limiter (token bucket)."""

    def __init__(self, rate: float):
        self.rate = rate  # maximum calls per second
        self._tokens = rate
        self._last_update = time.monotonic()
        self._lock = Lock()

    async def acquire(self):
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self._last_update
            # Refill tokens
            self._tokens = min(self.rate, self._tokens + elapsed * self.rate)
            self._last_update = now
            if self._tokens >= 1:
                self._tokens -= 1
                return True
            # Wait until enough tokens have accumulated
            wait_time = (1 - self._tokens) / self.rate
            await asyncio.sleep(wait_time)
            self._tokens = 0
            self._last_update = time.monotonic()
            return True

async def rate_limiter_example():
    """Rate limiter example."""
    limiter = AsyncRateLimiter(rate=2.0)  # at most 2 calls per second

    async def make_request(name: str):
        await limiter.acquire()
        print(f"{time.strftime('%H:%M:%S')} {name}: sending request")
        await asyncio.sleep(0.1)

    tasks = []
    for i in range(10):
        task = asyncio.create_task(make_request(f"Req-{i}"))
        tasks.append(task)
        await asyncio.sleep(0.1)  # simulate a gap between requests
    await asyncio.gather(*tasks)

class AsyncPipeline:
    """An asynchronous processing pipeline."""

    def __init__(self):
        self._queue = asyncio.Queue(maxsize=10)
        self._workers = []

    async def producer(self, name: str, count: int):
        """Producer coroutine."""
        for i in range(count):
            item = f"{name}-item{i}"
            await self._queue.put(item)
            print(f"producer {name}: produced {item}")
            await asyncio.sleep(random.uniform(0.1, 0.3))
        print(f"producer {name}: done")

    async def consumer(self, name: str):
        """Consumer coroutine."""
        while True:
            try:
                item = await asyncio.wait_for(self._queue.get(), timeout=2.0)
                print(f"consumer {name}: processing {item}")
                await asyncio.sleep(random.uniform(0.2, 0.5))
                self._queue.task_done()
            except asyncio.TimeoutError:
                print(f"consumer {name}: timed out, exiting")
                break

    async def run(self, producer_count: int = 3, consumer_count: int = 2):
        """Run the pipeline."""
        # Create producers
        for i in range(producer_count):
            task = asyncio.create_task(self.producer(f"P{i}", 5))
            self._workers.append(task)
        # Create consumers
        for i in range(consumer_count):
            task = asyncio.create_task(self.consumer(f"C{i}"))
            self._workers.append(task)
        # Wait for all producers to finish
        await asyncio.gather(*self._workers[:producer_count])
        # Wait for the queue to drain
        await self._queue.join()
        # Cancel the consumers
        for task in self._workers[producer_count:]:
            task.cancel()
        # Wait for everything to wind down
        await asyncio.gather(*self._workers, return_exceptions=True)

# Run
asyncio.run(AsyncPipeline().run())
# asyncio.run(semaphore_example())
# asyncio.run(rate_limiter_example())
```
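The consumers above stop by timing out on an empty queue, which works but couples shutdown to a magic 2-second constant. An alternative is to push one sentinel per consumer once the producers are done; each consumer exits when it sees the sentinel. A minimal sketch of that variant (the `STOP` sentinel is an illustrative choice, not part of asyncio):

```python
import asyncio

STOP = object()  # sentinel marking the end of the stream

async def consumer(name: str, queue: asyncio.Queue):
    while True:
        item = await queue.get()
        if item is STOP:          # sentinel received: no more work is coming
            queue.task_done()
            break
        print(f"consumer {name}: processing {item}")
        queue.task_done()

async def main():
    queue: asyncio.Queue = asyncio.Queue()
    consumers = [asyncio.create_task(consumer(f"C{i}", queue)) for i in range(2)]
    for i in range(6):            # produce some work
        await queue.put(f"item{i}")
    for _ in consumers:           # one sentinel per consumer
        await queue.put(STOP)
    await queue.join()            # wait until every item (and sentinel) is processed
    await asyncio.gather(*consumers)

asyncio.run(main())
```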
5. Asynchronous I/O and Network Programming
aiohttp and asynchronous databases
```python
import asyncio
import aiohttp
import asyncpg
from typing import Any, Dict, List

class AsyncWebScraper:
    """An asynchronous web scraper."""

    def __init__(self, concurrency_limit: int = 10):
        self.semaphore = asyncio.Semaphore(concurrency_limit)
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def fetch_url(self, url: str) -> Dict[str, Any]:
        """Fetch a single URL."""
        async with self.semaphore:
            try:
                timeout = aiohttp.ClientTimeout(total=10)
                async with self.session.get(url, timeout=timeout) as response:
                    text = await response.text()
                    return {
                        'url': url,
                        'status': response.status,
                        'size': len(text),
                        'content': text[:100],  # keep only the first 100 characters
                    }
            except Exception as e:
                return {'url': url, 'error': str(e)}

    async def fetch_multiple(self, urls: List[str]) -> List[Dict[str, Any]]:
        """Fetch several URLs concurrently."""
        tasks = [self.fetch_url(url) for url in urls]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        processed = []
        for result in results:
            if isinstance(result, Exception):
                processed.append({'error': str(result)})
            else:
                processed.append(result)
        return processed

class AsyncDatabase:
    """Asynchronous database operations."""

    def __init__(self, dsn: str):
        self.dsn = dsn
        self.pool = None

    async def connect(self, pool_size: int = 10):
        """Create the connection pool."""
        self.pool = await asyncpg.create_pool(
            self.dsn,
            min_size=1,
            max_size=pool_size,
            command_timeout=60,
        )

    async def close(self):
        """Close the connection pool."""
        if self.pool:
            await self.pool.close()

    async def batch_insert(self, table: str, records: List[Dict]) -> int:
        """Insert records in bulk."""
        if not records:
            return 0
        # Prepare the data (note: table and column names are interpolated into the SQL,
        # so they must come from trusted input)
        columns = list(records[0].keys())
        values = [tuple(record[col] for col in columns) for record in records]
        placeholders = ', '.join(f'${i + 1}' for i in range(len(columns)))
        query = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})"
        async with self.pool.acquire() as conn:
            # Run the whole batch inside one transaction
            async with conn.transaction():
                await conn.executemany(query, values)
        return len(values)

    async def concurrent_queries(self, queries: List[str]) -> List[Any]:
        """Run several queries concurrently."""
        async def execute_query(query: str):
            async with self.pool.acquire() as conn:
                return await conn.fetch(query)

        tasks = [execute_query(query) for query in queries]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        processed = []
        for result in results:
            if isinstance(result, Exception):
                processed.append({'error': str(result)})
            else:
                processed.append([dict(record) for record in result])
        return processed

async def web_scraper_example():
    """Web scraping example."""
    urls = [
        'https://httpbin.org/get',
        'https://httpbin.org/delay/1',
        'https://httpbin.org/delay/2',
        'https://httpbin.org/status/404',
        'https://nonexistent.example.com',
    ]
    async with AsyncWebScraper(concurrency_limit=3) as scraper:
        results = await scraper.fetch_multiple(urls * 2)  # duplicate the list to exercise the concurrency limit
        for result in results:
            if 'error' in result:
                print(f"error: {result.get('url')} - {result['error']}")
            else:
                print(f"ok: {result['url']} - status: {result['status']}")

async def database_example():
    """Database example (requires a PostgreSQL server)."""
    # Note: replace this with real connection details before running
    dsn = "postgresql://user:password@localhost/dbname"
    db = AsyncDatabase(dsn)
    try:
        await db.connect()
        # Create a test table
        async with db.pool.acquire() as conn:
            await conn.execute('''
                CREATE TABLE IF NOT EXISTS test_users (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL,
                    age INTEGER,
                    created_at TIMESTAMP DEFAULT NOW()
                )
            ''')
        # Bulk insert
        users = [
            {'name': 'Alice', 'age': 25},
            {'name': 'Bob', 'age': 30},
            {'name': 'Charlie', 'age': 35},
        ]
        count = await db.batch_insert('test_users', users)
        print(f"inserted {count} rows")
        # Concurrent queries
        queries = [
            "SELECT * FROM test_users WHERE age > 20",
            "SELECT COUNT(*) as count FROM test_users",
            "SELECT name, age FROM test_users ORDER BY age DESC",
        ]
        results = await db.concurrent_queries(queries)
        for i, result in enumerate(results):
            print(f"query {i + 1} result: {result}")
    finally:
        await db.close()

# Running the examples
async def main():
    print("=== web scraping example ===")
    await web_scraper_example()
    print("\n=== async pipeline example ===")
    await AsyncPipeline().run()  # AsyncPipeline is defined in section 4
    # Note: the database example needs a real database; the code only shows the structure
    # await database_example()

# asyncio.run(main())
```
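fetch_multiple waits for every URL before returning anything. If you want to handle each page as soon as it arrives (for example, to start parsing early), `asyncio.as_completed` yields the results in completion order. A minimal sketch of that variant, reusing the AsyncWebScraper class defined above:

```python
import asyncio

async def scrape_as_completed(urls):
    async with AsyncWebScraper(concurrency_limit=3) as scraper:
        tasks = [asyncio.create_task(scraper.fetch_url(url)) for url in urls]
        # as_completed hands back awaitables in the order they finish, not the order submitted
        for finished in asyncio.as_completed(tasks):
            result = await finished
            print(f"got {result.get('url')} -> {result.get('status', result.get('error'))}")

# asyncio.run(scrape_as_completed(['https://httpbin.org/get', 'https://httpbin.org/delay/1']))
```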
6. Debugging and Performance Optimization
Debugging tools for asynchronous code
```python
import asyncio
import logging
import random
import time
import traceback
from contextvars import ContextVar
# uvloop (an optional high-performance event loop) is imported lazily in the __main__ block below

# Verbose logging for async debugging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Context variable
request_id: ContextVar[str] = ContextVar('request_id', default='unknown')

class AsyncDebugger:
    """Helpers for debugging asyncio code."""

    @staticmethod
    def enable_debug_mode():
        """Enable debug mode on the running loop."""
        # Verbose asyncio logging
        logging.getLogger('asyncio').setLevel(logging.DEBUG)
        # Put the event loop into debug mode
        loop = asyncio.get_running_loop()
        loop.set_debug(True)
        # Warn about slow callbacks (seconds)
        loop.slow_callback_duration = 0.1
        # Install an exception handler
        loop.set_exception_handler(AsyncDebugger.exception_handler)

    @staticmethod
    def exception_handler(loop, context):
        """Handle exceptions that reach the event loop."""
        message = context.get('message', 'unhandled exception')
        exception = context.get('exception')
        task = context.get('task')
        logger.error(f"event loop exception: {message}")
        if exception:
            logger.error("exception details", exc_info=exception)
        if task:
            logger.error(f"related task: {task}")
        # Dump all running tasks
        tasks = asyncio.all_tasks(loop)
        if tasks:
            logger.error(f"currently running tasks: {len(tasks)}")
            for t in tasks:
                if not t.done():
                    logger.error(f"  task: {t!r}")

    @staticmethod
    async def trace_coroutine(coro, name: str = None):
        """Trace the execution of a coroutine."""
        start = time.time()
        try:
            result = await coro
            elapsed = time.time() - start
            logger.debug(f"coroutine {name or coro.__name__} finished in {elapsed:.3f}s")
            return result
        except Exception as e:
            elapsed = time.time() - start
            logger.error(f"coroutine {name or coro.__name__} failed after {elapsed:.3f}s: {e}")
            raise

async def performance_example():
    """Performance comparison example."""
    loop = asyncio.get_running_loop()

    async def io_bound_operation(id: int):
        """Simulate an I/O-bound operation."""
        await asyncio.sleep(0.1)
        return id * 2

    # Approach 1: sequential execution
    start = loop.time()
    results1 = []
    for i in range(10):
        results1.append(await io_bound_operation(i))
    time1 = loop.time() - start

    # Approach 2: concurrent execution
    start = loop.time()
    tasks = [io_bound_operation(i) for i in range(10)]
    results2 = await asyncio.gather(*tasks)
    time2 = loop.time() - start

    print(f"sequential time: {time1:.3f}s")
    print(f"concurrent time: {time2:.3f}s")
    print(f"speedup: {time1 / time2:.1f}x")

    # Memory footprint
    import sys
    coro = io_bound_operation(1)
    print(f"coroutine object size: {sys.getsizeof(coro)} bytes")
    coro.close()  # avoid the "never awaited" warning
    task = asyncio.create_task(io_bound_operation(1))
    print(f"task object size: {sys.getsizeof(task)} bytes")
    await task

async def debug_demo():
    """Debugging demo."""
    AsyncDebugger.enable_debug_mode()

    # Set a request id
    request_id.set("req-123")

    async def problematic_coroutine():
        await asyncio.sleep(0.05)
        # Simulate a flaky error
        if random.random() < 0.3:
            raise ValueError("random error")
        return "ok"

    # Use the tracing wrapper
    try:
        await AsyncDebugger.trace_coroutine(problematic_coroutine(), "test coroutine")
    except ValueError:
        pass  # already logged by the tracer

    # Create several tasks and watch the debug output
    tasks = [asyncio.create_task(problematic_coroutine()) for _ in range(5)]
    await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == "__main__":
    # uvloop must be installed before the event loop is created (Linux/macOS only)
    try:
        import uvloop
        uvloop.install()
        print("using the uvloop event loop")
    except ImportError:
        print("using the standard asyncio event loop")
    # Run the performance example
    asyncio.run(performance_example())
    # Run the debug example (uncomment to see the debug output)
    # asyncio.run(debug_demo())
```
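Much of the manual setup in enable_debug_mode can also be switched on from outside the code: asyncio's built-in debug mode enables slow-callback warnings and "coroutine was never awaited" reports without a custom handler. A minimal sketch of the standard ways to enable it:

```python
import asyncio

async def main():
    await asyncio.sleep(0.1)

# Option 1: pass debug=True to asyncio.run
asyncio.run(main(), debug=True)

# Option 2: enable it from the environment before starting Python
#   PYTHONASYNCIODEBUG=1 python app.py
# or run the interpreter in development mode:
#   python -X dev app.py
```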
Summary
Through asyncio and async/await, Python's asynchronous programming model provides the following (a minimal example combining these pieces appears after the lists):
Core concepts:
- Coroutines: asynchronous functions defined with `async def`
- Tasks: manage and schedule the execution of coroutines
- Event loop: the central scheduler that coordinates all asynchronous operations
Key features:
- Concurrency control: semaphores, locks, events, condition variables
- Asynchronous contexts: `async with` and `async for`
- Error handling: timeouts, cancellation, exception propagation
- Performance optimization: connection pools, rate limiting, batching
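As a closing illustration, here is a minimal sketch (standard library only) that ties these building blocks together: coroutines, tasks driven by the event loop, an asynchronous context manager for concurrency control, and timeout-based error handling:

```python
import asyncio

async def fetch(semaphore: asyncio.Semaphore, i: int) -> str:
    # async with: concurrency control via an asynchronous context manager
    async with semaphore:
        await asyncio.sleep(0.2)          # coroutine: non-blocking wait
        return f"item-{i}"

async def main():
    semaphore = asyncio.Semaphore(2)      # at most 2 fetches at a time
    tasks = [asyncio.create_task(fetch(semaphore, i)) for i in range(5)]  # tasks
    try:
        results = await asyncio.wait_for(asyncio.gather(*tasks), timeout=2.0)  # error handling
        print(results)
    except asyncio.TimeoutError:
        print("the batch timed out")

asyncio.run(main())                       # the event loop drives everything
```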