Python Asynchronous Programming: A Deep Dive into asyncio and async/await

1. Coroutines: The Basic Unit of Asynchronous Programming

Understanding coroutine objects

A coroutine is a function that can be suspended and resumed, defined with async def:

python
import asyncio

# A basic coroutine
async def simple_coroutine(name: str, delay: float) -> str:
    """A simple coroutine."""
    print(f"{name}: started, waiting {delay} seconds")
    await asyncio.sleep(delay)  # non-blocking wait
    print(f"{name}: finished")
    return f"{name}-result"

# The lifecycle of a coroutine
async def coroutine_lifecycle():
    """Demonstrate a coroutine's lifecycle."""
    coro = simple_coroutine("test", 1.0)
    
    print(f"Coroutine type: {type(coro)}")
    print(f"Is a coroutine object: {asyncio.iscoroutine(coro)}")
    print(f"Is a coroutine function: {asyncio.iscoroutinefunction(simple_coroutine)}")
    
    # Coroutine state
    print(f"Initial state: {coro.cr_running}")  # False, not running yet
    
    # Run the coroutine
    task = asyncio.create_task(coro)
    print(f"After creating the task: {task._state}")  # 'PENDING' (private attribute, shown for illustration only)
    
    await task
    print(f"After completion: {task._state}")  # 'FINISHED'

# Driving a coroutine by hand
async def manual_coroutine_control():
    """Drive a coroutine manually (not recommended in production)."""
    coro = simple_coroutine("manual", 0.5)
    
    try:
        # send(None) starts the coroutine and runs it up to its first await point;
        # StopIteration (carrying the return value) is raised only once the coroutine returns.
        coro.send(None)
    except StopIteration as e:
        print(f"Coroutine return value: {e.value}")
    except Exception as e:
        print(f"Coroutine exception: {e}")

# Run it
asyncio.run(coroutine_lifecycle())
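
One point the lifecycle demo glosses over: calling a coroutine function only creates a coroutine object; nothing runs until it is awaited or wrapped in a task. Awaiting coroutines one after another runs them sequentially, while wrapping them in tasks first lets them run concurrently. A minimal sketch (simple_coroutine is redefined here so the snippet runs on its own):

python
import asyncio

async def simple_coroutine(name: str, delay: float) -> str:
    await asyncio.sleep(delay)
    return f"{name}-result"

async def main():
    # Sequential: each await finishes before the next starts (~0.6s total)
    a = await simple_coroutine("A", 0.3)
    b = await simple_coroutine("B", 0.3)
    
    # Concurrent: both tasks are scheduled first, then awaited (~0.3s total)
    t1 = asyncio.create_task(simple_coroutine("C", 0.3))
    t2 = asyncio.create_task(simple_coroutine("D", 0.3))
    c, d = await t1, await t2
    print(a, b, c, d)

asyncio.run(main())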

2. Tasks: Managing Concurrent Execution

Creating and cancelling tasks

python
import asyncio
from asyncio import CancelledError

async def cancellable_task(name: str, duration: float) -> str:
    """A task that can be cancelled."""
    steps = int(duration * 10)
    try:
        print(f"{name}: starting a long-running job")
        for i in range(steps):
            await asyncio.sleep(0.1)
            print(f"{name}: progress {i+1}/{steps}")
        return f"{name}: done"
    except CancelledError:
        print(f"{name}: cancelled")
        raise  # re-raise so the caller knows
    finally:
        print(f"{name}: cleaning up resources")

async def task_management():
    """Task management example."""
    # Create several tasks
    task1 = asyncio.create_task(cancellable_task("task1", 2.0))
    task2 = asyncio.create_task(cancellable_task("task2", 3.0))
    task3 = asyncio.create_task(cancellable_task("task3", 1.0))
    
    # Cancel task2 after a while
    await asyncio.sleep(1.5)
    
    if not task2.done():
        task2.cancel()
        print("task2 cancelled")
    
    # Collect results
    done, pending = await asyncio.wait(
        [task1, task2, task3],
        timeout=2.0,
        return_when=asyncio.FIRST_COMPLETED
    )
    
    print(f"Completed tasks: {len(done)}")
    print(f"Pending tasks: {len(pending)}")
    
    # Process results (check cancelled() first: exception() raises on a cancelled task)
    for task in done:
        if task.cancelled():
            print("Task was cancelled")
        elif task.exception():
            print(f"Task exception: {task.exception()}")
        else:
            print(f"Task result: {task.result()}")
    
    # Cancel whatever is left
    for task in pending:
        task.cancel()

async def timeout_handling():
    """Timeout handling."""
    async def slow_operation():
        await asyncio.sleep(5)
        return "slow operation finished"
    
    try:
        # Option 1: asyncio.wait_for
        result = await asyncio.wait_for(slow_operation(), timeout=2.0)
        print(result)
    except asyncio.TimeoutError:
        print("operation timed out")
    
    # Option 2: asyncio.shield (protects the inner task from cancellation)
    task = asyncio.create_task(slow_operation())
    try:
        result = await asyncio.wait_for(asyncio.shield(task), timeout=2.0)
    except asyncio.TimeoutError:
        print("wait timed out, but the background task keeps running")
        # Check the background task later
        await asyncio.sleep(3)
        if task.done():
            print(f"Final result of the background task: {task.result()}")

# Run it
asyncio.run(timeout_handling())
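
On Python 3.11 and newer, asyncio.timeout() offers an async-context-manager alternative to asyncio.wait_for. The sketch below assumes 3.11+ and reuses a slow_operation helper like the one above:

python
import asyncio

async def slow_operation():
    await asyncio.sleep(5)
    return "slow operation finished"

async def main():
    try:
        # Cancels the enclosed work and raises TimeoutError once the deadline passes
        async with asyncio.timeout(2.0):
            print(await slow_operation())
    except TimeoutError:
        print("operation timed out")

asyncio.run(main())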

3. Asynchronous Context Managers and Iterators

async with and async for

python
import asyncio
from typing import AsyncIterator, List

class AsyncConnectionPool:
    """An asynchronous connection pool."""
    def __init__(self, pool_size: int = 5):
        self.pool_size = pool_size
        self._connections: List[str] = []
        self._semaphore = asyncio.Semaphore(pool_size)
    
    async def __aenter__(self):
        print("Initializing the connection pool")
        # Simulate establishing connections
        for i in range(self.pool_size):
            await asyncio.sleep(0.1)
            self._connections.append(f"conn-{i+1}")
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        print("Tearing down the connection pool")
        # Simulate closing connections
        for conn in self._connections:
            await asyncio.sleep(0.05)
            print(f"Closing {conn}")
        self._connections.clear()
    
    async def acquire(self) -> str:
        """Acquire a connection."""
        await self._semaphore.acquire()
        conn = self._connections.pop()
        return conn
    
    def release(self, conn: str):
        """Release a connection."""
        self._connections.append(conn)
        self._semaphore.release()
    
    async def execute_query(self, query: str) -> str:
        """Execute a query."""
        conn = await self.acquire()
        try:
            await asyncio.sleep(0.5)  # simulate the query
            return f"{conn}: result of '{query}'"
        finally:
            self.release(conn)

class AsyncDataStream:
    """An asynchronous data stream."""
    def __init__(self, data: List[str], delay: float = 0.1):
        self.data = data
        self.delay = delay
        self.index = 0
    
    def __aiter__(self) -> AsyncIterator[str]:
        return self
    
    async def __anext__(self) -> str:
        if self.index >= len(self.data):
            raise StopAsyncIteration
        
        item = self.data[self.index]
        self.index += 1
        
        # Simulate fetching data asynchronously
        await asyncio.sleep(self.delay)
        return f"processed: {item}"

async def async_context_example():
    """Async context manager example."""
    async with AsyncConnectionPool(3) as pool:
        tasks = []
        for i in range(10):
            query = f"SELECT * FROM table{i}"
            task = asyncio.create_task(pool.execute_query(query))
            tasks.append(task)
        
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for result in results:
            if isinstance(result, Exception):
                print(f"Query failed: {result}")
            else:
                print(result)

async def async_iter_example():
    """Async iterator example."""
    data = ["A", "B", "C", "D", "E"]
    stream = AsyncDataStream(data, delay=0.2)
    
    # Option 1: async for
    async for item in stream:
        print(item)
    
    # Option 2: anext()
    stream2 = AsyncDataStream(data)
    try:
        while True:
            item = await anext(stream2)  # Python 3.10+
            print(f"fetched manually: {item}")
    except StopAsyncIteration:
        print("iteration finished")
    
    # Option 3: async comprehension
    stream3 = AsyncDataStream(data)
    processed = [item async for item in stream3]
    print(f"async comprehension result: {processed}")

# Run it
asyncio.run(async_iter_example())
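
The same behaviors can often be written with much less boilerplate: contextlib.asynccontextmanager turns an async generator into an async context manager, and any async def function that contains yield is already an async iterator. A minimal sketch of both patterns (pooled_connection and data_stream are illustrative names, not part of the classes above):

python
import asyncio
from contextlib import asynccontextmanager

@asynccontextmanager
async def pooled_connection():
    # setup runs before the yield, teardown after it
    print("opening connection")
    await asyncio.sleep(0.1)
    try:
        yield "conn-1"
    finally:
        print("closing connection")
        await asyncio.sleep(0.1)

async def data_stream(items, delay=0.1):
    # an async generator is the simplest async iterator
    for item in items:
        await asyncio.sleep(delay)
        yield f"processed: {item}"

async def main():
    async with pooled_connection() as conn:
        print(f"using {conn}")
    async for item in data_stream(["A", "B", "C"]):
        print(item)

asyncio.run(main())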

4. Semaphores and Advanced Concurrency Control

Limiting concurrency

python
import asyncio
import random
import time
from asyncio import Semaphore, BoundedSemaphore, Lock, Event, Condition

async def worker(semaphore: Semaphore, name: str, work_time: float):
    """A worker throttled by a semaphore."""
    async with semaphore:
        print(f"{name}: acquired the semaphore, starting work")
        await asyncio.sleep(work_time)
        print(f"{name}: work done, releasing the semaphore")
    return f"{name}-done"

async def semaphore_example():
    """Semaphore example: at most 3 tasks run concurrently."""
    semaphore = Semaphore(3)
    
    tasks = []
    for i in range(10):
        work_time = random.uniform(0.5, 2.0)
        task = asyncio.create_task(worker(semaphore, f"Worker-{i}", work_time))
        tasks.append(task)
    
    await asyncio.gather(*tasks)

class AsyncRateLimiter:
    """An asynchronous rate limiter (token bucket)."""
    def __init__(self, rate: float):
        self.rate = rate  # maximum calls per second
        self._tokens = rate
        self._last_update = None  # initialized on first acquire, using the running loop's clock
        self._lock = Lock()
    
    async def acquire(self):
        async with self._lock:
            now = asyncio.get_running_loop().time()
            if self._last_update is None:
                self._last_update = now
            elapsed = now - self._last_update
            
            # Refill tokens
            self._tokens = min(self.rate, self._tokens + elapsed * self.rate)
            self._last_update = now
            
            if self._tokens >= 1:
                self._tokens -= 1
                return True
            else:
                # Wait until enough tokens have accumulated
                wait_time = (1 - self._tokens) / self.rate
                await asyncio.sleep(wait_time)
                self._tokens = 0
                self._last_update = asyncio.get_running_loop().time()
                return True

async def rate_limiter_example():
    """Rate limiter example."""
    limiter = AsyncRateLimiter(rate=2.0)  # at most 2 calls per second
    
    async def make_request(name: str):
        await limiter.acquire()
        print(f"{time.strftime('%H:%M:%S')} {name}: sending request")
        await asyncio.sleep(0.1)
    
    tasks = []
    for i in range(10):
        task = asyncio.create_task(make_request(f"Req-{i}"))
        tasks.append(task)
        await asyncio.sleep(0.1)  # simulate spacing between requests
    
    await asyncio.gather(*tasks)

class AsyncPipeline:
    """An asynchronous processing pipeline."""
    def __init__(self):
        self._queue = asyncio.Queue(maxsize=10)
        self._event = Event()          # declared but not used in this example
        self._condition = Condition()  # declared but not used in this example
        self._workers = []
    
    async def producer(self, name: str, count: int):
        """Producer coroutine."""
        for i in range(count):
            item = f"{name}-item{i}"
            await self._queue.put(item)
            print(f"Producer {name}: produced {item}")
            await asyncio.sleep(random.uniform(0.1, 0.3))
        
        print(f"Producer {name}: done")
    
    async def consumer(self, name: str):
        """Consumer coroutine."""
        while True:
            try:
                item = await asyncio.wait_for(self._queue.get(), timeout=2.0)
                print(f"Consumer {name}: processing {item}")
                await asyncio.sleep(random.uniform(0.2, 0.5))
                self._queue.task_done()
            except asyncio.TimeoutError:
                print(f"Consumer {name}: timed out, exiting")
                break
    
    async def run(self, producer_count: int = 3, consumer_count: int = 2):
        """Run the pipeline."""
        # Start the producers
        for i in range(producer_count):
            task = asyncio.create_task(self.producer(f"P{i}", 5))
            self._workers.append(task)
        
        # Start the consumers
        for i in range(consumer_count):
            task = asyncio.create_task(self.consumer(f"C{i}"))
            self._workers.append(task)
        
        # Wait for all producers to finish
        await asyncio.gather(*self._workers[:producer_count])
        
        # Wait for the queue to drain
        await self._queue.join()
        
        # Cancel the consumers
        for task in self._workers[producer_count:]:
            task.cancel()
        
        # Wait for everything to wind down
        await asyncio.gather(*self._workers, return_exceptions=True)

# Run it
asyncio.run(AsyncPipeline().run())
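
The block above imports BoundedSemaphore but never exercises it. It behaves like Semaphore, except that releasing it more times than it was acquired raises ValueError instead of silently raising the counter, which helps catch unbalanced acquire/release pairs. A minimal sketch of the difference:

python
import asyncio

async def main():
    sem = asyncio.BoundedSemaphore(2)
    await sem.acquire()
    sem.release()          # fine: one release per acquire
    try:
        sem.release()      # one release too many
    except ValueError as e:
        print(f"BoundedSemaphore caught the extra release: {e}")

asyncio.run(main())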

5. Asynchronous I/O and Network Programming

aiohttp and asynchronous databases

python
import asyncio
import aiohttp
import asyncpg
from typing import List, Dict, Any

class AsyncWebScraper:
    """An asynchronous web scraper."""
    def __init__(self, concurrency_limit: int = 10):
        self.semaphore = asyncio.Semaphore(concurrency_limit)
        self.session = None
    
    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()
    
    async def fetch_url(self, url: str) -> Dict[str, Any]:
        """Fetch a single URL."""
        async with self.semaphore:
            try:
                timeout = aiohttp.ClientTimeout(total=10)
                async with self.session.get(url, timeout=timeout) as response:
                    text = await response.text()
                    return {
                        'url': url,
                        'status': response.status,
                        'size': len(text),
                        'content': text[:100]  # keep only the first 100 characters
                    }
            except Exception as e:
                return {'url': url, 'error': str(e)}
    
    async def fetch_multiple(self, urls: List[str]) -> List[Dict[str, Any]]:
        """Fetch multiple URLs concurrently."""
        tasks = [self.fetch_url(url) for url in urls]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        
        processed = []
        for result in results:
            if isinstance(result, Exception):
                processed.append({'error': str(result)})
            else:
                processed.append(result)
        
        return processed

class AsyncDatabase:
    """Asynchronous database operations."""
    def __init__(self, dsn: str):
        self.dsn = dsn
        self.pool = None
    
    async def connect(self, pool_size: int = 10):
        """Create the connection pool."""
        self.pool = await asyncpg.create_pool(
            self.dsn,
            min_size=1,
            max_size=pool_size,
            command_timeout=60
        )
    
    async def close(self):
        """Close the connection pool."""
        if self.pool:
            await self.pool.close()
    
    async def batch_insert(self, table: str, records: List[Dict]) -> int:
        """Insert records in bulk.
        
        Note: the table and column names are interpolated into the SQL string,
        so they must come from trusted code, never from user input.
        """
        if not records:
            return 0
        
        # Prepare the data
        columns = list(records[0].keys())
        values = [tuple(record[col] for col in columns) for record in records]
        
        query = f"""
            INSERT INTO {table} ({','.join(columns)})
            VALUES ({','.join(f'${i+1}' for i in range(len(columns)))})
        """
        
        async with self.pool.acquire() as conn:
            # Run inside a transaction
            async with conn.transaction():
                await conn.executemany(query, values)
                return len(values)
    
    async def concurrent_queries(self, queries: List[str]) -> List[List[Dict]]:
        """Run several queries concurrently."""
        async def execute_query(query: str):
            async with self.pool.acquire() as conn:
                return await conn.fetch(query)
        
        tasks = [execute_query(query) for query in queries]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        
        processed = []
        for result in results:
            if isinstance(result, Exception):
                processed.append({'error': str(result)})
            else:
                processed.append([dict(record) for record in result])
        
        return processed

async def web_scraper_example():
    """Web scraping example."""
    urls = [
        'https://httpbin.org/get',
        'https://httpbin.org/delay/1',
        'https://httpbin.org/delay/2',
        'https://httpbin.org/status/404',
        'https://nonexistent.example.com',
    ]
    
    async with AsyncWebScraper(concurrency_limit=3) as scraper:
        results = await scraper.fetch_multiple(urls * 2)  # fetch the list twice to exercise the concurrency limit
        
        for result in results:
            if 'error' in result:
                print(f"Failed: {result.get('url', '<unknown>')} - {result['error']}")
            else:
                print(f"OK: {result['url']} - status: {result['status']}")

async def database_example():
    """Database example (requires a PostgreSQL server)."""
    # Note: replace with real connection details before running
    dsn = "postgresql://user:password@localhost/dbname"
    
    db = AsyncDatabase(dsn)
    try:
        await db.connect()
        
        # Create a test table
        async with db.pool.acquire() as conn:
            await conn.execute('''
                CREATE TABLE IF NOT EXISTS test_users (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL,
                    age INTEGER,
                    created_at TIMESTAMP DEFAULT NOW()
                )
            ''')
        
        # Bulk insert
        users = [
            {'name': 'Alice', 'age': 25},
            {'name': 'Bob', 'age': 30},
            {'name': 'Charlie', 'age': 35},
        ]
        
        count = await db.batch_insert('test_users', users)
        print(f"Inserted {count} records")
        
        # Concurrent queries
        queries = [
            "SELECT * FROM test_users WHERE age > 20",
            "SELECT COUNT(*) as count FROM test_users",
            "SELECT name, age FROM test_users ORDER BY age DESC",
        ]
        
        results = await db.concurrent_queries(queries)
        for i, result in enumerate(results):
            print(f"Query {i+1} result: {result}")
            
    finally:
        await db.close()

# Run the examples
async def main():
    print("=== Web scraping example ===")
    await web_scraper_example()
    
    print("\n=== Async pipeline example ===")
    await AsyncPipeline().run()

# Note: the database example needs a real database; the code above only shows the structure
# asyncio.run(main())
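
Besides the semaphore used above, aiohttp can also cap concurrency at the connection level: aiohttp.TCPConnector(limit=...) bounds the number of simultaneous connections, and limit_per_host bounds them per host. A minimal sketch, assuming the same httpbin endpoints used above:

python
import asyncio
import aiohttp

async def main():
    # At most 3 simultaneous connections overall, at most 2 per host
    connector = aiohttp.TCPConnector(limit=3, limit_per_host=2)
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
        async def fetch(url):
            async with session.get(url) as resp:
                return url, resp.status
        results = await asyncio.gather(
            *(fetch(f"https://httpbin.org/delay/{i % 3}") for i in range(6))
        )
        for url, status in results:
            print(url, status)

asyncio.run(main())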

6. Debugging and Performance Optimization

Debugging tools for async code

python
import asyncio
import logging
import random
import time
import traceback
from contextvars import ContextVar

# Configure debug logging for async code
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Context variable (e.g. for per-request tracing)
request_id: ContextVar[str] = ContextVar('request_id', default='unknown')

class AsyncDebugger:
    """An asyncio debugging helper."""
    
    @staticmethod
    def enable_debug_mode():
        """Enable debug mode on the running event loop."""
        # Verbose asyncio logging
        logging.getLogger('asyncio').setLevel(logging.DEBUG)
        
        # Turn on event loop debug mode
        loop = asyncio.get_running_loop()
        loop.set_debug(True)
        
        # Warn about slow callbacks (seconds)
        loop.slow_callback_duration = 0.1
        
        # Install a custom exception handler
        loop.set_exception_handler(AsyncDebugger.exception_handler)
    
    @staticmethod
    def exception_handler(loop, context):
        """Handle exceptions escaping the event loop."""
        message = context.get('message', 'unhandled exception')
        exception = context.get('exception')
        task = context.get('task')
        
        logger.error(f"Event loop exception: {message}")
        
        if exception:
            logger.error(f"Exception details: {exception}")
            logger.error("Traceback:\n%s", ''.join(
                traceback.format_exception(type(exception), exception, exception.__traceback__)
            ))
        
        if task:
            logger.error(f"Related task: {task}")
        
        # Dump all running tasks
        tasks = asyncio.all_tasks(loop)
        if tasks:
            logger.error(f"Currently running tasks: {len(tasks)}")
            for t in tasks:
                if not t.done():
                    logger.error(f"  task: {t}")
    
    @staticmethod
    async def trace_coroutine(coro, name: str = None):
        """Trace a coroutine's execution time."""
        start = time.time()
        
        try:
            result = await coro
            elapsed = time.time() - start
            logger.debug(f"Coroutine {name or coro.__name__} finished in {elapsed:.3f}s")
            return result
        except Exception as e:
            elapsed = time.time() - start
            logger.error(f"Coroutine {name or coro.__name__} failed after {elapsed:.3f}s, error: {e}")
            raise

async def performance_example():
    """Performance comparison example."""
    loop = asyncio.get_running_loop()
    
    # Compare concurrency strategies
    async def io_bound_operation(id: int):
        """Simulate an I/O operation."""
        await asyncio.sleep(0.1)
        return id * 2
    
    # Strategy 1: sequential execution
    start = loop.time()
    results1 = []
    for i in range(10):
        results1.append(await io_bound_operation(i))
    time1 = loop.time() - start
    
    # Strategy 2: concurrent execution
    start = loop.time()
    tasks = [io_bound_operation(i) for i in range(10)]
    results2 = await asyncio.gather(*tasks)
    time2 = loop.time() - start
    
    print(f"Sequential time: {time1:.3f}s")
    print(f"Concurrent time: {time2:.3f}s")
    print(f"Speedup: {time1/time2:.1f}x")
    
    # Memory footprint
    import sys
    coro = io_bound_operation(1)
    print(f"Coroutine object size: {sys.getsizeof(coro)} bytes")
    
    task = asyncio.create_task(coro)
    print(f"Task object size: {sys.getsizeof(task)} bytes")
    await task  # don't leave the task dangling

async def debug_demo():
    """Debugging demo."""
    AsyncDebugger.enable_debug_mode()
    
    # Set a request ID for this context
    request_id.set("req-123")
    
    async def problematic_coroutine():
        await asyncio.sleep(0.05)
        # Simulate an intermittent error
        if random.random() < 0.3:
            raise ValueError("random error")
        return "success"
    
    # Use the tracing wrapper
    try:
        await AsyncDebugger.trace_coroutine(
            problematic_coroutine(),
            "test coroutine"
        )
    except ValueError:
        pass  # already logged by trace_coroutine
    
    # Spawn several tasks to observe the debug output
    tasks = []
    for i in range(5):
        task = asyncio.create_task(problematic_coroutine())
        tasks.append(task)
    
    await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == "__main__":
    # uvloop (high-performance event loop, Linux/macOS) must be installed
    # before the event loop is created, i.e. before asyncio.run()
    try:
        import uvloop
        uvloop.install()
        print("Using the uvloop event loop")
    except ImportError:
        print("Using the standard asyncio event loop")
    
    # Run the performance example
    asyncio.run(performance_example())
    
    # Run the debug demo (uncomment to see the debug output)
    # asyncio.run(debug_demo())
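
Debug mode can also be turned on without a helper class: asyncio.run(main(), debug=True), the -X dev interpreter flag, or the PYTHONASYNCIODEBUG=1 environment variable all enable the same checks (slow-callback warnings, richer reports for never-awaited coroutines). A minimal sketch (forgetful and main are illustrative names):

python
import asyncio

async def forgetful():
    asyncio.sleep(1)  # bug: missing await; in debug mode the warning includes the origin traceback

async def main():
    await forgetful()
    await asyncio.sleep(0.2)

# Equivalent alternatives: PYTHONASYNCIODEBUG=1 python script.py, or python -X dev script.py
asyncio.run(main(), debug=True)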

Summary

Python asynchronous programming, built on asyncio and async/await, provides:

Core concepts:

  1. Coroutines: asynchronous functions defined with async def
  2. Tasks: objects that manage and schedule the execution of coroutines
  3. Event loop: the central scheduler that coordinates all asynchronous operations

Key features:

  1. Concurrency control: semaphores, locks, events, condition variables
  2. Asynchronous context: async with and async for
  3. Error handling: timeouts, cancellation, exception propagation
  4. Performance optimization: connection pools, rate limiting, batching
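
To tie the three core concepts above together, here is a compact recap sketch (fetch and main are illustrative names): a coroutine is defined with async def, tasks schedule it on the loop, and asyncio.run creates and drives the event loop.

python
import asyncio

async def fetch(name: str) -> str:          # coroutine: defined with async def
    await asyncio.sleep(0.1)
    return f"{name} done"

async def main():
    tasks = [asyncio.create_task(fetch(n))  # tasks: schedule coroutines on the loop
             for n in ("a", "b", "c")]
    print(await asyncio.gather(*tasks))

asyncio.run(main())                          # event loop: created and run here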