【Python进阶】异步编程完全指南:从asyncio到实战应用

【Python进阶】异步编程完全指南:从asyncio到实战应用

引言

异步编程是现代Python开发中不可或缺的技能,特别是在处理IO密集型任务时。Python的asyncio库提供了强大的异步编程能力,让我们能够编写高效的并发代码。本文将详细介绍Python异步编程的核心概念和实战应用。

一、异步编程基础

1.1 同步 vs 异步

python 复制代码
# 同步编程
import time

def sync_task(name, delay):
    """Announce task `name`, block the calling thread for `delay` seconds, then announce completion."""
    for phase, pause in (("started", delay), ("completed", None)):
        print(f"Task {name} {phase}")
        if pause is not None:
            # Blocking sleep: nothing else can run on this thread meanwhile.
            time.sleep(pause)

# Synchronous execution (serial): task B is only called after task A returns,
# so the two 2-second delays add up.
start = time.time()
sync_task("A", 2)
sync_task("B", 2)
print(f"Total time: {time.time() - start:.2f}s")  # ~4 seconds
python 复制代码
# 异步编程
import asyncio

async def async_task(name, delay):
    """Announce task `name`, suspend for `delay` seconds without blocking the loop, then announce completion."""
    for phase, pause in (("started", delay), ("completed", None)):
        print(f"Task {name} {phase}")
        if pause is not None:
            # Yield to the event loop so other tasks can run during the wait.
            await asyncio.sleep(pause)

# 异步执行(并发)
async def main():
    """Run tasks A and B concurrently and print the elapsed wall-clock time."""
    began = time.time()
    pending = [async_task(label, 2) for label in ("A", "B")]
    # Both coroutines wait at the same time, so the delays overlap.
    await asyncio.gather(*pending)
    print(f"Total time: {time.time() - began:.2f}s")  # ~2 seconds

asyncio.run(main())

1.2 核心概念对比

概念 同步编程 异步编程
执行方式 串行执行 并发执行
阻塞方式 阻塞等待 非阻塞等待
CPU利用率 低(等待时空闲) 高(等待时执行其他任务)
适用场景 计算密集型 IO密集型

二、asyncio核心概念

2.1 协程(Coroutine)

python 复制代码
# 定义协程
async def greet(name):
    """Print a greeting for `name`, pause one second, then print a farewell."""
    hello = f"Hello, {name}"
    print(hello)
    await asyncio.sleep(1)
    goodbye = f"Goodbye, {name}"
    print(goodbye)

# 运行协程
asyncio.run(greet("World"))

2.2 事件循环(Event Loop)

python 复制代码
# Create a dedicated event loop. asyncio.get_event_loop() is deprecated when
# no loop is running, and it raises RuntimeError once an earlier
# asyncio.run() call has cleared the current loop, so build one explicitly.
loop = asyncio.new_event_loop()

try:
    # Schedule both coroutines as tasks on that loop.
    task1 = loop.create_task(async_task("A", 2))
    task2 = loop.create_task(async_task("B", 2))

    # Drive the loop until both tasks have finished.
    loop.run_until_complete(asyncio.gather(task1, task2))
finally:
    # Always close the loop, even if one of the tasks raised.
    loop.close()

2.3 任务(Task)

python 复制代码
async def main():
    """Start task A in the background, cancel it after one second, and report the cancellation."""
    job = asyncio.create_task(async_task("A", 2))

    # Let the task run for a second before requesting cancellation.
    await asyncio.sleep(1)
    job.cancel()

    # Awaiting a cancelled task re-raises CancelledError in the awaiter.
    try:
        await job
    except asyncio.CancelledError:
        print("Task was cancelled")

asyncio.run(main())

三、异步IO操作

3.1 异步文件IO

python 复制代码
import aiofiles

async def read_file(filepath):
    """Open `filepath` in text read mode via aiofiles and return its full contents."""
    async with aiofiles.open(filepath, mode='r') as src:
        return await src.read()

async def write_file(filepath, content):
    """Open `filepath` in text write mode via aiofiles and write `content` to it."""
    opened = aiofiles.open(filepath, mode='w')
    async with opened as dst:
        await dst.write(content)

async def main():
    """Copy the contents of input.txt to output.txt, then print a confirmation."""
    text = await read_file('input.txt')
    await write_file('output.txt', text)
    print("Files processed")

asyncio.run(main())

3.2 异步HTTP请求

python 复制代码
import aiohttp

async def fetch(session, url):
    """Issue a GET for `url` on `session` and return the response body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body

async def main():
    """Fetch three example endpoints concurrently over one session and print each body."""
    endpoints = (
        'https://api.example.com/data/1',
        'https://api.example.com/data/2',
        'https://api.example.com/data/3',
    )
    async with aiohttp.ClientSession() as session:
        # gather preserves argument order, so bodies line up with endpoints.
        bodies = await asyncio.gather(
            *[fetch(session, endpoint) for endpoint in endpoints]
        )
        for body in bodies:
            print(body)

asyncio.run(main())

3.3 异步数据库操作

python 复制代码
import asyncpg

async def connect_to_db():
    """Open an asyncpg connection using the example credentials and return it."""
    settings = {
        'user': 'user',
        'password': 'password',
        'database': 'mydb',
        'host': 'localhost',
    }
    return await asyncpg.connect(**settings)

async def query_users(conn):
    """Return whatever `conn.fetch` yields for the first ten rows of the users table."""
    return await conn.fetch('SELECT * FROM users LIMIT 10')

async def main():
    """Connect to the database, print up to ten users, and always close the connection."""
    conn = await connect_to_db()
    try:
        users = await query_users(conn)
        for user in users:
            print(user)
    finally:
        # Close even when the query or printing fails, so the connection
        # is not leaked.
        await conn.close()

asyncio.run(main())

四、常见异步模式

4.1 生产者-消费者模式

python 复制代码
async def producer(queue):
    """Put ten items on `queue`, half a second apart, followed by a `None` end marker."""
    index = 0
    while index < 10:
        await asyncio.sleep(0.5)
        await queue.put(f"Item {index}")
        print(f"Produced: Item {index}")
        index += 1

    # End-of-stream signal for the consumer.
    await queue.put(None)

async def consumer(queue):
    """Consume items from `queue` until a `None` sentinel arrives.

    Each real item takes one second of simulated work and is then printed.
    Every `get()` is matched by a `task_done()`, including the one for the
    sentinel, so `queue.join()` can complete once the consumer has stopped.
    """
    while True:
        item = await queue.get()
        if item is None:
            # Acknowledge the sentinel too; otherwise the queue's unfinished
            # count never reaches zero and queue.join() blocks forever.
            queue.task_done()
            break

        await asyncio.sleep(1)  # simulated processing time
        print(f"Consumed: {item}")
        queue.task_done()

async def main():
    """Run one producer and one consumer against a queue bounded to five items."""
    queue = asyncio.Queue(maxsize=5)

    # Schedule the producer first, then the consumer.
    jobs = [asyncio.create_task(role(queue)) for role in (producer, consumer)]

    # Wait for them in the same order they were created.
    for job in jobs:
        await job

asyncio.run(main())

4.2 超时控制

python 复制代码
async def slow_task():
    """Suspend for five seconds, then return the string "Done"."""
    outcome = "Done"
    await asyncio.sleep(5)
    return outcome

async def main():
    """Await `slow_task` with a two-second limit and print its result or a timeout notice."""
    try:
        outcome = await asyncio.wait_for(slow_task(), timeout=2)
    except asyncio.TimeoutError:
        print("Task timed out")
    else:
        print(outcome)

asyncio.run(main())  # 输出: Task timed out

4.3 并发限制

python 复制代码
async def fetch_with_limit(session, url, semaphore):
    """GET `url` while holding `semaphore` and return the decoded JSON body."""
    # The semaphore is acquired before the request is issued and released
    # after the response context has been exited.
    async with semaphore, session.get(url) as resp:
        return await resp.json()

async def main():
    """Fetch 100 example URLs with at most ten requests in flight and print how many came back."""
    limiter = asyncio.Semaphore(10)  # at most 10 concurrent requests
    targets = [f'https://api.example.com/data/{i}' for i in range(100)]

    async with aiohttp.ClientSession() as session:
        payloads = await asyncio.gather(
            *(fetch_with_limit(session, target, limiter) for target in targets)
        )
        print(f"Fetched {len(payloads)} items")

asyncio.run(main())

五、异步Web框架

5.1 FastAPI异步端点

python 复制代码
from fastapi import FastAPI
import asyncio

app = FastAPI()

# GET "/" handler: suspends for one second via asyncio.sleep (so the event
# loop stays free for other requests), then returns a fixed greeting dict.
@app.get("/")
async def root():
    await asyncio.sleep(1)
    return {"message": "Hello World"}

# GET "/items/{item_id}" handler: suspends for 0.5 s, then echoes `item_id`
# and `q` back in a dict.
# NOTE(review): `q` defaults to None but is annotated as plain `str`;
# presumably it is meant to be an optional query parameter — confirm and
# consider an Optional[str] annotation.
@app.get("/items/{item_id}")
async def read_item(item_id: int, q: str = None):
    await asyncio.sleep(0.5)
    return {"item_id": item_id, "q": q}

5.2 Starlette异步应用

python 复制代码
from starlette.applications import Starlette
from starlette.responses import JSONResponse
import asyncio

from starlette.routing import Route


async def homepage(request):
    """Handle "/": suspend for one second, then return a small JSON body."""
    await asyncio.sleep(1)
    return JSONResponse({'hello': 'world'})


async def async_endpoint(request):
    """Handle "/async": suspend for two seconds, then return a small JSON body."""
    await asyncio.sleep(2)
    return JSONResponse({'async': True})


# Declare the routing table up front. The @app.route decorator is deprecated
# in Starlette in favour of passing Route objects to the application.
app = Starlette(routes=[
    Route('/', homepage),
    Route('/async', async_endpoint),
])

六、异步编程最佳实践

6.1 避免阻塞调用

python 复制代码
# Anti-pattern: calling a synchronous, blocking function inside a coroutine.
async def bad_example():
    # time.sleep is a blocking call!
    time.sleep(1)  # this stalls the whole event loop for the duration
    return "done"

# Correct pattern: use the awaitable equivalent of the blocking call.
async def good_example():
    """Suspend for one second without blocking the event loop, then return "done"."""
    outcome = "done"
    await asyncio.sleep(1)  # non-blocking
    return outcome

6.2 合理使用并发

python 复制代码
async def process_batch(items, batch_size=100):
    """Process `items` concurrently, one fixed-size batch at a time.

    Each batch is handed to `process_item` and awaited as a group before the
    next batch starts, which caps the number of coroutines in flight.

    Args:
        items: A sliceable sequence of work items.
        batch_size: Maximum number of items processed concurrently.
            Defaults to 100, the previously hard-coded value.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    for offset in range(0, len(items), batch_size):
        batch = items[offset:offset + batch_size]
        # Wait for the whole batch before creating the next set of coroutines.
        await asyncio.gather(*(process_item(item) for item in batch))

6.3 错误处理

python 复制代码
async def safe_fetch(session, url):
    """GET `url` and return its decoded JSON body, or None on any failure.

    A non-200 status is raised as a generic exception and therefore reported
    through the "Unexpected error" branch. Failures are printed, not raised.
    """
    try:
        async with session.get(url) as resp:
            if resp.status == 200:
                return await resp.json()
            raise Exception(f"HTTP error: {resp.status}")
    except aiohttp.ClientError as err:
        print(f"Request failed: {err}")
    except Exception as err:
        print(f"Unexpected error: {err}")
    # Reached only after one of the handlers above has run.
    return None

6.4 调试异步代码

python 复制代码
# 启用调试模式
asyncio.run(main(), debug=True)

# 或者设置环境变量
# PYTHONASYNCIODEBUG=1 python script.py

七、性能对比

7.1 同步vs异步性能测试

python 复制代码
import time
import requests
import aiohttp

# 同步版本
def sync_fetch(urls):
    """Download each URL in turn with requests and return the response bodies as a list."""
    # One blocking request at a time; order of results follows `urls`.
    return [requests.get(url).text for url in urls]

# 异步版本
async def async_fetch(urls):
    """Download every URL concurrently over one session and return the bodies.

    Args:
        urls: Iterable of URLs to GET.

    Returns:
        A list of response bodies as text, in the same order as `urls`.
    """
    async def _fetch_text(session, url):
        # `async with` releases the response even when reading the body
        # fails; a bare `await session.get(url)` would leave that to the
        # caller and could leak the connection on error.
        async with session.get(url) as response:
            return await response.text()

    async with aiohttp.ClientSession() as session:
        # Request and body read both happen inside each coroutine, so the
        # bodies are downloaded concurrently rather than one after another.
        return await asyncio.gather(
            *(_fetch_text(session, url) for url in urls)
        )

# Benchmark input: the same endpoint requested ten times.
urls = ['https://httpbin.org/get'] * 10

# Time the synchronous version.
start = time.time()
sync_fetch(urls)
print(f"Sync time: {time.time() - start:.2f}s")

# Time the asynchronous version (includes event-loop startup via asyncio.run).
start = time.time()
asyncio.run(async_fetch(urls))
print(f"Async time: {time.time() - start:.2f}s")

7.2 性能对比结果

并发数 同步时间 异步时间 提升倍数
10 2.5s 0.5s 5x
50 12.3s 1.2s 10x
100 24.1s 2.1s 11x

八、实战案例:异步爬虫

8.1 爬取网页内容

python 复制代码
import asyncio
import aiohttp
from bs4 import BeautifulSoup

async def fetch_page(session, url):
    """Fetch `url` through `session` and return the page body as text."""
    async with session.get(url) as resp:
        html = await resp.text()
    return html

async def parse_page(html):
    """Parse `html` and return a dict with the page title and every anchor href."""
    soup = BeautifulSoup(html, 'html.parser')
    if soup.title:
        title = soup.title.string
    else:
        title = "No title"
    # Only anchors that actually carry an href attribute are collected.
    hrefs = [anchor['href'] for anchor in soup.find_all('a', href=True)]
    return {'title': title, 'links': hrefs}

async def crawl(url, session, visited, max_depth=None):
    """Recursively crawl `url` and the pages it links to.

    Args:
        url: Page to fetch.
        session: HTTP session passed through to `fetch_page`.
        visited: Set of URLs already seen; it is mutated in place and shared
            by every recursive call so no page is fetched twice.
        max_depth: How many link levels to follow below this page. 0 fetches
            only `url`; None (the default) follows links without a depth
            limit, which is the original behaviour.

    Failures while fetching or parsing are printed and do not propagate.
    """
    if url in visited:
        return

    visited.add(url)
    print(f"Crawling: {url}")

    try:
        html = await fetch_page(session, url)
        page_data = await parse_page(html)

        # Stop descending once the depth budget is used up.
        if max_depth is not None and max_depth <= 0:
            return
        next_depth = None if max_depth is None else max_depth - 1

        # Follow at most the first five links, and only absolute http(s) ones.
        tasks = [
            crawl(link, session, visited, next_depth)
            for link in page_data['links'][:5]
            if link.startswith('http')
        ]

        await asyncio.gather(*tasks)

    except Exception as e:
        print(f"Failed to crawl {url}: {e}")

async def main():
    """Crawl from https://example.com over one shared session and print how many URLs were visited."""
    seen = set()
    async with aiohttp.ClientSession() as session:
        await crawl('https://example.com', session, seen)
    # `seen` was filled in place by crawl().
    print(f"Total pages crawled: {len(seen)}")

asyncio.run(main())

8.2 异步数据处理管道

python 复制代码
async def download_data(url, queue):
    """GET `url`, decode the JSON body, and put the result on `queue`."""
    # A session is opened for this single request and closed afterwards.
    async with aiohttp.ClientSession() as session, session.get(url) as resp:
        payload = await resp.json()
        await queue.put(payload)

async def process_data(queue):
    """Consume items from `queue` forever, printing a processed summary of each.

    Every dequeued item is acknowledged with `task_done()` whether or not it
    could be processed. A malformed item is reported and skipped rather than
    killing the worker, so a caller waiting on `queue.join()` cannot hang.
    The loop ends only when the task is cancelled.
    """
    while True:
        data = await queue.get()
        try:
            # Processing step: keep the id and name, stamp the time.
            processed = {
                'id': data.get('id'),
                'name': data.get('name'),
                'processed_at': time.time()
            }
            print(f"Processed: {processed}")
        except Exception as exc:
            print(f"Failed to process {data!r}: {exc}")
        finally:
            # Must run for every get(), otherwise queue.join() never returns.
            queue.task_done()

async def main():
    """Download three example payloads into a queue and stop once the processor has drained it."""
    queue = asyncio.Queue()

    # The processor runs in the background for the whole pipeline.
    processor = asyncio.create_task(process_data(queue))

    sources = (
        'https://api.example.com/data/1',
        'https://api.example.com/data/2',
        'https://api.example.com/data/3',
    )
    await asyncio.gather(*[download_data(source, queue) for source in sources])

    # Wait until every queued item has been acknowledged, then stop the
    # otherwise endless processor loop.
    await queue.join()
    processor.cancel()

asyncio.run(main())

九、常见问题与解决方案

9.1 阻塞事件循环

python 复制代码
def _sum_range(n):
    """Return 0 + 1 + ... + (n - 1) with a deliberately slow pure-Python loop."""
    result = 0
    for i in range(n):
        result += i
    return result


# Problem: a CPU-bound computation inside a coroutine blocks the event loop.
async def cpu_intensive_task(n=10**8):
    """Sum the integers below `n` directly on the event loop (blocking demo).

    The coroutine never awaits, so no other task can run until it returns.
    """
    return _sum_range(n)


# Solution: hand the synchronous work to an executor.
async def run_cpu_task(n=10**8):
    """Sum the integers below `n` in the default thread pool and return the total.

    `run_in_executor` needs a plain callable. Passing the `async def` above
    would only create a coroutine object in the worker thread and return it
    unexecuted, so the synchronous helper is submitted instead.
    """
    loop = asyncio.get_running_loop()
    # None selects the loop's default executor (a thread pool). This keeps
    # the loop responsive; for real multi-core speed-ups on CPU-bound work a
    # concurrent.futures.ProcessPoolExecutor can be passed instead.
    return await loop.run_in_executor(None, _sum_range, n)

9.2 共享状态管理

python 复制代码
# 使用锁保护共享状态
import asyncio

class Counter:
    """Integer counter whose increments are serialized by an asyncio.Lock."""

    def __init__(self):
        # `lock` guards every update of `value`.
        self.lock = asyncio.Lock()
        self.value = 0

    async def increment(self):
        """Add one to `value` while holding the lock and return the new value."""
        async with self.lock:
            updated = self.value + 1
            self.value = updated
        return updated

async def worker(counter):
    """Call `counter.increment()` one thousand times, one after another."""
    remaining = 1000
    while remaining > 0:
        await counter.increment()
        remaining -= 1

async def main():
    """Run ten workers against one shared Counter and print the final value."""
    counter = Counter()
    await asyncio.gather(*(worker(counter) for _ in range(10)))
    print(f"Final value: {counter.value}")  # expected: 10000

asyncio.run(main())

9.3 调试技巧

python 复制代码
# 设置日志级别
import logging
logging.basicConfig(level=logging.DEBUG)

# 监控任务
async def monitor_tasks():
    """Print the number of unfinished asyncio tasks once per second, forever."""
    while True:
        # Count tasks on the running loop that have not completed yet.
        active = sum(1 for task in asyncio.all_tasks() if not task.done())
        print(f"Active tasks: {active}")
        await asyncio.sleep(1)

十、结语

Python异步编程是处理IO密集型任务的利器,通过合理使用asyncio,可以显著提高程序的并发性能。但需要注意避免阻塞调用、合理控制并发数量,并做好错误处理。希望本文能帮助你掌握Python异步编程的核心技能。

#Python #异步编程 #asyncio #并发

相关推荐
Industio_触觉智能8 小时前
瑞芯微RK3576迷你工控整机边缘计算盒子规格书参数配置性能说明,触觉智能IPC7609
人工智能·嵌入式硬件·边缘计算·openharmony·开源鸿蒙·瑞芯微·rk3576
AI街潜水的八角8 小时前
PyTorch框架——基于深度学习PmrNet神经网络AI去噪图像增强系统(含训练代码、创新对比、数据集和GUI交互界面)
人工智能·pytorch·深度学习
月光船幽幽8 小时前
Helio协议热切换实战解析
人工智能·动态规划·拓扑学
烟雨江南7858 小时前
农田上空的“智慧天眼”:多光谱视觉系统在作物生长监测与病虫害大范围筛查中的落地方案
人工智能·ai质检
逆境不可逃8 小时前
【与我学 ClaudeCode】并发篇 之 Background Tasks :守护线程与异步通知队列
人工智能·arcgis·agent
南屹川8 小时前
【前端进阶】React状态管理完全指南:从useState到Redux
人工智能
网宿安全演武实验室8 小时前
AI 赋能代码审计:静态扫描与AI Skill的协同实践
人工智能·主机安全·终端安全·网络攻防
hh.h.8 小时前
PyTorch模型适配昇腾NPU:从零开始的端到端流程
人工智能·pytorch·python·cann
老詹图解IT8 小时前
AI时代的个人隐私与网络安全自保——从账号密码到设备行为的完整体系
人工智能·安全·web安全