A Complete Guide to Python Asynchronous Programming: From asyncio to High-Performance Applications

Introduction: Why Asynchronous Programming?

Imagine downloading data from 10 websites. Done the traditional way, the code waits for the first site to respond before requesting the second, which is far too slow. Asynchronous programming lets you fire off all the requests at once and handle whichever one comes back first.

python
# An intuitive comparison: synchronous vs asynchronous
import time

def sync_download():
    """Synchronous download - one site at a time"""
    websites = ["site1", "site2", "site3"]
    for site in websites:
        time.sleep(1)  # simulate network latency
        print(f"Download finished: {site}")

# The synchronous version takes 3 seconds
# An asynchronous version can take just over 1 second
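
For comparison, here is a sketch of the asynchronous version of the same download. It uses asyncio.gather, which is introduced properly below; the roughly-one-second figure assumes the three simulated waits overlap fully.

python
import asyncio

async def async_download():
    """Async download - all at once"""
    async def fetch(site):
        await asyncio.sleep(1)  # simulate network latency
        print(f"Download finished: {site}")

    websites = ["site1", "site2", "site3"]
    # All three simulated downloads run concurrently
    await asyncio.gather(*(fetch(site) for site in websites))

# asyncio.run(async_download())  # finishes in about 1 second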

1. Async Basics: async/await

1.1 Your First Async Program

python
import asyncio
import time

print("=== Async Programming Basics ===")

# Define an async function (a coroutine)
async def say_hello(name, delay):
    """Say hello asynchronously"""
    await asyncio.sleep(delay)  # non-blocking wait
    print(f"Hello, {name}! (waited {delay} seconds)")
    return f"Hello {name}"

# Drive the coroutine from another coroutine
async def main():
    print("Start time:", time.strftime("%H:%M:%S"))

    # Run a single async task
    result = await say_hello("Zhang San", 2)
    print(f"Return value: {result}")

    print("End time:", time.strftime("%H:%M:%S"))

# The Python 3.7+ way to run a coroutine
print("Running a single async task:")
asyncio.run(main())
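
For reference, before Python 3.7 you had to manage the event loop yourself; a sketch of the older style:

python
# Pre-3.7 style: create, use, and close the loop manually
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(main())
finally:
    loop.close()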

print("\n" + "="*50)

1.2 Running Multiple Tasks Concurrently

python
async def download_file(filename, download_time):
    """Simulate downloading a file"""
    print(f"Starting download: {filename}")
    await asyncio.sleep(download_time)
    print(f"Download finished: {filename} (took {download_time} seconds)")
    return f"{filename}_content"

async def concurrent_downloads():
    """Download several files concurrently"""
    print("Starting concurrent downloads:")
    start_time = time.time()

    # Create the tasks; they start running right away
    task1 = asyncio.create_task(download_file("file1.txt", 3))
    task2 = asyncio.create_task(download_file("file2.txt", 2))
    task3 = asyncio.create_task(download_file("file3.txt", 1))

    # Wait for all of them to finish
    results = await asyncio.gather(task1, task2, task3)

    end_time = time.time()
    print(f"All files downloaded! Total time: {end_time - start_time:.2f}s")
    print(f"Results: {results}")

    return results

print("Running multiple tasks concurrently:")
asyncio.run(concurrent_downloads())
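
On Python 3.11 and newer, asyncio.TaskGroup is a structured alternative to create_task plus gather: every task is awaited when the async with block exits, and if one fails the others are cancelled. A sketch reusing download_file:

python
async def concurrent_downloads_tg():
    """Same downloads via a task group (Python 3.11+)"""
    async with asyncio.TaskGroup() as tg:
        t1 = tg.create_task(download_file("file1.txt", 3))
        t2 = tg.create_task(download_file("file2.txt", 2))
        t3 = tg.create_task(download_file("file3.txt", 1))
    # Leaving the block means every task has finished
    return [t1.result(), t2.result(), t3.result()]

# asyncio.run(concurrent_downloads_tg())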

print("\n" + "="*50)

2. Async in Practice: Network Requests

2.1 Async HTTP Requests

python
import aiohttp
import asyncio

async def fetch_url(session, url, timeout=5):
    """Fetch a URL's contents asynchronously"""
    try:
        # aiohttp expects a ClientTimeout object for the timeout
        async with session.get(
            url, timeout=aiohttp.ClientTimeout(total=timeout)
        ) as response:
            if response.status == 200:
                content = await response.text()
                return {
                    'url': url,
                    'status': response.status,
                    'content_length': len(content),
                    'success': True
                }
            else:
                return {
                    'url': url,
                    'status': response.status,
                    'success': False
                }
    except asyncio.TimeoutError:
        return {
            'url': url,
            'status': 'timeout',
            'success': False
        }
    except Exception as e:
        return {
            'url': url,
            'status': 'error',
            'error': str(e),
            'success': False
        }

async def fetch_multiple_urls(urls):
    """Fetch several URLs concurrently"""
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
        return results

# Test driver
async def test_http_requests():
    """Exercise the HTTP helpers"""
    # A few test URLs (public testing APIs)
    test_urls = [
        "https://httpbin.org/delay/1",  # 1-second delay
        "https://httpbin.org/delay/2",  # 2-second delay
        "https://httpbin.org/status/200",
        "https://httpbin.org/status/404",
        "https://httpbin.org/ip",
        "https://httpbin.org/user-agent"
    ]

    print("Starting concurrent HTTP requests...")
    start_time = time.time()

    results = await fetch_multiple_urls(test_urls)

    end_time = time.time()
    print(f"Requests finished! Total time: {end_time - start_time:.2f}s")

    # Show the results
    print("\nResults:")
    for i, result in enumerate(results, 1):
        if result['success']:
            print(f"{i}. {result['url']} - OK, status: {result['status']}, "
                  f"content length: {result['content_length']}")
        else:
            print(f"{i}. {result['url']} - failed, status: {result['status']}")

    return results

print("Async HTTP request demo:")
# asyncio.run(test_http_requests())

print("(Note: these requests need live URLs, so only the code structure is shown here)")

print("\n" + "="*50)

2.2 Limiting Concurrency

python
class RateLimiter:
    """Rate limiter (caps concurrent requests with a semaphore)"""

    def __init__(self, max_concurrent):
        self.semaphore = asyncio.Semaphore(max_concurrent)

    async def limited_fetch(self, session, url):
        """Fetch while holding a semaphore slot"""
        async with self.semaphore:
            return await fetch_url(session, url)

async def rate_limited_requests(urls, max_concurrent=3):
    """Fetch URLs with a cap on in-flight requests"""
    rate_limiter = RateLimiter(max_concurrent)

    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            task = asyncio.create_task(
                rate_limiter.limited_fetch(session, url)
            )
            tasks.append(task)

        # Report progress as tasks finish
        print(f"Processing {len(urls)} URLs, concurrency limit: {max_concurrent}")

        results = []
        for i, task in enumerate(asyncio.as_completed(tasks), 1):
            result = await task
            results.append(result)
            print(f"Progress: {i}/{len(urls)} - {result['url']}")

        return results

print("Rate limiting demo:")
print("You can control how many requests run at once to avoid overloading the server")

print("\n" + "="*50)

3. Async File Operations

3.1 Async Reads and Writes

python
import aiofiles
import os

async def async_write_file(filename, content):
    """Write a file asynchronously"""
    try:
        async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
            await f.write(content)
        print(f"Write succeeded: {filename}")
        return True
    except Exception as e:
        print(f"Write failed {filename}: {e}")
        return False

async def async_read_file(filename):
    """Read a file asynchronously"""
    try:
        async with aiofiles.open(filename, 'r', encoding='utf-8') as f:
            content = await f.read()
        print(f"Read succeeded: {filename}, length: {len(content)}")
        return content
    except Exception as e:
        print(f"Read failed {filename}: {e}")
        return None

async def process_multiple_files():
    """Work on several files at once"""
    # Create some test files
    files_data = {
        "test1.txt": "Contents of the first test file\nHello World!",
        "test2.txt": "The second test file\nAsync programming in Python is fun",
        "test3.txt": "The third file\n" + "data line\n" * 10
    }

    print("Starting the async file demo...")

    # Write the files concurrently
    write_tasks = []
    for filename, content in files_data.items():
        task = asyncio.create_task(async_write_file(filename, content))
        write_tasks.append(task)

    write_results = await asyncio.gather(*write_tasks)
    print(f"Writes done, succeeded: {sum(write_results)}/{len(write_results)}")

    # Read them back concurrently
    read_tasks = []
    for filename in files_data.keys():
        task = asyncio.create_task(async_read_file(filename))
        read_tasks.append(task)

    read_results = await asyncio.gather(*read_tasks)

    # Show a short preview of each file
    print("\nFile previews:")
    for filename, content in zip(files_data.keys(), read_results):
        if content:
            preview = content[:50].replace('\n', ' ') + "..."
            print(f"{filename}: {preview}")

    # Clean up the test files
    print("\nCleaning up test files...")
    for filename in files_data.keys():
        try:
            os.remove(filename)
            print(f"Deleted: {filename}")
        except OSError:
            pass

print("Async file operations demo:")
# asyncio.run(process_multiple_files())
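
Worth knowing: aiofiles gets its async behavior by running the blocking file calls in a thread pool. On Python 3.9+ you can get the same effect without the extra dependency via asyncio.to_thread; a sketch (read_file_in_thread is an illustrative name):

python
from pathlib import Path

async def read_file_in_thread(filename):
    """Read a file off the event loop using a worker thread."""
    return await asyncio.to_thread(Path(filename).read_text, encoding='utf-8')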

print("\n" + "="*50)

4. Async and Databases

4.1 Async Database Operations

python
# SQLite is used here as an example; in practice you'd often use an
# async MySQL/PostgreSQL driver instead
import aiosqlite

async def async_database_demo():
    """Async database demo"""
    print("Starting the async database demo...")

    # Create an in-memory database
    async with aiosqlite.connect(':memory:') as db:
        # Create a table
        await db.execute('''
            CREATE TABLE users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                email TEXT UNIQUE NOT NULL,
                age INTEGER
            )
        ''')
        await db.commit()
        print("Table created")

        # Insert some rows
        users = [
            ('Zhang San', 'zhangsan@example.com', 25),
            ('Li Si', 'lisi@example.com', 30),
            ('Wang Wu', 'wangwu@example.com', 28),
            ('Zhao Liu', 'zhaoliu@example.com', 35)
        ]

        # Insert via tasks (note: a single aiosqlite connection still
        # executes these one at a time; see the executemany sketch below)
        insert_tasks = []
        for name, email, age in users:
            task = asyncio.create_task(
                db.execute(
                    "INSERT INTO users (name, email, age) VALUES (?, ?, ?)",
                    (name, email, age)
                )
            )
            insert_tasks.append(task)

        await asyncio.gather(*insert_tasks)
        await db.commit()
        print(f"Inserted {len(users)} rows")

        # Query the data
        async with db.execute("SELECT * FROM users ORDER BY age") as cursor:
            rows = await cursor.fetchall()
            print("\nUsers (sorted by age):")
            for row in rows:
                print(f"  ID: {row[0]}, name: {row[1]}, email: {row[2]}, age: {row[3]}")

        # An aggregate query
        print("\nStatistics:")
        async with db.execute("SELECT COUNT(*), AVG(age) FROM users") as cursor:
            count, avg_age = await cursor.fetchone()
            print(f"  Total users: {count}")
            print(f"  Average age: {avg_age:.1f}")

        # Transaction demo
        print("\nTransaction demo:")
        try:
            await db.execute("BEGIN TRANSACTION")

            # An update
            await db.execute("UPDATE users SET age = age + 1 WHERE age < 30")

            # Simulate an operation that might fail
            should_fail = False
            if should_fail:
                raise Exception("Simulated transaction failure")

            await db.execute("DELETE FROM users WHERE age > 40")

            await db.execute("COMMIT")
            print("  Transaction committed")

        except Exception as e:
            await db.execute("ROLLBACK")
            print(f"  Transaction rolled back: {e}")

        print("\nDemo finished!")

print("Async database demo:")
# asyncio.run(async_database_demo())
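
A caveat on the "concurrent" inserts above: a single aiosqlite connection processes statements sequentially on its background thread, so those tasks are serialized anyway. For bulk inserts, executemany is simpler and usually faster; a sketch (bulk_insert is an illustrative helper):

python
async def bulk_insert(db, users):
    """Insert many rows with one call instead of one task per row."""
    await db.executemany(
        "INSERT INTO users (name, email, age) VALUES (?, ?, ?)",
        users
    )
    await db.commit()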

print("\n" + "="*50)

5. Async Web Frameworks: FastAPI in Practice

5.1 Building an Async API Quickly

python
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from typing import List, Optional
import asyncio

# Create the FastAPI app
app = FastAPI(title="Async API Demo", version="1.0.0")

# Data models
class Item(BaseModel):
    name: str
    description: Optional[str] = None
    price: float
    stock: int = 0

class Order(BaseModel):
    item_id: int
    quantity: int
    customer_name: str

# Fake in-memory database
fake_items_db = {
    1: Item(name="Laptop", description="High-end gaming laptop", price=6999.99, stock=10),
    2: Item(name="Smartphone", description="Latest 5G phone", price=3999.99, stock=50),
    3: Item(name="Tablet", description="Thin and portable", price=2999.99, stock=30)
}

orders_db = []

# Background task (simulating slow work)
async def process_order_background(order_id: int, order_data: Order):
    """Process an order in the background"""
    print(f"[background] Processing order {order_id}")
    await asyncio.sleep(3)  # simulate slow processing
    print(f"[background] Order {order_id} done: {order_data.customer_name} bought {order_data.quantity} item(s)")

# API endpoints
@app.get("/")
async def root():
    """Root path"""
    return {"message": "Welcome to the async API", "status": "running"}

@app.get("/items/", response_model=List[Item])
async def list_items(skip: int = 0, limit: int = 10):
    """List items (with pagination)"""
    await asyncio.sleep(0.5)  # simulate database latency
    items = list(fake_items_db.values())[skip:skip + limit]
    return items

@app.get("/items/{item_id}", response_model=Item)
async def get_item(item_id: int):
    """Fetch a single item"""
    if item_id not in fake_items_db:
        raise HTTPException(status_code=404, detail="Item not found")

    # Simulate a more involved async lookup
    await asyncio.sleep(0.3)
    return fake_items_db[item_id]

@app.post("/orders/")
async def create_order(order: Order, background_tasks: BackgroundTasks):
    """Create an order (handled via a background task)"""
    if order.item_id not in fake_items_db:
        raise HTTPException(status_code=404, detail="Item does not exist")

    item = fake_items_db[order.item_id]
    if order.quantity > item.stock:
        raise HTTPException(status_code=400, detail="Insufficient stock")

    # Update the stock
    item.stock -= order.quantity

    # Generate an order ID
    order_id = len(orders_db) + 1

    # Store the order (order.dict() is the Pydantic v1 API;
    # on Pydantic v2, use order.model_dump() instead)
    orders_db.append({
        "id": order_id,
        **order.dict(),
        "total_price": item.price * order.quantity,
        "status": "processing"
    })

    # Queue the background task
    background_tasks.add_task(process_order_background, order_id, order)

    return {
        "order_id": order_id,
        "message": "Order received and being processed",
        "total": item.price * order.quantity
    }

@app.get("/orders/{order_id}")
async def get_order_status(order_id: int):
    """Look up an order's status"""
    if order_id < 1 or order_id > len(orders_db):
        raise HTTPException(status_code=404, detail="Order not found")

    await asyncio.sleep(0.2)  # simulate lookup latency
    return orders_db[order_id - 1]

# Run the server (in a real file, this block starts it)
if __name__ == "__main__":
    import uvicorn
    print("Starting the FastAPI server...")
    print("Visit http://localhost:8000/docs for the API docs")
    uvicorn.run(app, host="0.0.0.0", port=8000)

print("FastAPI async web framework demo:")
print("1. Defined the Item and Order data models")
print("2. Created endpoints for listing items, fetching one, and placing orders")
print("3. Used BackgroundTasks for slow operations")
print("4. Ready for async database access (in-memory storage in this example)")

print("\n" + "="*50)

6. Performance Optimization and Error Handling

6.1 Monitoring Async Performance

python
import asyncio
import time

class AsyncTimer:
    """Async context-manager timer"""

    def __init__(self, name):
        self.name = name
        self.start_time = None
        self.end_time = None

    async def __aenter__(self):
        self.start_time = time.time()
        print(f"[timer start] {self.name}")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.end_time = time.time()
        duration = self.end_time - self.start_time
        print(f"[timer end] {self.name}: {duration:.4f}s")
        if exc_type:
            print(f"[error] {self.name}: {exc_val}")

async def monitored_operation():
    """An operation wrapped in the timer"""
    async with AsyncTimer("monitored_operation"):
        # Simulate some async work
        await asyncio.sleep(1)

        # Fail roughly half the time, based on the clock's fractional part
        if time.time() % 2 > 1:
            raise ValueError("Random failure")

        # More work
        await asyncio.sleep(0.5)
        return "Operation succeeded"

async def error_handling_demo():
    """Error handling demo"""
    print("=== Async Error Handling ===")

    # 1. Plain try/except
    try:
        result = await monitored_operation()
        print(f"Result: {result}")
    except ValueError as e:
        print(f"Caught an error: {e}")

    print("\n2. Error handling with asyncio.gather:")

    async def task_with_error(n):
        await asyncio.sleep(0.1)
        if n == 2:
            raise ValueError(f"Task {n} failed on purpose")
        return f"Task {n} succeeded"

    # Option 1: let everything finish, collecting exceptions as results
    tasks = [task_with_error(i) for i in range(5)]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    print("All task results:")
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"  Task {i}: failed - {result}")
        else:
            print(f"  Task {i}: succeeded - {result}")

    print("\n3. Error handling with asyncio.wait:")

    tasks = [asyncio.create_task(task_with_error(i)) for i in range(5)]

    # Wait for every task to finish
    done, pending = await asyncio.wait(tasks, return_when=asyncio.ALL_COMPLETED)

    print("Completed tasks:")
    for task in done:
        try:
            result = task.result()
            print(f"  succeeded: {result}")
        except Exception as e:
            print(f"  failed: {e}")

async def performance_optimization():
    """Performance tips"""
    print("\n=== Performance Tips ===")

    # 1. Avoid awaiting inside a hot loop when the work can overlap
    print("1. Avoid unnecessary awaits:")

    async def inefficient():
        """Inefficient: each iteration waits before the next starts"""
        result = []
        for i in range(10):
            # Awaiting inside the loop serializes the waits
            await asyncio.sleep(0.01)
            result.append(i)
        return result

    async def efficient():
        """Efficient: start everything first, then wait"""
        # Create all the tasks up front
        tasks = []
        for i in range(10):
            task = asyncio.create_task(asyncio.sleep(0.01))
            tasks.append((task, i))

        # Then await them; the sleeps overlap
        results = []
        for task, i in tasks:
            await task
            results.append(i)

        return results

    # 2. Use asyncio.Queue for flow control
    print("\n2. Using asyncio.Queue:")

    async def worker(name, queue):
        """Worker coroutine"""
        while True:
            item = await queue.get()
            if item is None:  # shutdown signal
                break

            print(f"{name} processing: {item}")
            await asyncio.sleep(0.1)
            queue.task_done()

    async def queue_demo():
        """Queue demo"""
        queue = asyncio.Queue(maxsize=5)

        # Start the workers
        workers = [
            asyncio.create_task(worker(f"Worker-{i}", queue))
            for i in range(3)
        ]

        # Produce items
        for i in range(10):
            await queue.put(f"Item-{i}")
            print(f"Produced: Item-{i}")

        # Wait until every item has been processed
        await queue.join()

        # Stop the workers
        for _ in workers:
            await queue.put(None)

        await asyncio.gather(*workers)

    await queue_demo()

print("性能优化与错误处理:")
asyncio.run(error_handling_demo())
# asyncio.run(performance_optimization())
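
Timeouts belong in the error-handling toolbox as well: asyncio.wait_for cancels an awaitable that takes too long and raises asyncio.TimeoutError. A sketch around monitored_operation:

python
async def with_timeout():
    try:
        # Allow monitored_operation at most 1 second
        return await asyncio.wait_for(monitored_operation(), timeout=1.0)
    except asyncio.TimeoutError:
        print("Operation timed out and was cancelled")

# asyncio.run(with_timeout())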

print("\n" + "="*50)

7. Hands-On Project: An Async Crawler System

python
import asyncio
import aiohttp
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import re

class AsyncWebCrawler:
    """Async web crawler"""

    def __init__(self, start_url, max_depth=2, max_concurrent=10):
        self.start_url = start_url
        self.max_depth = max_depth
        self.max_concurrent = max_concurrent
        self.visited_urls = set()
        self.results = []
        self.semaphore = asyncio.Semaphore(max_concurrent)

    def normalize_url(self, url, base_url):
        """Normalize a URL"""
        if url.startswith('#'):
            return None
        if url.startswith('javascript:'):
            return None

        # Convert to an absolute URL
        absolute_url = urljoin(base_url, url)

        # Keep only scheme, host, and path (drops fragments and query strings)
        parsed = urlparse(absolute_url)
        return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

    async def fetch_page(self, session, url):
        """Fetch a page"""
        async with self.semaphore:
            try:
                async with session.get(
                    url, timeout=aiohttp.ClientTimeout(total=10)
                ) as response:
                    if response.status == 200:
                        content_type = response.headers.get('content-type', '')
                        if 'text/html' in content_type:
                            html = await response.text()
                            return {
                                'url': url,
                                'html': html,
                                'status': 'success'
                            }
                        else:
                            return {
                                'url': url,
                                'status': 'not_html',
                                'content_type': content_type
                            }
                    else:
                        return {
                            'url': url,
                            'status': 'error',
                            'status_code': response.status
                        }
            except Exception as e:
                return {
                    'url': url,
                    'status': 'exception',
                    'error': str(e)
                }

    def extract_links(self, html, base_url):
        """Extract links from HTML"""
        soup = BeautifulSoup(html, 'html.parser')
        links = []

        for link in soup.find_all('a', href=True):
            url = self.normalize_url(link['href'], base_url)
            if url and url not in self.visited_urls:
                links.append(url)

        # Extract the title
        title = soup.title.string if soup.title else "Untitled"

        # Extract the body text (simplified)
        text = soup.get_text()
        text = re.sub(r'\s+', ' ', text).strip()

        return links, title, text[:200]  # first 200 characters only
    
    async def crawl(self, session, url, depth=0):
        """Crawl recursively"""
        if depth > self.max_depth:
            return

        if url in self.visited_urls:
            return

        self.visited_urls.add(url)
        print(f"[depth {depth}] crawling: {url}")

        # Fetch the page
        result = await self.fetch_page(session, url)

        if result['status'] == 'success':
            html = result['html']
            links, title, preview = self.extract_links(html, url)

            # Save the result
            self.results.append({
                'url': url,
                'title': title,
                'preview': preview,
                'depth': depth,
                'link_count': len(links)
            })

            # Recurse into the links
            if depth < self.max_depth and links:
                tasks = []
                for link in links:
                    if link not in self.visited_urls:
                        task = asyncio.create_task(
                            self.crawl(session, link, depth + 1)
                        )
                        tasks.append(task)

                if tasks:
                    await asyncio.gather(*tasks)

    async def run(self):
        """Run the crawler"""
        print(f"Starting crawl: {self.start_url}")
        print(f"Max depth: {self.max_depth}, max concurrency: {self.max_concurrent}")

        async with aiohttp.ClientSession() as session:
            await self.crawl(session, self.start_url)

        print("\nCrawl finished!")
        print(f"Visited {len(self.visited_urls)} URLs in total")
        print(f"Collected {len(self.results)} pages")

        return self.results

# Usage example
async def crawler_demo():
    """Crawler demo"""
    # Note: in real use, respect robots.txt and don't overload sites
    crawler = AsyncWebCrawler(
        start_url="https://httpbin.org/html",  # a test page
        max_depth=1,
        max_concurrent=5
    )

    results = await crawler.run()

    # Show the results
    print("\nCrawl results:")
    for i, result in enumerate(results[:5], 1):  # first 5 only
        print(f"{i}. {result['title']}")
        print(f"   URL: {result['url']}")
        print(f"   depth: {result['depth']}, links: {result['link_count']}")
        print(f"   preview: {result['preview']}")
        print()

print("Async crawler demo:")
print("This crawler fetches multiple pages concurrently to collect information efficiently")
print("Note: in real use, follow each site's rules and add delays to avoid getting blocked")

print("\n" + "="*50)

Summary: Best Practices for Async Programming

Scenarios that suit async:

  1. I/O-bound tasks (network requests, file I/O, database queries)
  2. High-concurrency services (web servers, API services)
  3. Real-time applications (chat rooms, live notifications)
  4. Crawlers and data collection

Scenarios that don't suit async:

  1. CPU-bound tasks (scientific computing, image processing) - see the sketch after this list
  2. Simple scripts (no concurrency needed)
  3. Tasks with strict ordering requirements
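
For CPU-bound work the event loop itself cannot help, but asyncio can still hand the work to worker processes so the loop stays responsive. A sketch with a process pool (heavy_computation is an illustrative stand-in):

python
import asyncio
from concurrent.futures import ProcessPoolExecutor

def heavy_computation(n):
    """CPU-bound work; runs in a separate process."""
    return sum(i * i for i in range(n))

async def offload_demo():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as pool:
        # The event loop stays free while the worker process computes
        result = await loop.run_in_executor(pool, heavy_computation, 10_000_000)
        print(result)

# if __name__ == "__main__":
#     asyncio.run(offload_demo())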