[剪映小助手 Source Code Deep Dive] Chapter 40: Performance Optimization Strategies

Chapter 40: Performance Optimization Strategies

40.1 Overview

Performance optimization is one of the core concerns in the architecture of 剪映小助手. As an application that drives video-editing tasks, the system has to stay fast and stable while handling large numbers of media files, complex effect rendering, and real-time preview. This chapter details the optimization strategies used throughout the system, covering asynchronous processing, caching, memory management, concurrency control, and related techniques.

40.2 Asynchronous Processing Optimization

40.2.1 Asynchronous I/O Operations

剪映小助手 makes extensive use of asynchronous I/O to raise the system's concurrency, particularly for file reads/writes and network requests:

python
# Example: asynchronous file operations
import asyncio
import aiofiles

async def async_read_file(file_path: str) -> str:
    """Read a file asynchronously"""
    async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
        content = await f.read()
    return content

async def async_write_file(file_path: str, content: str) -> None:
    """Write a file asynchronously"""
    async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
        await f.write(content)
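
The benefit shows up when several files are handled at once. A minimal usage sketch, with made-up subtitle file names, reads them concurrently with asyncio.gather:

python
async def load_subtitle_files():
    """Read several subtitle files concurrently instead of one after another"""
    paths = ["intro.srt", "body.srt", "outro.srt"]  # hypothetical file names
    # asyncio.gather schedules all reads on the event loop at the same time
    return await asyncio.gather(*(async_read_file(p) for p in paths))

# asyncio.run(load_subtitle_files())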

40.2.2 Asynchronous Task Processing

Video-generation tasks are processed asynchronously so they do not block the main thread:

python
# Based on the implementation in src/pyJianYingDraft/core/task.py
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict

class AsyncTaskProcessor:
    """Asynchronous task processor"""
    
    def __init__(self, max_workers: int = 4):
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
    
    async def process_video_task(self, task_data: Dict[str, Any]) -> str:
        """Process a video task asynchronously"""
        # Run the time-consuming video processing in the thread pool so the
        # event loop is never blocked
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            self.executor,
            self._sync_process_video,
            task_data
        )
        return result
    
    def _sync_process_video(self, task_data: Dict[str, Any]) -> str:
        """Synchronous video-processing worker"""
        # Actual video-processing logic goes here
        pass

40.2.3 Asynchronous Database Operations

Database operations are likewise performed asynchronously to improve throughput:

python
# Asynchronous database connection pool
from databases import Database

class AsyncDatabaseManager:
    """Asynchronous database manager"""
    
    def __init__(self, database_url: str):
        self.database = Database(database_url)
        self._connection_pool_size = 10
    
    async def connect(self):
        """Open the database connection (pool)"""
        await self.database.connect()
    
    async def execute_query(self, query: str, values: dict = None):
        """Run a query asynchronously"""
        return await self.database.fetch_all(query, values)
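
A minimal usage sketch, assuming a SQLite database file and a drafts table that exist only for illustration (the databases package needs the matching async driver installed):

python
import asyncio

async def main():
    db = AsyncDatabaseManager("sqlite:///drafts.db")  # hypothetical connection URL
    await db.connect()
    rows = await db.execute_query(
        "SELECT id, status FROM drafts WHERE status = :status",
        {"status": "done"},
    )
    print(len(rows))

# asyncio.run(main())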

40.3 Cache Optimization

40.3.1 Multi-Level Cache Architecture

The system uses a multi-level cache architecture consisting of an in-memory cache, a file cache, and a distributed cache:

python
# Based on the implementation in src/utils/draft_cache.py
import os
import pickle
import threading
import time
from collections import OrderedDict
from typing import Any

class MultiLevelCache:
    """多级缓存系统"""
    
    def __init__(self, max_memory_cache: int = 1000, 
                 cache_ttl: int = 3600):
        self.memory_cache = OrderedDict()
        self.file_cache_dir = "/tmp/cache"
        self.cache_ttl = cache_ttl
        self.max_memory_cache = max_memory_cache
        self._lock = threading.RLock()
        
        # 确保缓存目录存在
        os.makedirs(self.file_cache_dir, exist_ok=True)
    
    def get(self, key: str) -> Any:
        """获取缓存值"""
        with self._lock:
            # 先检查内存缓存
            if key in self.memory_cache:
                value, timestamp = self.memory_cache[key]
                if time.time() - timestamp < self.cache_ttl:
                    # 移动到末尾(LRU)
                    self.memory_cache.move_to_end(key)
                    return value
                else:
                    # 过期,删除
                    del self.memory_cache[key]
            
            # 检查文件缓存
            file_cache_path = os.path.join(self.file_cache_dir, f"{key}.cache")
            if os.path.exists(file_cache_path):
                with open(file_cache_path, 'rb') as f:
                    cached_data = pickle.load(f)
                    if time.time() - cached_data['timestamp'] < self.cache_ttl:
                        # 重新加载到内存缓存
                        self.put(key, cached_data['value'])
                        return cached_data['value']
            
            return None
    
    def put(self, key: str, value: Any) -> None:
        """设置缓存值"""
        with self._lock:
            # 添加到内存缓存
            self.memory_cache[key] = (value, time.time())
            self.memory_cache.move_to_end(key)
            
            # 如果超出限制,移除最旧的项
            if len(self.memory_cache) > self.max_memory_cache:
                self.memory_cache.popitem(last=False)
            
            # 同时保存到文件缓存
            file_cache_path = os.path.join(self.file_cache_dir, f"{key}.cache")
            cached_data = {
                'value': value,
                'timestamp': time.time()
            }
            with open(file_cache_path, 'wb') as f:
                pickle.dump(cached_data, f)
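
A short usage sketch of the cache; the key and payload below are made up for illustration:

python
cache = MultiLevelCache(max_memory_cache=500, cache_ttl=600)

# The first access misses, so the value is computed and stored in both levels
draft_meta = cache.get("draft:12345")                       # hypothetical cache key
if draft_meta is None:
    draft_meta = {"tracks": 3, "duration_us": 15_000_000}   # illustrative payload
    cache.put("draft:12345", draft_meta)

# Reads within the TTL are served from the in-memory LRU cache, and fall back
# to the file cache after an eviction or a process restart.
assert cache.get("draft:12345") == draft_meta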

40.3.2 Smart Cache Preloading

A smart preloading mechanism pulls data that is likely to be needed into the cache ahead of time:

python
class SmartCachePreloader:
    """智能缓存预加载器"""
    
    def __init__(self, cache_manager: MultiLevelCache):
        self.cache_manager = cache_manager
        self.preload_patterns = {
            'video_segments': ['popular_videos', 'recent_videos'],
            'effects': ['commonly_used_effects', 'trending_effects'],
            'templates': ['popular_templates', 'recent_templates']
        }
        self.preload_thread = None
        self.stop_preload = False
    
    def start_preload(self):
        """启动预加载线程"""
        self.preload_thread = threading.Thread(target=self._preload_worker)
        self.preload_thread.daemon = True
        self.preload_thread.start()
    
    def _preload_worker(self):
        """预加载工作线程"""
        while not self.stop_preload:
            for category, keys in self.preload_patterns.items():
                for key in keys:
                    if not self.cache_manager.get(key):
                        # 异步加载数据到缓存
                        data = self._load_data_for_key(category, key)
                        if data:
                            self.cache_manager.put(key, data)
            
            # 每隔一段时间检查一次
            time.sleep(300)  # 5分钟
    
    def _load_data_for_key(self, category: str, key: str) -> Any:
        """根据类别和键加载数据"""
        # 实际的数据加载逻辑
        pass

40.4 Memory Management Optimization

40.4.1 Object Pool Pattern

The object pool pattern is used to reuse objects that would otherwise be created and destroyed frequently:

python
# Based on the implementation in src/pyJianYingDraft/core/pool.py
import threading
from typing import Generic, List, TypeVar

import numpy as np

T = TypeVar('T')

class ObjectPool(Generic[T]):
    """Generic object pool"""
    
    def __init__(self, factory_func, reset_func=None, max_size: int = 100):
        self.factory_func = factory_func
        self.reset_func = reset_func
        self.max_size = max_size
        self._pool: List[T] = []
        # Track in-use objects by id() rather than in a WeakSet: pooled objects
        # such as numpy arrays are unhashable and cannot be stored in a set directly
        self._in_use: set = set()
        self._lock = threading.Lock()
    
    def acquire(self) -> T:
        """Take an object from the pool, creating one if the pool is empty"""
        with self._lock:
            if self._pool:
                obj = self._pool.pop()
            else:
                obj = self.factory_func()
            
            self._in_use.add(id(obj))
            return obj
    
    def release(self, obj: T) -> None:
        """Return an object to the pool"""
        with self._lock:
            if id(obj) in self._in_use:
                self._in_use.remove(id(obj))
                
                if len(self._pool) < self.max_size:
                    if self.reset_func:
                        self.reset_func(obj)
                    self._pool.append(obj)
    
    def clear(self) -> None:
        """Empty the pool"""
        with self._lock:
            self._pool.clear()
            self._in_use.clear()

# Example: a pool of video frame buffers
class VideoFramePool:
    """Video frame object pool"""
    
    def __init__(self, width: int, height: int, max_size: int = 50):
        self.width = width
        self.height = height
        self.pool = ObjectPool(
            factory_func=lambda: np.zeros((height, width, 3), dtype=np.uint8),
            reset_func=lambda frame: frame.fill(0),
            max_size=max_size
        )
    
    def get_frame(self) -> np.ndarray:
        """Get a frame buffer"""
        return self.pool.acquire()
    
    def return_frame(self, frame: np.ndarray) -> None:
        """Return a frame buffer to the pool"""
        self.pool.release(frame)
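
A usage sketch for the frame pool: returning the buffer in a finally block keeps frames from leaking out of the pool when processing fails:

python
frame_pool = VideoFramePool(width=1920, height=1080)

def render_black_frame() -> bytes:
    """Illustrative only: borrow a frame buffer, use it, and always give it back"""
    frame = frame_pool.get_frame()
    try:
        # ... draw into `frame` here ...
        return frame.tobytes()
    finally:
        frame_pool.return_frame(frame)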

40.4.2 Memory-Mapped Files

For large files, the system uses memory-mapped file I/O:

python
import asyncio
import mmap
import os

class MemoryMappedFileHandler:
    """内存映射文件处理器"""
    
    def __init__(self, file_path: str):
        self.file_path = file_path
        self.file_size = os.path.getsize(file_path)
        self._mmap = None
        self._file = None
    
    def __enter__(self):
        """上下文管理器进入"""
        self._file = open(self.file_path, 'rb')
        self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
        return self
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """上下文管理器退出"""
        if self._mmap:
            self._mmap.close()
        if self._file:
            self._file.close()
    
    def read_chunk(self, offset: int, size: int) -> bytes:
        """读取文件块"""
        if self._mmap and offset + size <= self.file_size:
            return self._mmap[offset:offset + size]
        return b''
    
    def get_buffer(self) -> mmap.mmap:
        """获取内存映射缓冲区"""
        return self._mmap

# 大视频文件处理示例
async def process_large_video(file_path: str, chunk_size: int = 1024*1024):
    """处理大视频文件"""
    with MemoryMappedFileHandler(file_path) as handler:
        file_size = handler.file_size
        processed_size = 0
        
        while processed_size < file_size:
            chunk = handler.read_chunk(processed_size, chunk_size)
            if not chunk:
                break
            
            # Hand the chunk to an async processing helper
            # (process_video_chunk is assumed to be defined elsewhere in the project)
            await process_video_chunk(chunk)
            processed_size += len(chunk)
            
            # 定期让出控制权,避免阻塞
            if processed_size % (chunk_size * 10) == 0:
                await asyncio.sleep(0)

40.4.3 Garbage Collection Tuning

Memory usage is further optimized by tuning the garbage collector's parameters:

python
import gc
import logging
import sys
import threading
import time
from typing import Any, Dict

logger = logging.getLogger(__name__)  # stand-in for the project-wide logger

class GCOptimizer:
    """垃圾回收优化器"""
    
    def __init__(self):
        self.original_thresholds = gc.get_threshold()
        self.optimized_thresholds = (700, 10, 10)  # 更激进的垃圾回收
    
    def enable_optimized_gc(self):
        """启用优化的垃圾回收"""
        gc.set_threshold(*self.optimized_thresholds)
        gc.enable()
    
    def disable_gc_for_critical_section(self):
        """在关键代码段禁用垃圾回收"""
        gc.disable()
    
    def restore_gc(self):
        """恢复垃圾回收设置"""
        gc.set_threshold(*self.original_thresholds)
        gc.enable()
    
    def force_collection(self, generation: int = 2):
        """强制进行垃圾回收"""
        collected = gc.collect(generation)
        return collected
    
    def get_memory_stats(self) -> Dict[str, Any]:
        """获取内存统计信息"""
        return {
            'gc_stats': gc.get_stats(),
            'objects_count': len(gc.get_objects()),
            'garbage_count': len(gc.garbage),
            'thresholds': gc.get_threshold(),
            'memory_usage': sys.getsizeof(gc.get_objects())  # size of the tracking list itself, not total heap usage
        }

# 内存监控器
class MemoryMonitor:
    """内存监控器"""
    
    def __init__(self, check_interval: int = 60):
        self.check_interval = check_interval
        self.monitoring = False
        self.monitor_thread = None
        self.memory_threshold = 1024 * 1024 * 1024  # 1GB
    
    def start_monitoring(self):
        """开始内存监控"""
        self.monitoring = True
        self.monitor_thread = threading.Thread(target=self._monitor_worker)
        self.monitor_thread.daemon = True
        self.monitor_thread.start()
    
    def _monitor_worker(self):
        """监控工作线程"""
        import psutil
        
        while self.monitoring:
            memory_info = psutil.virtual_memory()
            
            if memory_info.used > self.memory_threshold:
                # 内存使用过高,触发垃圾回收
                gc_optimizer = GCOptimizer()
                gc_optimizer.force_collection()
                
                # 清理缓存
                self._cleanup_caches()
                
                logger.warning(f"High memory usage detected: {memory_info.percent}%")
            
            time.sleep(self.check_interval)
    
    def _cleanup_caches(self):
        """清理各类缓存"""
        # 清理草稿缓存
        if hasattr(draft_cache, 'DRAFT_CACHE'):
            draft_cache.DRAFT_CACHE.clear()
        
        # 清理其他缓存
        # ...
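
The disable/restore pair above is easiest to use through a small context manager. The following is a minimal sketch (not part of the original source), assuming that briefly pausing collection is acceptable for the workload:

python
from contextlib import contextmanager

@contextmanager
def no_gc_during_critical_section(optimizer: GCOptimizer):
    """Disable the GC around a latency-sensitive block, then restore and collect"""
    optimizer.disable_gc_for_critical_section()
    try:
        yield
    finally:
        optimizer.restore_gc()
        optimizer.force_collection()

gc_optimizer = GCOptimizer()
with no_gc_during_critical_section(gc_optimizer):
    pass  # e.g. assembling a large draft JSON without GC pauses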

40.5 Concurrency Control Optimization

40.5.1 Connection Pool Management

The system manages pooled connections for both the database and HTTP clients:

python
# Database connection pool
from sqlalchemy.pool import QueuePool
import sqlalchemy as sa

class DatabaseConnectionPool:
    """数据库连接池管理器"""
    
    def __init__(self, database_url: str, pool_size: int = 20, 
                 max_overflow: int = 40, pool_timeout: int = 30):
        self.engine = sa.create_engine(
            database_url,
            poolclass=QueuePool,
            pool_size=pool_size,
            max_overflow=max_overflow,
            pool_timeout=pool_timeout,
            pool_pre_ping=True,  # 连接健康检查
            pool_recycle=3600    # 连接回收时间
        )
    
    def get_connection(self):
        """获取数据库连接"""
        return self.engine.connect()
    
    def execute_query(self, query: str, params: dict = None):
        """执行查询"""
        with self.get_connection() as conn:
            result = conn.execute(sa.text(query), params or {})
            return result.fetchall()

# HTTP连接池
import aiohttp
import asyncio

class HTTPConnectionPool:
    """HTTP连接池管理器"""
    
    def __init__(self, max_connections: int = 100, 
                 max_connections_per_host: int = 10,
                 timeout: int = 30):
        self.connector = aiohttp.TCPConnector(
            limit=max_connections,
            limit_per_host=max_connections_per_host,
            ttl_dns_cache=300,
            use_dns_cache=True,
            keepalive_timeout=30
        )
        
        self.timeout = aiohttp.ClientTimeout(total=timeout)
        self._session = None
    
    @property
    def session(self) -> aiohttp.ClientSession:
        """获取HTTP会话"""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                connector=self.connector,
                timeout=self.timeout
            )
        return self._session
    
    async def close(self):
        """关闭连接池"""
        if self._session and not self._session.closed:
            await self._session.close()

40.5.2 Semaphore Control

Semaphores are used to limit concurrent access:

python
# Based on the implementation in src/service/video_gen.py
import asyncio
from typing import Dict, Any
import threading

class ConcurrencyController:
    """并发控制器"""
    
    def __init__(self, max_concurrent_tasks: int = 5):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
        self.active_tasks: Dict[str, asyncio.Task] = {}
        self._lock = threading.Lock()
    
    async def execute_with_limit(self, task_id: str, coro_func, *args, **kwargs):
        """在并发限制下执行任务"""
        async with self.semaphore:
            with self._lock:
                if task_id in self.active_tasks:
                    raise ValueError(f"Task {task_id} is already running")
            
            try:
                task = asyncio.create_task(coro_func(*args, **kwargs))
                
                with self._lock:
                    self.active_tasks[task_id] = task
                
                result = await task
                return result
                
            finally:
                with self._lock:
                    self.active_tasks.pop(task_id, None)
    
    def get_active_task_count(self) -> int:
        """获取当前活跃任务数"""
        with self._lock:
            return len(self.active_tasks)
    
    async def cancel_task(self, task_id: str) -> bool:
        """取消指定任务"""
        with self._lock:
            task = self.active_tasks.get(task_id)
            if task and not task.done():
                task.cancel()
                return True
        return False

# 视频生成任务并发控制
class VideoGenConcurrencyController(ConcurrencyController):
    """视频生成并发控制器"""
    
    def __init__(self):
        # 根据系统资源动态调整最大并发数
        max_tasks = self._calculate_max_concurrent_tasks()
        super().__init__(max_tasks)
    
    def _calculate_max_concurrent_tasks(self) -> int:
        """根据系统资源计算最大并发任务数"""
        import psutil
        
        cpu_count = psutil.cpu_count()
        memory_gb = psutil.virtual_memory().total / (1024**3)
        
        # 基于CPU核心数和内存大小计算
        cpu_based_limit = cpu_count * 2
        memory_based_limit = int(memory_gb / 2)  # 每2GB内存支持1个并发任务
        
        return min(cpu_based_limit, memory_based_limit, 10)  # 最多10个
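
A short usage sketch for the controller; render_draft is a made-up coroutine standing in for the real video-generation task:

python
async def render_draft(draft_id: str) -> str:
    """Hypothetical stand-in for the real video-generation coroutine"""
    await asyncio.sleep(1)          # simulate rendering work
    return f"{draft_id}.mp4"

async def main():
    controller = ConcurrencyController(max_concurrent_tasks=2)
    # Only two renders run at a time; the rest wait on the semaphore
    results = await asyncio.gather(
        *(controller.execute_with_limit(f"task-{i}", render_draft, f"draft-{i}")
          for i in range(5))
    )
    print(results)

# asyncio.run(main())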

40.5.3 Read-Write Lock Optimization

For read-heavy, write-light scenarios, a read-write lock is used:

python
import threading
from typing import Any, Dict, List, Optional

class ReadWriteLock:
    """读写锁实现"""
    
    def __init__(self):
        self._read_ready = threading.Condition(threading.RLock())
        self._readers = 0
        self._writers = 0
        self._write_waiters = 0
    
    def acquire_read(self):
        """获取读锁"""
        self._read_ready.acquire()
        try:
            while self._writers > 0 or self._write_waiters > 0:
                self._read_ready.wait()
            self._readers += 1
        finally:
            self._read_ready.release()
    
    def release_read(self):
        """释放读锁"""
        self._read_ready.acquire()
        try:
            self._readers -= 1
            if self._readers == 0:
                self._read_ready.notify_all()  # notifyAll() is a deprecated alias
        finally:
            self._read_ready.release()
    
    def acquire_write(self):
        """获取写锁"""
        self._read_ready.acquire()
        self._write_waiters += 1
        try:
            while self._readers > 0 or self._writers > 0:
                self._read_ready.wait()
            self._write_waiters -= 1
            self._writers += 1
        finally:
            self._read_ready.release()
    
    def release_write(self):
        """释放写锁"""
        self._read_ready.acquire()
        try:
            self._writers -= 1
            self._read_ready.notify_all()
        finally:
            self._read_ready.release()

# 缓存数据结构的读写锁应用
class ConcurrentCache:
    """并发安全缓存"""
    
    def __init__(self):
        self._cache: Dict[str, Any] = {}
        self._lock = ReadWriteLock()
    
    def get(self, key: str) -> Optional[Any]:
        """获取缓存值(读操作)"""
        self._lock.acquire_read()
        try:
            return self._cache.get(key)
        finally:
            self._lock.release_read()
    
    def get_all(self) -> Dict[str, Any]:
        """获取所有缓存(读操作)"""
        self._lock.acquire_read()
        try:
            return self._cache.copy()
        finally:
            self._lock.release_read()
    
    def put(self, key: str, value: Any) -> None:
        """设置缓存值(写操作)"""
        self._lock.acquire_write()
        try:
            self._cache[key] = value
        finally:
            self._lock.release_write()
    
    def remove(self, key: str) -> bool:
        """删除缓存值(写操作)"""
        self._lock.acquire_write()
        try:
            return self._cache.pop(key, None) is not None
        finally:
            self._lock.release_write()
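
Manual acquire/release pairs are easy to get wrong under exceptions. A thin context-manager wrapper, sketched below (it is not part of the original source), keeps the read and write paths exception-safe:

python
from contextlib import contextmanager

@contextmanager
def reading(lock: ReadWriteLock):
    """Hold the read lock for the duration of a with-block"""
    lock.acquire_read()
    try:
        yield
    finally:
        lock.release_read()

@contextmanager
def writing(lock: ReadWriteLock):
    """Hold the write lock for the duration of a with-block"""
    lock.acquire_write()
    try:
        yield
    finally:
        lock.release_write()

rw_lock = ReadWriteLock()
shared_state = {}
with writing(rw_lock):
    shared_state["fps"] = 30
with reading(rw_lock):
    print(shared_state.get("fps"))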

40.6 Database Performance Optimization

40.6.1 Connection Pooling and Query Optimization

python
# Database query optimizer
class DatabaseQueryOptimizer:
    """数据库查询优化器"""
    
    def __init__(self, engine):
        self.engine = engine
        self.query_cache = {}
        self.cache_ttl = 300  # 5分钟缓存
    
    def optimize_query(self, query: str, params: dict = None) -> str:
        """优化SQL查询"""
        # 添加查询提示
        optimized_query = self._add_query_hints(query)
        
        # 使用预编译语句
        if params:
            optimized_query = self._parameterize_query(optimized_query, params)
        
        return optimized_query
    
    def _add_query_hints(self, query: str) -> str:
        """Add index hints to the query"""
        # For queries against large tables, attach an index hint.
        # In MySQL a USE INDEX hint must follow the table reference in the FROM
        # clause, not the SELECT keyword, so it is inserted after the table name.
        if 'SELECT' in query.upper() and 'video_segments' in query.lower():
            return query.replace(
                'video_segments',
                'video_segments USE INDEX (idx_video_segment_time)',
                1
            )
        
        return query
    
    def _parameterize_query(self, query: str, params: dict) -> str:
        """参数化查询"""
        # 使用命名参数防止SQL注入
        return query
    
    async def execute_optimized_query(self, query: str, params: dict = None) -> List[Dict]:
        """执行优化的查询"""
        # 检查查询缓存
        cache_key = f"{query}_{hash(str(params))}"
        current_time = time.time()
        
        if cache_key in self.query_cache:
            cached_result, timestamp = self.query_cache[cache_key]
            if current_time - timestamp < self.cache_ttl:
                return cached_result
        
        # 优化查询
        optimized_query = self.optimize_query(query, params)
        
        # 执行查询
        async with self.engine.acquire() as conn:
            result = await conn.fetch(optimized_query, params)
            
            # 缓存结果
            self.query_cache[cache_key] = (result, current_time)
            
            return result

# 索引管理器
class IndexManager:
    """数据库索引管理器"""
    
    def __init__(self, engine):
        self.engine = engine
        self.recommended_indexes = {
            'video_segments': [
                'CREATE INDEX idx_video_segment_time ON video_segments(start_time, end_time)',
                'CREATE INDEX idx_video_segment_material ON video_segments(material_id)',
                'CREATE INDEX idx_video_segment_track ON video_segments(track_id)'
            ],
            'effects': [
                'CREATE INDEX idx_effect_type ON effects(effect_type)',
                'CREATE INDEX idx_effect_segment ON effects(segment_id)',
                'CREATE INDEX idx_effect_time ON effects(start_time, end_time)'
            ],
            'drafts': [
                'CREATE INDEX idx_draft_created ON drafts(created_at)',
                'CREATE INDEX idx_draft_status ON drafts(status)',
                'CREATE INDEX idx_draft_user ON drafts(user_id)'
            ]
        }
    
    async def create_recommended_indexes(self):
        """创建推荐的索引"""
        for table, indexes in self.recommended_indexes.items():
            for index_sql in indexes:
                try:
                    async with self.engine.acquire() as conn:
                        await conn.execute(index_sql)
                        logger.info(f"Created index: {index_sql}")
                except Exception as e:
                    logger.warning(f"Failed to create index {index_sql}: {e}")
    
    async def analyze_table_performance(self, table_name: str) -> Dict[str, Any]:
        """分析表性能"""
        # 获取表统计信息
        query = """
        SELECT 
            table_rows,
            data_length,
            index_length,
            auto_increment
        FROM information_schema.tables 
        WHERE table_schema = DATABASE() AND table_name = :table_name
        """
        
        async with self.engine.acquire() as conn:
            stats = await conn.fetch(query, {'table_name': table_name})
            
            if stats:
                return {
                    'table_rows': stats[0]['table_rows'],
                    'data_size': stats[0]['data_length'],
                    'index_size': stats[0]['index_length'],
                    'total_size': stats[0]['data_length'] + stats[0]['index_length'],
                    'auto_increment': stats[0]['auto_increment']
                }
        
        return {}

40.6.2 Batch Operation Optimization

python
# Batch insert optimizer
class BatchInsertOptimizer:
    """批量插入优化器"""
    
    def __init__(self, engine, batch_size: int = 1000):
        self.engine = engine
        self.batch_size = batch_size
        self.pending_inserts = []
    
    async def add_insert(self, table: str, data: Dict[str, Any]):
        """Queue a row for insertion"""
        self.pending_inserts.append((table, data))
        
        # Flush once the batch size is reached; execute_batch_inserts is a
        # coroutine, so it must be awaited (calling it without await would do nothing)
        if len(self.pending_inserts) >= self.batch_size:
            await self.execute_batch_inserts()
    
    async def execute_batch_inserts(self):
        """执行批量插入"""
        if not self.pending_inserts:
            return
        
        # 按表分组
        table_groups = {}
        for table, data in self.pending_inserts:
            if table not in table_groups:
                table_groups[table] = []
            table_groups[table].append(data)
        
        # 批量插入每个表的数据
        for table, data_list in table_groups.items():
            await self._batch_insert_table(table, data_list)
        
        # 清空待插入列表
        self.pending_inserts.clear()
    
    async def _batch_insert_table(self, table: str, data_list: List[Dict[str, Any]]):
        """批量插入单个表的数据"""
        if not data_list:
            return
        
        # 构建批量插入SQL
        columns = list(data_list[0].keys())
        placeholders = ', '.join([f':{col}' for col in columns])
        columns_str = ', '.join(columns)
        
        insert_sql = f"""
        INSERT INTO {table} ({columns_str}) 
        VALUES ({placeholders})
        """
        
        # 执行批量插入
        async with self.engine.acquire() as conn:
            await conn.executemany(insert_sql, data_list)
            
            logger.info(f"Batch inserted {len(data_list)} records into {table}")
    
    async def flush(self):
        """刷新所有待插入数据"""
        await self.execute_batch_inserts()

# 批量更新优化器
class BatchUpdateOptimizer:
    """批量更新优化器"""
    
    def __init__(self, engine):
        self.engine = engine
    
    async def batch_update_with_case(self, table: str, update_data: List[Dict[str, Any]], 
                                   id_column: str = 'id'):
        """使用CASE语句进行批量更新"""
        if not update_data:
            return
        
        # 获取要更新的列(排除ID列)
        update_columns = [col for col in update_data[0].keys() if col != id_column]
        
        # Build CASE statements. Values are interpolated directly into the SQL,
        # so this assumes numeric, trusted values; use parameter binding for
        # string or untrusted data.
        case_statements = []
        for column in update_columns:
            case_parts = []
            for data in update_data:
                case_parts.append(f"WHEN {id_column} = {data[id_column]} THEN {data[column]}")
            
            case_statement = f"{column} = CASE {' '.join(case_parts)} END"
            case_statements.append(case_statement)
        
        # 构建WHERE子句
        ids = [str(data[id_column]) for data in update_data]
        where_clause = f"{id_column} IN ({','.join(ids)})"
        
        # 构建完整的UPDATE语句
        update_sql = f"""
        UPDATE {table} 
        SET {', '.join(case_statements)}
        WHERE {where_clause}
        """
        
        # 执行更新
        async with self.engine.acquire() as conn:
            await conn.execute(update_sql)
            
            logger.info(f"Batch updated {len(update_data)} records in {table}")

40.7 Network Performance Optimization

40.7.1 HTTP Request Optimization

python
# HTTP request optimizer
class HTTPRequestOptimizer:
    """HTTP请求优化器"""
    
    def __init__(self, connection_pool: HTTPConnectionPool):
        self.connection_pool = connection_pool
        self.request_cache = {}
        self.cache_ttl = 300  # 5分钟
        self.compression_enabled = True
    
    async def optimized_get(self, url: str, headers: dict = None, 
                           cache_key: str = None, **kwargs) -> Dict[str, Any]:
        """优化的GET请求"""
        # 检查缓存
        if cache_key and cache_key in self.request_cache:
            cached_result, timestamp = self.request_cache[cache_key]
            if time.time() - timestamp < self.cache_ttl:
                return cached_result
        
        # 设置优化头
        optimized_headers = headers or {}
        if self.compression_enabled:
            optimized_headers['Accept-Encoding'] = 'gzip, deflate'
            optimized_headers['Connection'] = 'keep-alive'
        
        # 执行请求
        session = self.connection_pool.session
        async with session.get(url, headers=optimized_headers, **kwargs) as response:
            result = {
                'status': response.status,
                'headers': dict(response.headers),
                'content': await response.read()
            }
            
            # 缓存结果
            if cache_key:
                self.request_cache[cache_key] = (result, time.time())
            
            return result
    
    async def optimized_post(self, url: str, data: Any = None, 
                           headers: dict = None, **kwargs) -> Dict[str, Any]:
        """Optimized POST request"""
        optimized_headers = headers or {}
        optimized_headers['Content-Type'] = 'application/json'
        
        if isinstance(data, (dict, list)):
            import gzip
            import json
            
            # Always serialize to JSON bytes so the body matches the Content-Type header
            data = json.dumps(data).encode('utf-8')
            # Compress bodies larger than 1KB (the server must accept gzip request bodies)
            if self.compression_enabled and len(data) > 1024:
                data = gzip.compress(data)
                optimized_headers['Content-Encoding'] = 'gzip'
        
        session = self.connection_pool.session
        async with session.post(url, data=data, headers=optimized_headers, **kwargs) as response:
            return {
                'status': response.status,
                'headers': dict(response.headers),
                'content': await response.read()
            }

# CDN优化器
class CDNOptimizer:
    """CDN优化器"""
    
    def __init__(self, cdn_endpoints: List[str]):
        self.cdn_endpoints = cdn_endpoints
        self.endpoint_health = {}
        self.health_check_interval = 60  # 60秒
        self._health_check_task = None
    
    async def start_health_check(self):
        """启动健康检查"""
        self._health_check_task = asyncio.create_task(self._health_check_worker())
    
    async def _health_check_worker(self):
        """健康检查工作线程"""
        while True:
            await self._check_all_endpoints()
            await asyncio.sleep(self.health_check_interval)
    
    async def _check_all_endpoints(self):
        """检查所有CDN端点"""
        for endpoint in self.cdn_endpoints:
            try:
                start_time = time.time()
                # 发送健康检查请求
                response = await self._ping_endpoint(endpoint)
                response_time = time.time() - start_time
                
                self.endpoint_health[endpoint] = {
                    'status': 'healthy' if response.status < 400 else 'unhealthy',
                    'response_time': response_time,
                    'last_check': time.time()
                }
            except Exception as e:
                self.endpoint_health[endpoint] = {
                    'status': 'unhealthy',
                    'error': str(e),
                    'last_check': time.time()
                }
    
    def get_optimal_endpoint(self) -> str:
        """获取最优的CDN端点"""
        healthy_endpoints = [
            ep for ep, health in self.endpoint_health.items()
            if health.get('status') == 'healthy'
        ]
        
        if not healthy_endpoints:
            # 如果没有健康的端点,返回默认端点
            return self.cdn_endpoints[0]
        
        # 选择响应时间最短的端点
        optimal_endpoint = min(
            healthy_endpoints,
            key=lambda ep: self.endpoint_health[ep].get('response_time', float('inf'))
        )
        
        return optimal_endpoint

40.7.2 Data Compression Optimization

python
# Data compression manager
class DataCompressionManager:
    """数据压缩管理器"""
    
    def __init__(self):
        self.compression_threshold = 1024  # 1KB
        self.compression_methods = {
            'gzip': self._gzip_compress,
            'deflate': self._deflate_compress,
            'brotli': self._brotli_compress
        }
        self.preferred_method = 'gzip'
    
    def compress_data(self, data: bytes, method: str = None) -> tuple:
        """压缩数据"""
        if len(data) < self.compression_threshold:
            return data, None  # 数据太小,不压缩
        
        method = method or self.preferred_method
        
        if method not in self.compression_methods:
            raise ValueError(f"Unsupported compression method: {method}")
        
        compressor = self.compression_methods[method]
        compressed_data = compressor(data)
        
        # 检查压缩效果
        compression_ratio = len(compressed_data) / len(data)
        if compression_ratio > 0.9:  # 压缩效果不佳
            return data, None
        
        return compressed_data, method
    
    def decompress_data(self, data: bytes, method: str) -> bytes:
        """解压缩数据"""
        if method is None:
            return data  # 未压缩
        
        decompression_methods = {
            'gzip': self._gzip_decompress,
            'deflate': self._deflate_decompress,
            'brotli': self._brotli_decompress
        }
        
        if method not in decompression_methods:
            raise ValueError(f"Unsupported decompression method: {method}")
        
        decompressor = decompression_methods[method]
        return decompressor(data)
    
    def _gzip_compress(self, data: bytes) -> bytes:
        """GZIP压缩"""
        import gzip
        return gzip.compress(data)
    
    def _gzip_decompress(self, data: bytes) -> bytes:
        """GZIP解压缩"""
        import gzip
        return gzip.decompress(data)
    
    def _deflate_compress(self, data: bytes) -> bytes:
        """Deflate压缩"""
        import zlib
        return zlib.compress(data)
    
    def _deflate_decompress(self, data: bytes) -> bytes:
        """Deflate解压缩"""
        import zlib
        return zlib.decompress(data)
    
    def _brotli_compress(self, data: bytes) -> bytes:
        """Brotli压缩"""
        import brotli
        return brotli.compress(data)
    
    def _brotli_decompress(self, data: bytes) -> bytes:
        """Brotli解压缩"""
        import brotli
        return brotli.decompress(data)
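
A round-trip usage sketch with an illustrative payload:

python
manager = DataCompressionManager()

payload = b'{"draft_id": "12345"}' * 200          # illustrative payload, > 1KB
compressed, method = manager.compress_data(payload)

# `method` is None when the data was too small or did not compress well,
# so decompress_data can be called unconditionally on the stored pair
restored = manager.decompress_data(compressed, method)
assert restored == payload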

40.8 Performance Monitoring and Tuning

40.8.1 Collecting Performance Metrics

python
# Performance monitor
class PerformanceMonitor:
    """性能监控器"""
    
    def __init__(self):
        self.metrics = {
            'request_count': 0,
            'request_duration': [],
            'error_count': 0,
            'cache_hit_rate': 0,
            'memory_usage': [],
            'cpu_usage': []
        }
        self.monitoring_enabled = True
        self.collect_interval = 60  # 60秒
        self._monitor_task = None
    
    def start_monitoring(self):
        """启动性能监控"""
        self._monitor_task = asyncio.create_task(self._monitor_worker())
    
    async def _monitor_worker(self):
        """监控工作线程"""
        import psutil
        
        while self.monitoring_enabled:
            # 收集系统指标
            memory_info = psutil.virtual_memory()
            cpu_percent = psutil.cpu_percent(interval=1)
            
            self.metrics['memory_usage'].append({
                'timestamp': time.time(),
                'percent': memory_info.percent,
                'used': memory_info.used,
                'available': memory_info.available
            })
            
            self.metrics['cpu_usage'].append({
                'timestamp': time.time(),
                'percent': cpu_percent
            })
            
            # 限制数据点数量
            if len(self.metrics['memory_usage']) > 1000:
                self.metrics['memory_usage'] = self.metrics['memory_usage'][-1000:]
            
            if len(self.metrics['cpu_usage']) > 1000:
                self.metrics['cpu_usage'] = self.metrics['cpu_usage'][-1000:]
            
            await asyncio.sleep(self.collect_interval)
    
    def record_request(self, duration: float, success: bool = True):
        """记录请求"""
        self.metrics['request_count'] += 1
        self.metrics['request_duration'].append(duration)
        
        if not success:
            self.metrics['error_count'] += 1
        
        # 限制数据点数量
        if len(self.metrics['request_duration']) > 10000:
            self.metrics['request_duration'] = self.metrics['request_duration'][-10000:]
    
    def get_performance_stats(self) -> Dict[str, Any]:
        """获取性能统计"""
        stats = {
            'total_requests': self.metrics['request_count'],
            'error_rate': self.metrics['error_count'] / max(self.metrics['request_count'], 1),
            'avg_request_duration': 0,
            'p95_request_duration': 0,
            'p99_request_duration': 0,
            'current_memory_usage': 0,
            'current_cpu_usage': 0
        }
        
        # 计算请求持续时间统计
        if self.metrics['request_duration']:
            durations = sorted(self.metrics['request_duration'])
            stats['avg_request_duration'] = sum(durations) / len(durations)
            stats['p95_request_duration'] = durations[int(len(durations) * 0.95)]
            stats['p99_request_duration'] = durations[int(len(durations) * 0.99)]
        
        # 获取当前系统资源使用情况
        if self.metrics['memory_usage']:
            stats['current_memory_usage'] = self.metrics['memory_usage'][-1]['percent']
        
        if self.metrics['cpu_usage']:
            stats['current_cpu_usage'] = self.metrics['cpu_usage'][-1]['percent']
        
        return stats
    
    def get_performance_alerts(self) -> List[Dict[str, Any]]:
        """获取性能告警"""
        alerts = []
        stats = self.get_performance_stats()
        
        # 错误率告警
        if stats['error_rate'] > 0.05:  # 5%错误率
            alerts.append({
                'type': 'high_error_rate',
                'severity': 'warning' if stats['error_rate'] < 0.1 else 'critical',
                'message': f"Error rate is {stats['error_rate']:.2%}",
                'timestamp': time.time()
            })
        
        # 响应时间告警
        if stats['p95_request_duration'] > 5.0:  # 5秒
            alerts.append({
                'type': 'high_response_time',
                'severity': 'warning' if stats['p95_request_duration'] < 10.0 else 'critical',
                'message': f"P95 response time is {stats['p95_request_duration']:.2f}s",
                'timestamp': time.time()
            })
        
        # 内存使用告警
        if stats['current_memory_usage'] > 80:  # 80%
            alerts.append({
                'type': 'high_memory_usage',
                'severity': 'warning' if stats['current_memory_usage'] < 90 else 'critical',
                'message': f"Memory usage is {stats['current_memory_usage']:.1f}%",
                'timestamp': time.time()
            })
        
        # CPU使用告警
        if stats['current_cpu_usage'] > 80:  # 80%
            alerts.append({
                'type': 'high_cpu_usage',
                'severity': 'warning' if stats['current_cpu_usage'] < 90 else 'critical',
                'message': f"CPU usage is {stats['current_cpu_usage']:.1f}%",
                'timestamp': time.time()
            })
        
        return alerts

# 性能分析器
class PerformanceProfiler:
    """性能分析器"""
    
    def __init__(self):
        self.profiling_data = {}
        self.enabled = False
    
    def start_profiling(self, profile_name: str):
        """开始性能分析"""
        self.enabled = True
        self.profiling_data[profile_name] = {
            'start_time': time.time(),
            'memory_start': self._get_memory_usage(),
            'calls': []
        }
    
    def record_call(self, profile_name: str, call_name: str, duration: float, memory_delta: int = 0):
        """记录函数调用"""
        if not self.enabled or profile_name not in self.profiling_data:
            return
        
        self.profiling_data[profile_name]['calls'].append({
            'name': call_name,
            'duration': duration,
            'memory_delta': memory_delta,
            'timestamp': time.time()
        })
    
    def stop_profiling(self, profile_name: str) -> Dict[str, Any]:
        """停止性能分析并返回结果"""
        if profile_name not in self.profiling_data:
            return {}
        
        profile_data = self.profiling_data[profile_name]
        profile_data['end_time'] = time.time()
        profile_data['memory_end'] = self._get_memory_usage()
        profile_data['total_duration'] = profile_data['end_time'] - profile_data['start_time']
        profile_data['total_memory_delta'] = profile_data['memory_end'] - profile_data['memory_start']
        
        # 分析调用统计
        calls = profile_data['calls']
        if calls:
            profile_data['call_stats'] = {
                'total_calls': len(calls),
                'avg_duration': sum(c['duration'] for c in calls) / len(calls),
                'max_duration': max(c['duration'] for c in calls),
                'min_duration': min(c['duration'] for c in calls),
                'total_memory_usage': sum(c['memory_delta'] for c in calls)
            }
        
        return profile_data
    
    def _get_memory_usage(self) -> int:
        """获取当前内存使用"""
        import psutil
        return psutil.Process().memory_info().rss

40.9 Performance Optimization Best Practices

40.9.1 Summary of Optimization Strategies

Based on the implementations above, 剪映小助手's performance optimization strategy can be summarized as a few key principles (the sketch after this list shows how several of them combine in one handler):

1. Asynchronous first

  • Make all I/O operations asynchronous
  • Offload CPU-bound work to a thread pool
  • Never block the main event loop

2. Cache aggressively

  • Use a multi-level cache architecture
  • Preload likely-needed data intelligently
  • Apply sensible cache-expiry (TTL) policies

3. Memory efficiency

  • Reuse objects through object pools
  • Release resources as soon as they are no longer needed
  • Monitor memory usage continuously

4. Concurrency control

  • Enforce reasonable concurrency limits
  • Use read-write locks for read-heavy data
  • Manage connections through pools

5. Data-access optimization

  • Prefer batch operations
  • Keep indexes tuned
  • Cache query results
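
As a rough illustration of how these principles combine, here is a hedged sketch of a task handler that checks the multi-level cache, runs the render under the semaphore-based controller, and records request metrics. It reuses MultiLevelCache, ConcurrencyController and PerformanceMonitor from earlier sections plus the hypothetical render_draft coroutine from the sketch in 40.5.2; it is not part of the original source.

python
import time

monitor = PerformanceMonitor()
controller = ConcurrencyController(max_concurrent_tasks=4)
cache = MultiLevelCache()

async def handle_generate(draft_id: str) -> str:
    """Hypothetical handler: cache first, bounded concurrency, metrics always recorded"""
    cached = cache.get(f"video:{draft_id}")
    if cached is not None:
        return cached                                   # principle 2: serve from cache
    
    start = time.time()
    try:
        url = await controller.execute_with_limit(      # principles 1 & 4: async + bounded
            draft_id, render_draft, draft_id
        )
        cache.put(f"video:{draft_id}", url)
        monitor.record_request(time.time() - start, success=True)
        return url
    except Exception:
        monitor.record_request(time.time() - start, success=False)
        raise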

40.9.2 Performance Testing Framework

python
# Performance test framework
class PerformanceTestFramework:
    """性能测试框架"""
    
    def __init__(self):
        self.test_cases = []
        self.results = []
    
    def add_test_case(self, name: str, test_func, iterations: int = 100):
        """添加测试用例"""
        self.test_cases.append({
            'name': name,
            'func': test_func,
            'iterations': iterations
        })
    
    async def run_performance_tests(self) -> List[Dict[str, Any]]:
        """运行性能测试"""
        self.results = []
        
        for test_case in self.test_cases:
            result = await self._run_single_test(test_case)
            self.results.append(result)
        
        return self.results
    
    async def _run_single_test(self, test_case: Dict[str, Any]) -> Dict[str, Any]:
        """运行单个测试"""
        import time
        import statistics
        
        durations = []
        memory_usage = []
        
        for i in range(test_case['iterations']):
            # 记录开始状态
            start_time = time.time()
            start_memory = self._get_memory_usage()
            
            try:
                # 执行测试函数
                if asyncio.iscoroutinefunction(test_case['func']):
                    await test_case['func']()
                else:
                    test_case['func']()
                
                # 记录结束状态
                end_time = time.time()
                end_memory = self._get_memory_usage()
                
                durations.append(end_time - start_time)
                memory_usage.append(end_memory - start_memory)
                
            except Exception as e:
                logger.error(f"Test {test_case['name']} iteration {i} failed: {e}")
        
        # 计算统计信息
        result = {
            'name': test_case['name'],
            'iterations': len(durations),
            'avg_duration': statistics.mean(durations) if durations else 0,
            'min_duration': min(durations) if durations else 0,
            'max_duration': max(durations) if durations else 0,
            'std_duration': statistics.stdev(durations) if len(durations) > 1 else 0,
            'avg_memory_usage': statistics.mean(memory_usage) if memory_usage else 0,
            'total_time': sum(durations)
        }
        
        return result
    
    def _get_memory_usage(self) -> int:
        """获取内存使用"""
        import psutil
        return psutil.Process().memory_info().rss
    
    def generate_performance_report(self) -> str:
        """生成性能报告"""
        if not self.results:
            return "No test results available"
        
        report_lines = [
            "# Performance Test Report",
            f"Generated at: {time.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Summary",
            ""
        ]
        
        total_time = sum(result['total_time'] for result in self.results)
        report_lines.append(f"Total test time: {total_time:.2f} seconds")
        report_lines.append(f"Total iterations: {sum(result['iterations'] for result in self.results)}")
        report_lines.append("")
        
        # 详细结果
        report_lines.append("## Detailed Results")
        report_lines.append("")
        
        for result in self.results:
            report_lines.append(f"### {result['name']}")
            report_lines.append(f"- Iterations: {result['iterations']}")
            report_lines.append(f"- Average duration: {result['avg_duration']:.4f}s")
            report_lines.append(f"- Min duration: {result['min_duration']:.4f}s")
            report_lines.append(f"- Max duration: {result['max_duration']:.4f}s")
            report_lines.append(f"- Standard deviation: {result['std_duration']:.4f}s")
            report_lines.append(f"- Average memory usage: {result['avg_memory_usage'] / 1024 / 1024:.2f} MB")
            report_lines.append(f"- Total time: {result['total_time']:.2f}s")
            report_lines.append("")
        
        return "\\n".join(report_lines)

# 使用示例
async def run_performance_tests():
    """运行性能测试"""
    framework = PerformanceTestFramework()
    
    # 添加缓存性能测试
    framework.add_test_case(
        "Cache Performance",
        test_cache_performance,
        iterations=1000
    )
    
    # 添加数据库查询性能测试
    framework.add_test_case(
        "Database Query Performance", 
        test_database_performance,
        iterations=500
    )
    
    # 添加视频处理性能测试
    framework.add_test_case(
        "Video Processing Performance",
        test_video_processing_performance,
        iterations=50
    )
    
    # 运行测试
    results = await framework.run_performance_tests()
    
    # 生成报告
    report = framework.generate_performance_report()
    print(report)
    
    return results

Appendix

Code repositories:

  • GitHub: https://github.com/Hommy-master/capcut-mate
  • Gitee: https://gitee.com/taohongmin-gitee/capcut-mate

API documentation:

  • API docs: https://docs.jcaigc.cn