第40章:性能优化策略
40.1 概述
性能优化是剪映小助手架构设计中的核心考量之一。作为一个处理视频编辑任务的应用,系统在处理大量媒体文件、渲染复杂特效和实时预览时都需要保持高效稳定。本章将详细介绍系统中采用的各类性能优化策略,包括异步处理优化、缓存机制优化、内存管理、并发控制等关键技术。
40.2 异步处理优化
40.2.1 异步I/O操作
剪映小助手广泛采用异步I/O操作来提升系统的并发处理能力,特别是在文件读写和网络请求方面:
python
# 异步文件操作示例
import asyncio
import aiofiles
async def async_read_file(file_path: str) -> str:
"""异步读取文件内容"""
async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
content = await f.read()
return content
async def async_write_file(file_path: str, content: str) -> None:
"""异步写入文件内容"""
async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
await f.write(content)
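下面给出一个调用上述异步文件函数的小示例(示意代码,其中的文件路径仅为假设),用asyncio.gather让多个读取并发执行:

python
# 使用示例(示意): 并发读取多个文件,路径仅为演示用途
import asyncio

async def load_configs():
    paths = ["config/a.json", "config/b.json"]  # 假设的文件路径
    # gather使多个异步读取并发进行,而不是逐个等待
    contents = await asyncio.gather(*(async_read_file(p) for p in paths))
    return dict(zip(paths, contents))

if __name__ == "__main__":
    print(asyncio.run(load_configs()))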
40.2.2 异步任务处理
系统中的视频生成任务采用异步处理模式,避免阻塞主线程:
python
# 基于src/pyJianYingDraft/core/task.py的实现
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict

class AsyncTaskProcessor:
    """异步任务处理器"""
    def __init__(self, max_workers: int = 4):
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
async def process_video_task(self, task_data: Dict[str, Any]) -> str:
"""异步处理视频任务"""
        # 在线程池中执行耗时的视频处理操作,避免阻塞事件循环
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
self.executor,
self._sync_process_video,
task_data
)
return result
def _sync_process_video(self, task_data: Dict[str, Any]) -> str:
"""同步视频处理方法"""
# 实际的视频处理逻辑
pass
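下面是一个假设性的调用示意,task_data中的字段仅用于演示:

python
# 使用示例(示意)
async def main():
    processor = AsyncTaskProcessor(max_workers=4)
    # 并发提交两个任务,CPU密集的部分在线程池中执行
    results = await asyncio.gather(
        processor.process_video_task({"draft_id": "demo-1"}),  # 假设的任务数据
        processor.process_video_task({"draft_id": "demo-2"}),
    )
    print(results)

asyncio.run(main())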
40.2.3 异步数据库操作
对于数据库操作,系统同样采用异步模式以提升性能:
python
# 异步数据库连接池
from databases import Database
class AsyncDatabaseManager:
"""异步数据库管理器"""
def __init__(self, database_url: str):
self.database = Database(database_url)
self._connection_pool_size = 10
async def connect(self):
"""建立数据库连接"""
await self.database.connect()
async def execute_query(self, query: str, values: dict = None):
"""执行异步查询"""
return await self.database.fetch_all(query, values)
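上面的AsyncDatabaseManager可以按如下方式使用(示意代码,表名与字段均为假设;databases库的原生SQL使用:name形式的命名参数):

python
# 使用示例(示意)
async def query_recent_drafts(db: AsyncDatabaseManager):
    await db.connect()
    rows = await db.execute_query(
        "SELECT id, status FROM drafts WHERE status = :status",  # 假设的表与字段
        values={"status": "pending"},
    )
    return rows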
40.3 缓存机制优化
40.3.1 多级缓存架构
系统采用多级缓存架构:内存缓存与文件缓存构成本地两级缓存,必要时可进一步扩展到分布式缓存。下面的实现展示了内存与文件两级缓存:
python
# 基于src/utils/draft_cache.py的实现
from collections import OrderedDict
from typing import Any
import os
import pickle
import time
import threading
class MultiLevelCache:
"""多级缓存系统"""
def __init__(self, max_memory_cache: int = 1000,
cache_ttl: int = 3600):
self.memory_cache = OrderedDict()
self.file_cache_dir = "/tmp/cache"
self.cache_ttl = cache_ttl
self.max_memory_cache = max_memory_cache
self._lock = threading.RLock()
# 确保缓存目录存在
os.makedirs(self.file_cache_dir, exist_ok=True)
def get(self, key: str) -> Any:
"""获取缓存值"""
with self._lock:
# 先检查内存缓存
if key in self.memory_cache:
value, timestamp = self.memory_cache[key]
if time.time() - timestamp < self.cache_ttl:
# 移动到末尾(LRU)
self.memory_cache.move_to_end(key)
return value
else:
# 过期,删除
del self.memory_cache[key]
# 检查文件缓存
file_cache_path = os.path.join(self.file_cache_dir, f"{key}.cache")
if os.path.exists(file_cache_path):
with open(file_cache_path, 'rb') as f:
cached_data = pickle.load(f)
if time.time() - cached_data['timestamp'] < self.cache_ttl:
# 重新加载到内存缓存
self.put(key, cached_data['value'])
return cached_data['value']
return None
def put(self, key: str, value: Any) -> None:
"""设置缓存值"""
with self._lock:
# 添加到内存缓存
self.memory_cache[key] = (value, time.time())
self.memory_cache.move_to_end(key)
# 如果超出限制,移除最旧的项
if len(self.memory_cache) > self.max_memory_cache:
self.memory_cache.popitem(last=False)
# 同时保存到文件缓存
file_cache_path = os.path.join(self.file_cache_dir, f"{key}.cache")
cached_data = {
'value': value,
'timestamp': time.time()
}
with open(file_cache_path, 'wb') as f:
pickle.dump(cached_data, f)
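典型的使用方式是"先查缓存、未命中再回源加载"。下面是一个示意,其中load_draft_meta是假设的数据加载函数:

python
# 使用示例(示意): 缓存未命中时回源加载
cache = MultiLevelCache(max_memory_cache=500, cache_ttl=1800)

def get_draft_meta(draft_id: str):
    key = f"draft_meta:{draft_id}"
    meta = cache.get(key)
    if meta is None:
        meta = load_draft_meta(draft_id)  # 假设的加载函数,未命中时回源
        cache.put(key, meta)
    return meta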
40.3.2 智能缓存预加载
系统实现智能缓存预加载机制,提前加载可能需要的数据:
python
class SmartCachePreloader:
"""智能缓存预加载器"""
def __init__(self, cache_manager: MultiLevelCache):
self.cache_manager = cache_manager
self.preload_patterns = {
'video_segments': ['popular_videos', 'recent_videos'],
'effects': ['commonly_used_effects', 'trending_effects'],
'templates': ['popular_templates', 'recent_templates']
}
self.preload_thread = None
self.stop_preload = False
def start_preload(self):
"""启动预加载线程"""
self.preload_thread = threading.Thread(target=self._preload_worker)
self.preload_thread.daemon = True
self.preload_thread.start()
def _preload_worker(self):
"""预加载工作线程"""
while not self.stop_preload:
for category, keys in self.preload_patterns.items():
for key in keys:
if not self.cache_manager.get(key):
# 异步加载数据到缓存
data = self._load_data_for_key(category, key)
if data:
self.cache_manager.put(key, data)
# 每隔一段时间检查一次
time.sleep(300) # 5分钟
def _load_data_for_key(self, category: str, key: str) -> Any:
"""根据类别和键加载数据"""
# 实际的数据加载逻辑
pass
40.4 内存管理优化
40.4.1 对象池模式
系统采用对象池模式来重用频繁创建和销毁的对象:
python
# 基于src/pyJianYingDraft/core/pool.py的实现
import threading
import numpy as np
from typing import TypeVar, Generic, List, Optional
T = TypeVar('T')
class ObjectPool(Generic[T]):
"""通用对象池"""
def __init__(self, factory_func, reset_func=None, max_size: int = 100):
self.factory_func = factory_func
self.reset_func = reset_func
self.max_size = max_size
self._pool: List[T] = []
        # 以对象id跟踪使用中的对象,避免要求对象可哈希(例如numpy帧数组)
        self._in_use: set = set()
self._lock = threading.Lock()
def acquire(self) -> T:
"""从池中获取对象"""
with self._lock:
if self._pool:
obj = self._pool.pop()
else:
obj = self.factory_func()
            self._in_use.add(id(obj))
return obj
def release(self, obj: T) -> None:
"""将对象归还到池中"""
with self._lock:
            if id(obj) in self._in_use:
                self._in_use.discard(id(obj))
if len(self._pool) < self.max_size:
if self.reset_func:
self.reset_func(obj)
self._pool.append(obj)
def clear(self) -> None:
"""清空对象池"""
with self._lock:
self._pool.clear()
self._in_use.clear()
# 视频帧对象池示例
class VideoFramePool:
"""视频帧对象池"""
def __init__(self, width: int, height: int, max_size: int = 50):
self.width = width
self.height = height
self.pool = ObjectPool(
factory_func=lambda: np.zeros((height, width, 3), dtype=np.uint8),
reset_func=lambda frame: frame.fill(0),
max_size=max_size
)
def get_frame(self) -> np.ndarray:
"""获取视频帧"""
return self.pool.acquire()
def return_frame(self, frame: np.ndarray) -> None:
"""归还视频帧"""
self.pool.release(frame)
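使用对象池时应保证对象总能被归还,下面是一个示意(帧处理逻辑仅为演示):

python
# 使用示例(示意): 用try/finally保证帧对象归还到池中
pool = VideoFramePool(width=1920, height=1080, max_size=50)

def render_one_frame():
    frame = pool.get_frame()
    try:
        frame[:, :, 1] = 255  # 示意性的帧处理: 填充绿色通道
        return frame.mean()
    finally:
        pool.return_frame(frame)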
40.4.2 内存映射文件
对于大文件处理,系统使用内存映射文件技术:
python
import asyncio
import mmap
import os
class MemoryMappedFileHandler:
"""内存映射文件处理器"""
def __init__(self, file_path: str):
self.file_path = file_path
self.file_size = os.path.getsize(file_path)
self._mmap = None
self._file = None
def __enter__(self):
"""上下文管理器进入"""
self._file = open(self.file_path, 'rb')
self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""上下文管理器退出"""
if self._mmap:
self._mmap.close()
if self._file:
self._file.close()
def read_chunk(self, offset: int, size: int) -> bytes:
"""读取文件块"""
if self._mmap and offset + size <= self.file_size:
return self._mmap[offset:offset + size]
return b''
def get_buffer(self) -> mmap.mmap:
"""获取内存映射缓冲区"""
return self._mmap
# 大视频文件处理示例
async def process_large_video(file_path: str, chunk_size: int = 1024*1024):
"""处理大视频文件"""
with MemoryMappedFileHandler(file_path) as handler:
file_size = handler.file_size
processed_size = 0
while processed_size < file_size:
chunk = handler.read_chunk(processed_size, chunk_size)
if not chunk:
break
# 异步处理数据块
await process_video_chunk(chunk)
processed_size += len(chunk)
# 定期让出控制权,避免阻塞
if processed_size % (chunk_size * 10) == 0:
await asyncio.sleep(0)
40.4.3 垃圾回收优化
系统通过调整垃圾回收参数来优化内存使用:
python
import gc
import logging
import sys
import threading
import time
from typing import Any, Dict

logger = logging.getLogger(__name__)
class GCOptimizer:
"""垃圾回收优化器"""
def __init__(self):
self.original_thresholds = gc.get_threshold()
        self.optimized_thresholds = (500, 5, 5)  # 低于CPython默认的(700, 10, 10),回收更频繁、更激进
def enable_optimized_gc(self):
"""启用优化的垃圾回收"""
gc.set_threshold(*self.optimized_thresholds)
gc.enable()
def disable_gc_for_critical_section(self):
"""在关键代码段禁用垃圾回收"""
gc.disable()
def restore_gc(self):
"""恢复垃圾回收设置"""
gc.set_threshold(*self.original_thresholds)
gc.enable()
def force_collection(self, generation: int = 2):
"""强制进行垃圾回收"""
collected = gc.collect(generation)
return collected
def get_memory_stats(self) -> Dict[str, Any]:
"""获取内存统计信息"""
return {
'gc_stats': gc.get_stats(),
'objects_count': len(gc.get_objects()),
'garbage_count': len(gc.garbage),
'thresholds': gc.get_threshold(),
            # 注意: 这里统计的是对象引用列表本身的大小,并非进程的真实内存占用
            'memory_usage': sys.getsizeof(gc.get_objects())
}
# 内存监控器
class MemoryMonitor:
"""内存监控器"""
def __init__(self, check_interval: int = 60):
self.check_interval = check_interval
self.monitoring = False
self.monitor_thread = None
self.memory_threshold = 1024 * 1024 * 1024 # 1GB
def start_monitoring(self):
"""开始内存监控"""
self.monitoring = True
self.monitor_thread = threading.Thread(target=self._monitor_worker)
self.monitor_thread.daemon = True
self.monitor_thread.start()
def _monitor_worker(self):
"""监控工作线程"""
import psutil
while self.monitoring:
memory_info = psutil.virtual_memory()
if memory_info.used > self.memory_threshold:
# 内存使用过高,触发垃圾回收
gc_optimizer = GCOptimizer()
gc_optimizer.force_collection()
# 清理缓存
self._cleanup_caches()
logger.warning(f"High memory usage detected: {memory_info.percent}%")
time.sleep(self.check_interval)
def _cleanup_caches(self):
"""清理各类缓存"""
# 清理草稿缓存
if hasattr(draft_cache, 'DRAFT_CACHE'):
draft_cache.DRAFT_CACHE.clear()
# 清理其他缓存
# ...
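为了避免在关键代码段中忘记恢复垃圾回收设置,可以把GCOptimizer包装成上下文管理器。下面是一个示意,no_gc为假设的辅助函数:

python
# 示意: 用上下文管理器包装GC开关,确保关键代码段结束后恢复设置
from contextlib import contextmanager

@contextmanager
def no_gc(optimizer: GCOptimizer):
    optimizer.disable_gc_for_critical_section()
    try:
        yield
    finally:
        optimizer.restore_gc()

# 用法: 在对停顿敏感的处理段落中临时关闭GC
optimizer = GCOptimizer()
with no_gc(optimizer):
    pass  # 此处执行对延迟敏感的处理逻辑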
40.5 并发控制优化
40.5.1 连接池管理
系统实现数据库连接池和HTTP连接池管理:
python
# 数据库连接池
from sqlalchemy.pool import QueuePool
import sqlalchemy as sa
class DatabaseConnectionPool:
"""数据库连接池管理器"""
def __init__(self, database_url: str, pool_size: int = 20,
max_overflow: int = 40, pool_timeout: int = 30):
self.engine = sa.create_engine(
database_url,
poolclass=QueuePool,
pool_size=pool_size,
max_overflow=max_overflow,
pool_timeout=pool_timeout,
pool_pre_ping=True, # 连接健康检查
pool_recycle=3600 # 连接回收时间
)
def get_connection(self):
"""获取数据库连接"""
return self.engine.connect()
def execute_query(self, query: str, params: dict = None):
"""执行查询"""
with self.get_connection() as conn:
result = conn.execute(sa.text(query), params or {})
return result.fetchall()
# HTTP连接池
import aiohttp
import asyncio
class HTTPConnectionPool:
"""HTTP连接池管理器"""
def __init__(self, max_connections: int = 100,
max_connections_per_host: int = 10,
timeout: int = 30):
self.connector = aiohttp.TCPConnector(
limit=max_connections,
limit_per_host=max_connections_per_host,
ttl_dns_cache=300,
use_dns_cache=True,
keepalive_timeout=30
)
self.timeout = aiohttp.ClientTimeout(total=timeout)
self._session = None
@property
def session(self) -> aiohttp.ClientSession:
"""获取HTTP会话"""
if self._session is None or self._session.closed:
self._session = aiohttp.ClientSession(
connector=self.connector,
timeout=self.timeout
)
return self._session
async def close(self):
"""关闭连接池"""
if self._session and not self._session.closed:
await self._session.close()
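下面是一个复用HTTP连接池会话发起并发请求的示意(URL仅为假设):

python
# 使用示例(示意)
async def fetch_materials():
    pool = HTTPConnectionPool(max_connections=50)
    try:
        urls = ["https://example.com/a.json", "https://example.com/b.json"]  # 假设的URL

        async def fetch(url):
            # 复用同一个会话与连接池,避免重复建立TCP连接
            async with pool.session.get(url) as resp:
                return await resp.text()

        return await asyncio.gather(*(fetch(u) for u in urls))
    finally:
        await pool.close()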
40.5.2 信号量控制
系统使用信号量来控制并发访问:
python
# 基于src/service/video_gen.py的实现
import asyncio
from typing import Dict, Any
import threading
class ConcurrencyController:
"""并发控制器"""
def __init__(self, max_concurrent_tasks: int = 5):
self.max_concurrent_tasks = max_concurrent_tasks
self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
self.active_tasks: Dict[str, asyncio.Task] = {}
self._lock = threading.Lock()
async def execute_with_limit(self, task_id: str, coro_func, *args, **kwargs):
"""在并发限制下执行任务"""
async with self.semaphore:
with self._lock:
if task_id in self.active_tasks:
raise ValueError(f"Task {task_id} is already running")
try:
task = asyncio.create_task(coro_func(*args, **kwargs))
with self._lock:
self.active_tasks[task_id] = task
result = await task
return result
finally:
with self._lock:
self.active_tasks.pop(task_id, None)
def get_active_task_count(self) -> int:
"""获取当前活跃任务数"""
with self._lock:
return len(self.active_tasks)
async def cancel_task(self, task_id: str) -> bool:
"""取消指定任务"""
with self._lock:
task = self.active_tasks.get(task_id)
if task and not task.done():
task.cancel()
return True
return False
# 视频生成任务并发控制
class VideoGenConcurrencyController(ConcurrencyController):
"""视频生成并发控制器"""
def __init__(self):
# 根据系统资源动态调整最大并发数
max_tasks = self._calculate_max_concurrent_tasks()
super().__init__(max_tasks)
def _calculate_max_concurrent_tasks(self) -> int:
"""根据系统资源计算最大并发任务数"""
import psutil
cpu_count = psutil.cpu_count()
memory_gb = psutil.virtual_memory().total / (1024**3)
# 基于CPU核心数和内存大小计算
cpu_based_limit = cpu_count * 2
memory_based_limit = int(memory_gb / 2) # 每2GB内存支持1个并发任务
return min(cpu_based_limit, memory_based_limit, 10) # 最多10个
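并发控制器的调用方式如下面的示意所示,其中generate_video是假设的视频生成协程:

python
# 使用示例(示意)
async def generate_video(draft_id: str) -> str:
    await asyncio.sleep(0.1)  # 模拟耗时的生成过程
    return f"{draft_id}: done"

async def main():
    controller = VideoGenConcurrencyController()
    results = await asyncio.gather(
        controller.execute_with_limit("task-1", generate_video, "draft-1"),
        controller.execute_with_limit("task-2", generate_video, "draft-2"),
    )
    print(results, controller.get_active_task_count())

asyncio.run(main())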
40.5.3 读写锁优化
对于读多写少的场景,系统使用读写锁优化:
python
import threading
from typing import Any, Dict, List, Optional
class ReadWriteLock:
"""读写锁实现"""
def __init__(self):
self._read_ready = threading.Condition(threading.RLock())
self._readers = 0
self._writers = 0
self._write_waiters = 0
def acquire_read(self):
"""获取读锁"""
self._read_ready.acquire()
try:
while self._writers > 0 or self._write_waiters > 0:
self._read_ready.wait()
self._readers += 1
finally:
self._read_ready.release()
def release_read(self):
"""释放读锁"""
self._read_ready.acquire()
try:
self._readers -= 1
if self._readers == 0:
                self._read_ready.notify_all()
finally:
self._read_ready.release()
def acquire_write(self):
"""获取写锁"""
self._read_ready.acquire()
self._write_waiters += 1
try:
while self._readers > 0 or self._writers > 0:
self._read_ready.wait()
self._write_waiters -= 1
self._writers += 1
finally:
self._read_ready.release()
def release_write(self):
"""释放写锁"""
self._read_ready.acquire()
try:
self._writers -= 1
            self._read_ready.notify_all()
finally:
self._read_ready.release()
# 缓存数据结构的读写锁应用
class ConcurrentCache:
"""并发安全缓存"""
def __init__(self):
self._cache: Dict[str, Any] = {}
self._lock = ReadWriteLock()
def get(self, key: str) -> Optional[Any]:
"""获取缓存值(读操作)"""
self._lock.acquire_read()
try:
return self._cache.get(key)
finally:
self._lock.release_read()
def get_all(self) -> Dict[str, Any]:
"""获取所有缓存(读操作)"""
self._lock.acquire_read()
try:
return self._cache.copy()
finally:
self._lock.release_read()
def put(self, key: str, value: Any) -> None:
"""设置缓存值(写操作)"""
self._lock.acquire_write()
try:
self._cache[key] = value
finally:
self._lock.release_write()
def remove(self, key: str) -> bool:
"""删除缓存值(写操作)"""
self._lock.acquire_write()
try:
return self._cache.pop(key, None) is not None
finally:
self._lock.release_write()
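为了减少acquire/release配对出错的风险,还可以给ReadWriteLock补充上下文管理器形式的用法。下面是一个示意(read_locked、write_locked为假设的辅助函数):

python
# 示意: 把读/写锁包装成上下文管理器,避免漏掉release
from contextlib import contextmanager

@contextmanager
def read_locked(lock: ReadWriteLock):
    lock.acquire_read()
    try:
        yield
    finally:
        lock.release_read()

@contextmanager
def write_locked(lock: ReadWriteLock):
    lock.acquire_write()
    try:
        yield
    finally:
        lock.release_write()

# 用法示例
rw_lock = ReadWriteLock()
shared_state = {}
with write_locked(rw_lock):
    shared_state["key"] = "value"
with read_locked(rw_lock):
    print(shared_state.get("key"))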
40.6 数据库性能优化
40.6.1 连接池和查询优化
python
# 数据库查询优化器
import logging
import time
from typing import Any, Dict, List

logger = logging.getLogger(__name__)
class DatabaseQueryOptimizer:
"""数据库查询优化器"""
def __init__(self, engine):
self.engine = engine
self.query_cache = {}
self.cache_ttl = 300 # 5分钟缓存
def optimize_query(self, query: str, params: dict = None) -> str:
"""优化SQL查询"""
# 添加查询提示
optimized_query = self._add_query_hints(query)
# 使用预编译语句
if params:
optimized_query = self._parameterize_query(optimized_query, params)
return optimized_query
    def _add_query_hints(self, query: str) -> str:
        """添加查询优化提示"""
        # 对于大表查询,添加索引提示
        # 注意: MySQL的USE INDEX提示必须紧跟在表名之后,而不能放在SELECT之后
        if 'SELECT' in query.upper() and 'video_segments' in query.lower():
            return query.replace(
                'video_segments',
                'video_segments USE INDEX (idx_video_segment_time)',
                1
            )
        return query
def _parameterize_query(self, query: str, params: dict) -> str:
"""参数化查询"""
# 使用命名参数防止SQL注入
return query
async def execute_optimized_query(self, query: str, params: dict = None) -> List[Dict]:
"""执行优化的查询"""
# 检查查询缓存
cache_key = f"{query}_{hash(str(params))}"
current_time = time.time()
if cache_key in self.query_cache:
cached_result, timestamp = self.query_cache[cache_key]
if current_time - timestamp < self.cache_ttl:
return cached_result
# 优化查询
optimized_query = self.optimize_query(query, params)
# 执行查询
async with self.engine.acquire() as conn:
result = await conn.fetch(optimized_query, params)
# 缓存结果
self.query_cache[cache_key] = (result, current_time)
return result
# 索引管理器
class IndexManager:
"""数据库索引管理器"""
def __init__(self, engine):
self.engine = engine
self.recommended_indexes = {
'video_segments': [
'CREATE INDEX idx_video_segment_time ON video_segments(start_time, end_time)',
'CREATE INDEX idx_video_segment_material ON video_segments(material_id)',
'CREATE INDEX idx_video_segment_track ON video_segments(track_id)'
],
'effects': [
'CREATE INDEX idx_effect_type ON effects(effect_type)',
'CREATE INDEX idx_effect_segment ON effects(segment_id)',
'CREATE INDEX idx_effect_time ON effects(start_time, end_time)'
],
'drafts': [
'CREATE INDEX idx_draft_created ON drafts(created_at)',
'CREATE INDEX idx_draft_status ON drafts(status)',
'CREATE INDEX idx_draft_user ON drafts(user_id)'
]
}
async def create_recommended_indexes(self):
"""创建推荐的索引"""
for table, indexes in self.recommended_indexes.items():
for index_sql in indexes:
try:
async with self.engine.acquire() as conn:
await conn.execute(index_sql)
logger.info(f"Created index: {index_sql}")
except Exception as e:
logger.warning(f"Failed to create index {index_sql}: {e}")
async def analyze_table_performance(self, table_name: str) -> Dict[str, Any]:
"""分析表性能"""
# 获取表统计信息
query = """
SELECT
table_rows,
data_length,
index_length,
auto_increment
FROM information_schema.tables
WHERE table_schema = DATABASE() AND table_name = :table_name
"""
async with self.engine.acquire() as conn:
stats = await conn.fetch(query, {'table_name': table_name})
if stats:
return {
'table_rows': stats[0]['table_rows'],
'data_size': stats[0]['data_length'],
'index_size': stats[0]['index_length'],
'total_size': stats[0]['data_length'] + stats[0]['index_length'],
'auto_increment': stats[0]['auto_increment']
}
return {}
40.6.2 批量操作优化
python
# 批量插入优化器
import logging
from typing import Any, Dict, List

logger = logging.getLogger(__name__)
class BatchInsertOptimizer:
"""批量插入优化器"""
def __init__(self, engine, batch_size: int = 1000):
self.engine = engine
self.batch_size = batch_size
self.pending_inserts = []
    async def add_insert(self, table: str, data: Dict[str, Any]):
        """添加待插入数据"""
        self.pending_inserts.append((table, data))
        # 如果达到批次大小,执行批量插入(execute_batch_inserts是协程,需要await)
        if len(self.pending_inserts) >= self.batch_size:
            await self.execute_batch_inserts()
async def execute_batch_inserts(self):
"""执行批量插入"""
if not self.pending_inserts:
return
# 按表分组
table_groups = {}
for table, data in self.pending_inserts:
if table not in table_groups:
table_groups[table] = []
table_groups[table].append(data)
# 批量插入每个表的数据
for table, data_list in table_groups.items():
await self._batch_insert_table(table, data_list)
# 清空待插入列表
self.pending_inserts.clear()
async def _batch_insert_table(self, table: str, data_list: List[Dict[str, Any]]):
"""批量插入单个表的数据"""
if not data_list:
return
# 构建批量插入SQL
columns = list(data_list[0].keys())
placeholders = ', '.join([f':{col}' for col in columns])
columns_str = ', '.join(columns)
insert_sql = f"""
INSERT INTO {table} ({columns_str})
VALUES ({placeholders})
"""
# 执行批量插入
async with self.engine.acquire() as conn:
await conn.executemany(insert_sql, data_list)
logger.info(f"Batch inserted {len(data_list)} records into {table}")
async def flush(self):
"""刷新所有待插入数据"""
await self.execute_batch_inserts()
# 批量更新优化器
class BatchUpdateOptimizer:
"""批量更新优化器"""
def __init__(self, engine):
self.engine = engine
async def batch_update_with_case(self, table: str, update_data: List[Dict[str, Any]],
id_column: str = 'id'):
"""使用CASE语句进行批量更新"""
if not update_data:
return
# 获取要更新的列(排除ID列)
update_columns = [col for col in update_data[0].keys() if col != id_column]
# 构建CASE语句
case_statements = []
for column in update_columns:
case_parts = []
            for data in update_data:
                # 注意: 这里直接拼接数值,仅适用于数值列;字符串值应使用参数绑定以避免SQL注入
                case_parts.append(f"WHEN {id_column} = {data[id_column]} THEN {data[column]}")
case_statement = f"{column} = CASE {' '.join(case_parts)} END"
case_statements.append(case_statement)
# 构建WHERE子句
ids = [str(data[id_column]) for data in update_data]
where_clause = f"{id_column} IN ({','.join(ids)})"
# 构建完整的UPDATE语句
update_sql = f"""
UPDATE {table}
SET {', '.join(case_statements)}
WHERE {where_clause}
"""
# 执行更新
async with self.engine.acquire() as conn:
await conn.execute(update_sql)
logger.info(f"Batch updated {len(update_data)} records in {table}")
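上面的批量优化器可以按如下方式组合使用(示意代码,表名video_segments与segments数据结构均为假设):

python
# 使用示例(示意): 批量导入片段数据
async def import_segments(engine, segments):
    inserter = BatchInsertOptimizer(engine, batch_size=500)
    for seg in segments:
        await inserter.add_insert("video_segments", seg)  # 假设的表名与数据
    # 不足一个批次的剩余数据也要刷新写入
    await inserter.flush()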
40.7 网络性能优化
40.7.1 HTTP请求优化
python
# HTTP请求优化器
import asyncio
import time
from typing import Any, Dict, List
class HTTPRequestOptimizer:
"""HTTP请求优化器"""
def __init__(self, connection_pool: HTTPConnectionPool):
self.connection_pool = connection_pool
self.request_cache = {}
self.cache_ttl = 300 # 5分钟
self.compression_enabled = True
async def optimized_get(self, url: str, headers: dict = None,
cache_key: str = None, **kwargs) -> Dict[str, Any]:
"""优化的GET请求"""
# 检查缓存
if cache_key and cache_key in self.request_cache:
cached_result, timestamp = self.request_cache[cache_key]
if time.time() - timestamp < self.cache_ttl:
return cached_result
# 设置优化头
optimized_headers = headers or {}
if self.compression_enabled:
optimized_headers['Accept-Encoding'] = 'gzip, deflate'
optimized_headers['Connection'] = 'keep-alive'
# 执行请求
session = self.connection_pool.session
async with session.get(url, headers=optimized_headers, **kwargs) as response:
result = {
'status': response.status,
'headers': dict(response.headers),
'content': await response.read()
}
# 缓存结果
if cache_key:
self.request_cache[cache_key] = (result, time.time())
return result
async def optimized_post(self, url: str, data: Any = None,
headers: dict = None, **kwargs) -> Dict[str, Any]:
"""优化的POST请求"""
optimized_headers = headers or {}
optimized_headers['Content-Type'] = 'application/json'
if self.compression_enabled and isinstance(data, (dict, list)):
# 压缩大数据
import gzip
import json
json_data = json.dumps(data).encode('utf-8')
if len(json_data) > 1024: # 大于1KB的数据进行压缩
compressed_data = gzip.compress(json_data)
optimized_headers['Content-Encoding'] = 'gzip'
data = compressed_data
session = self.connection_pool.session
async with session.post(url, data=data, headers=optimized_headers, **kwargs) as response:
return {
'status': response.status,
'headers': dict(response.headers),
'content': await response.read()
}
# CDN优化器
class CDNOptimizer:
"""CDN优化器"""
def __init__(self, cdn_endpoints: List[str]):
self.cdn_endpoints = cdn_endpoints
self.endpoint_health = {}
self.health_check_interval = 60 # 60秒
self._health_check_task = None
async def start_health_check(self):
"""启动健康检查"""
self._health_check_task = asyncio.create_task(self._health_check_worker())
async def _health_check_worker(self):
"""健康检查工作线程"""
while True:
await self._check_all_endpoints()
await asyncio.sleep(self.health_check_interval)
async def _check_all_endpoints(self):
"""检查所有CDN端点"""
for endpoint in self.cdn_endpoints:
try:
start_time = time.time()
# 发送健康检查请求
response = await self._ping_endpoint(endpoint)
response_time = time.time() - start_time
self.endpoint_health[endpoint] = {
'status': 'healthy' if response.status < 400 else 'unhealthy',
'response_time': response_time,
'last_check': time.time()
}
except Exception as e:
self.endpoint_health[endpoint] = {
'status': 'unhealthy',
'error': str(e),
'last_check': time.time()
}
def get_optimal_endpoint(self) -> str:
"""获取最优的CDN端点"""
healthy_endpoints = [
ep for ep, health in self.endpoint_health.items()
if health.get('status') == 'healthy'
]
if not healthy_endpoints:
# 如果没有健康的端点,返回默认端点
return self.cdn_endpoints[0]
# 选择响应时间最短的端点
optimal_endpoint = min(
healthy_endpoints,
key=lambda ep: self.endpoint_health[ep].get('response_time', float('inf'))
)
return optimal_endpoint
40.7.2 数据压缩优化
python
# 数据压缩管理器
class DataCompressionManager:
"""数据压缩管理器"""
def __init__(self):
self.compression_threshold = 1024 # 1KB
self.compression_methods = {
'gzip': self._gzip_compress,
'deflate': self._deflate_compress,
'brotli': self._brotli_compress
}
self.preferred_method = 'gzip'
def compress_data(self, data: bytes, method: str = None) -> tuple:
"""压缩数据"""
if len(data) < self.compression_threshold:
return data, None # 数据太小,不压缩
method = method or self.preferred_method
if method not in self.compression_methods:
raise ValueError(f"Unsupported compression method: {method}")
compressor = self.compression_methods[method]
compressed_data = compressor(data)
# 检查压缩效果
compression_ratio = len(compressed_data) / len(data)
if compression_ratio > 0.9: # 压缩效果不佳
return data, None
return compressed_data, method
def decompress_data(self, data: bytes, method: str) -> bytes:
"""解压缩数据"""
if method is None:
return data # 未压缩
decompression_methods = {
'gzip': self._gzip_decompress,
'deflate': self._deflate_decompress,
'brotli': self._brotli_decompress
}
if method not in decompression_methods:
raise ValueError(f"Unsupported decompression method: {method}")
decompressor = decompression_methods[method]
return decompressor(data)
def _gzip_compress(self, data: bytes) -> bytes:
"""GZIP压缩"""
import gzip
return gzip.compress(data)
def _gzip_decompress(self, data: bytes) -> bytes:
"""GZIP解压缩"""
import gzip
return gzip.decompress(data)
def _deflate_compress(self, data: bytes) -> bytes:
"""Deflate压缩"""
import zlib
return zlib.compress(data)
def _deflate_decompress(self, data: bytes) -> bytes:
"""Deflate解压缩"""
import zlib
return zlib.decompress(data)
def _brotli_compress(self, data: bytes) -> bytes:
"""Brotli压缩"""
import brotli
return brotli.compress(data)
def _brotli_decompress(self, data: bytes) -> bytes:
"""Brotli解压缩"""
import brotli
return brotli.decompress(data)
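下面是一个压缩与解压往返的示意,用于验证压缩管理器的数据一致性(payload数据仅为演示):

python
# 使用示例(示意): 压缩-解压往返
manager = DataCompressionManager()
payload = b'{"segments": []}' * 200  # 构造一段大于1KB且可压缩的数据
compressed, method = manager.compress_data(payload)
restored = manager.decompress_data(compressed, method)
assert restored == payload
print(f"original={len(payload)}B, compressed={len(compressed)}B, method={method}")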
40.8 性能监控与调优
40.8.1 性能指标收集
python
# 性能监控器
import asyncio
import time
from typing import Any, Dict, List
class PerformanceMonitor:
"""性能监控器"""
def __init__(self):
self.metrics = {
'request_count': 0,
'request_duration': [],
'error_count': 0,
'cache_hit_rate': 0,
'memory_usage': [],
'cpu_usage': []
}
self.monitoring_enabled = True
self.collect_interval = 60 # 60秒
self._monitor_task = None
def start_monitoring(self):
"""启动性能监控"""
self._monitor_task = asyncio.create_task(self._monitor_worker())
async def _monitor_worker(self):
"""监控工作线程"""
import psutil
while self.monitoring_enabled:
# 收集系统指标
memory_info = psutil.virtual_memory()
cpu_percent = psutil.cpu_percent(interval=1)
self.metrics['memory_usage'].append({
'timestamp': time.time(),
'percent': memory_info.percent,
'used': memory_info.used,
'available': memory_info.available
})
self.metrics['cpu_usage'].append({
'timestamp': time.time(),
'percent': cpu_percent
})
# 限制数据点数量
if len(self.metrics['memory_usage']) > 1000:
self.metrics['memory_usage'] = self.metrics['memory_usage'][-1000:]
if len(self.metrics['cpu_usage']) > 1000:
self.metrics['cpu_usage'] = self.metrics['cpu_usage'][-1000:]
await asyncio.sleep(self.collect_interval)
def record_request(self, duration: float, success: bool = True):
"""记录请求"""
self.metrics['request_count'] += 1
self.metrics['request_duration'].append(duration)
if not success:
self.metrics['error_count'] += 1
# 限制数据点数量
if len(self.metrics['request_duration']) > 10000:
self.metrics['request_duration'] = self.metrics['request_duration'][-10000:]
def get_performance_stats(self) -> Dict[str, Any]:
"""获取性能统计"""
stats = {
'total_requests': self.metrics['request_count'],
'error_rate': self.metrics['error_count'] / max(self.metrics['request_count'], 1),
'avg_request_duration': 0,
'p95_request_duration': 0,
'p99_request_duration': 0,
'current_memory_usage': 0,
'current_cpu_usage': 0
}
# 计算请求持续时间统计
if self.metrics['request_duration']:
durations = sorted(self.metrics['request_duration'])
stats['avg_request_duration'] = sum(durations) / len(durations)
stats['p95_request_duration'] = durations[int(len(durations) * 0.95)]
stats['p99_request_duration'] = durations[int(len(durations) * 0.99)]
# 获取当前系统资源使用情况
if self.metrics['memory_usage']:
stats['current_memory_usage'] = self.metrics['memory_usage'][-1]['percent']
if self.metrics['cpu_usage']:
stats['current_cpu_usage'] = self.metrics['cpu_usage'][-1]['percent']
return stats
def get_performance_alerts(self) -> List[Dict[str, Any]]:
"""获取性能告警"""
alerts = []
stats = self.get_performance_stats()
# 错误率告警
if stats['error_rate'] > 0.05: # 5%错误率
alerts.append({
'type': 'high_error_rate',
'severity': 'warning' if stats['error_rate'] < 0.1 else 'critical',
'message': f"Error rate is {stats['error_rate']:.2%}",
'timestamp': time.time()
})
# 响应时间告警
if stats['p95_request_duration'] > 5.0: # 5秒
alerts.append({
'type': 'high_response_time',
'severity': 'warning' if stats['p95_request_duration'] < 10.0 else 'critical',
'message': f"P95 response time is {stats['p95_request_duration']:.2f}s",
'timestamp': time.time()
})
# 内存使用告警
if stats['current_memory_usage'] > 80: # 80%
alerts.append({
'type': 'high_memory_usage',
'severity': 'warning' if stats['current_memory_usage'] < 90 else 'critical',
'message': f"Memory usage is {stats['current_memory_usage']:.1f}%",
'timestamp': time.time()
})
# CPU使用告警
if stats['current_cpu_usage'] > 80: # 80%
alerts.append({
'type': 'high_cpu_usage',
'severity': 'warning' if stats['current_cpu_usage'] < 90 else 'critical',
'message': f"CPU usage is {stats['current_cpu_usage']:.1f}%",
'timestamp': time.time()
})
return alerts
# 性能分析器
class PerformanceProfiler:
"""性能分析器"""
def __init__(self):
self.profiling_data = {}
self.enabled = False
def start_profiling(self, profile_name: str):
"""开始性能分析"""
self.enabled = True
self.profiling_data[profile_name] = {
'start_time': time.time(),
'memory_start': self._get_memory_usage(),
'calls': []
}
def record_call(self, profile_name: str, call_name: str, duration: float, memory_delta: int = 0):
"""记录函数调用"""
if not self.enabled or profile_name not in self.profiling_data:
return
self.profiling_data[profile_name]['calls'].append({
'name': call_name,
'duration': duration,
'memory_delta': memory_delta,
'timestamp': time.time()
})
def stop_profiling(self, profile_name: str) -> Dict[str, Any]:
"""停止性能分析并返回结果"""
if profile_name not in self.profiling_data:
return {}
profile_data = self.profiling_data[profile_name]
profile_data['end_time'] = time.time()
profile_data['memory_end'] = self._get_memory_usage()
profile_data['total_duration'] = profile_data['end_time'] - profile_data['start_time']
profile_data['total_memory_delta'] = profile_data['memory_end'] - profile_data['memory_start']
# 分析调用统计
calls = profile_data['calls']
if calls:
profile_data['call_stats'] = {
'total_calls': len(calls),
'avg_duration': sum(c['duration'] for c in calls) / len(calls),
'max_duration': max(c['duration'] for c in calls),
'min_duration': min(c['duration'] for c in calls),
'total_memory_usage': sum(c['memory_delta'] for c in calls)
}
return profile_data
def _get_memory_usage(self) -> int:
"""获取当前内存使用"""
import psutil
return psutil.Process().memory_info().rss
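在实际接入时,可以用装饰器把请求耗时与成功与否自动上报给PerformanceMonitor。下面是一个示意,track_performance与handle_generate_request均为假设的名称:

python
# 示意: 用装饰器自动记录请求耗时
import asyncio
import functools
import time

monitor = PerformanceMonitor()

def track_performance(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        start = time.time()
        success = True
        try:
            return await func(*args, **kwargs)
        except Exception:
            success = False
            raise
        finally:
            # 无论成功或失败都记录耗时与结果
            monitor.record_request(time.time() - start, success=success)
    return wrapper

@track_performance
async def handle_generate_request(draft_id: str):
    await asyncio.sleep(0.05)  # 模拟一次请求处理
    return draft_id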
40.9 性能优化最佳实践
40.9.1 优化策略总结
基于上述实现,剪映小助手的性能优化策略可以总结为以下几个关键原则(列表之后给出一个将这些原则组合使用的简化示意):
1. 异步优先原则
- 所有I/O操作都应该是异步的
- 使用线程池处理CPU密集型任务
- 避免阻塞主事件循环
2. 缓存最大化原则
- 实现多级缓存架构
- 智能预加载机制
- 合理的缓存失效策略
3. 内存效率原则
- 使用对象池重用对象
- 及时释放不再使用的资源
- 监控内存使用情况
4. 并发控制原则
- 合理的并发限制
- 读写锁优化
- 连接池管理
5. 数据访问优化原则
- 批量操作优先
- 索引优化
- 查询缓存
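下面的示意把"异步优先、缓存最大化、并发控制"几条原则组合在一个最小例子中,其中render_draft为假设的CPU密集型渲染函数:

python
# 示意: 组合使用异步、并发限制与缓存
import asyncio
from concurrent.futures import ThreadPoolExecutor

_executor = ThreadPoolExecutor(max_workers=4)
_semaphore = asyncio.Semaphore(5)
_result_cache: dict = {}

def render_draft(draft_id: str) -> str:
    return f"{draft_id}: rendered"  # 假设的CPU密集型渲染逻辑

async def render_with_policies(draft_id: str) -> str:
    # 缓存最大化: 命中缓存直接返回
    if draft_id in _result_cache:
        return _result_cache[draft_id]
    # 并发控制: 限制同时进行的渲染数量
    async with _semaphore:
        # 异步优先: CPU密集型任务放入线程池,避免阻塞事件循环
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(_executor, render_draft, draft_id)
    _result_cache[draft_id] = result
    return result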
40.9.2 性能测试框架
python
# 性能测试框架
import asyncio
import logging
from typing import Any, Dict, List

logger = logging.getLogger(__name__)
class PerformanceTestFramework:
"""性能测试框架"""
def __init__(self):
self.test_cases = []
self.results = []
def add_test_case(self, name: str, test_func, iterations: int = 100):
"""添加测试用例"""
self.test_cases.append({
'name': name,
'func': test_func,
'iterations': iterations
})
async def run_performance_tests(self) -> List[Dict[str, Any]]:
"""运行性能测试"""
self.results = []
for test_case in self.test_cases:
result = await self._run_single_test(test_case)
self.results.append(result)
return self.results
async def _run_single_test(self, test_case: Dict[str, Any]) -> Dict[str, Any]:
"""运行单个测试"""
import time
import statistics
durations = []
memory_usage = []
for i in range(test_case['iterations']):
# 记录开始状态
start_time = time.time()
start_memory = self._get_memory_usage()
try:
# 执行测试函数
if asyncio.iscoroutinefunction(test_case['func']):
await test_case['func']()
else:
test_case['func']()
# 记录结束状态
end_time = time.time()
end_memory = self._get_memory_usage()
durations.append(end_time - start_time)
memory_usage.append(end_memory - start_memory)
except Exception as e:
logger.error(f"Test {test_case['name']} iteration {i} failed: {e}")
# 计算统计信息
result = {
'name': test_case['name'],
'iterations': len(durations),
'avg_duration': statistics.mean(durations) if durations else 0,
'min_duration': min(durations) if durations else 0,
'max_duration': max(durations) if durations else 0,
'std_duration': statistics.stdev(durations) if len(durations) > 1 else 0,
'avg_memory_usage': statistics.mean(memory_usage) if memory_usage else 0,
'total_time': sum(durations)
}
return result
def _get_memory_usage(self) -> int:
"""获取内存使用"""
import psutil
return psutil.Process().memory_info().rss
def generate_performance_report(self) -> str:
"""生成性能报告"""
if not self.results:
return "No test results available"
report_lines = [
"# Performance Test Report",
f"Generated at: {time.strftime('%Y-%m-%d %H:%M:%S')}",
"",
"## Summary",
""
]
total_time = sum(result['total_time'] for result in self.results)
report_lines.append(f"Total test time: {total_time:.2f} seconds")
report_lines.append(f"Total iterations: {sum(result['iterations'] for result in self.results)}")
report_lines.append("")
# 详细结果
report_lines.append("## Detailed Results")
report_lines.append("")
for result in self.results:
report_lines.append(f"### {result['name']}")
report_lines.append(f"- Iterations: {result['iterations']}")
report_lines.append(f"- Average duration: {result['avg_duration']:.4f}s")
report_lines.append(f"- Min duration: {result['min_duration']:.4f}s")
report_lines.append(f"- Max duration: {result['max_duration']:.4f}s")
report_lines.append(f"- Standard deviation: {result['std_duration']:.4f}s")
report_lines.append(f"- Average memory usage: {result['avg_memory_usage'] / 1024 / 1024:.2f} MB")
report_lines.append(f"- Total time: {result['total_time']:.2f}s")
report_lines.append("")
        return "\n".join(report_lines)
# 使用示例
async def run_performance_tests():
"""运行性能测试"""
framework = PerformanceTestFramework()
# 添加缓存性能测试
framework.add_test_case(
"Cache Performance",
test_cache_performance,
iterations=1000
)
# 添加数据库查询性能测试
framework.add_test_case(
"Database Query Performance",
test_database_performance,
iterations=500
)
# 添加视频处理性能测试
framework.add_test_case(
"Video Processing Performance",
test_video_processing_performance,
iterations=50
)
# 运行测试
results = await framework.run_performance_tests()
# 生成报告
report = framework.generate_performance_report()
print(report)
return results
附录
代码仓库地址:
- GitHub: https://github.com/Hommy-master/capcut-mate
- Gitee: https://gitee.com/taohongmin-gitee/capcut-mate
接口文档地址:
- API文档地址: https://docs.jcaigc.cn