022_提示缓存与性能优化

提示缓存与性能优化

目录

缓存技术概述

什么是提示缓存

提示缓存是Claude API的一项优化功能,允许缓存提示的特定部分以便重复使用,从而显著减少处理时间和API调用成本。

核心优势

成本降低
  • 写入成本:缓存写入成本比常规输入高25%
  • 读取成本:缓存读取仅为常规输入成本的10%
  • 整体节省:频繁重复使用可节省高达90%的成本
性能提升
  • 响应速度:缓存命中时响应时间显著减少
  • 处理效率:避免重复计算相同的提示内容
  • 系统负载:减少服务器计算压力
应用灵活性
  • 智能缓存:自动识别可缓存的提示部分
  • 动态管理:5分钟的默认缓存生命周期
  • 增量更新:支持部分缓存更新

支持的模型

python
# Models that support prompt caching.
# Fix: legacy (pre-Claude-4) model IDs use the "claude-<generation>-<family>-<date>"
# order (e.g. "claude-3-5-sonnet-..."), not "claude-sonnet-3-5-..."; dates corrected
# to the published release dates.
SUPPORTED_MODELS = [
    "claude-opus-4-20250514",
    "claude-sonnet-4-20250514",
    "claude-3-7-sonnet-20250219",
    "claude-3-5-sonnet-20240620",
    "claude-3-5-haiku-20241022",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229",
]

def check_cache_support(model_name):
    """Return True if *model_name* supports prompt caching."""
    return model_name in SUPPORTED_MODELS

# Example check
if check_cache_support("claude-sonnet-4-20250514"):
    print("该模型支持提示缓存功能")

缓存工作原理

缓存机制详解

缓存创建流程
  1. 提示分析:系统分析提示内容识别可缓存部分
  2. 缓存写入:将指定内容写入缓存存储
  3. 缓存标识:生成唯一的缓存标识符
  4. 生命周期管理:设置5分钟的初始生命周期
缓存匹配逻辑
python
import anthropic
import hashlib
import time

class PromptCacheManager:
    """Track prompt sections registered for Anthropic's ephemeral prompt cache.

    Keeps a local registry (cache key -> cached content block + stats) so
    callers can reuse previously built cache_control blocks.
    """

    def __init__(self):
        self.client = anthropic.Anthropic()
        # cache key -> {"content": block dict, "created_at": epoch seconds, "access_count": int}
        self.cache_registry = {}

    def create_cache_key(self, content):
        """Return a stable registry key for *content* (MD5 hex digest; not security-sensitive)."""
        return hashlib.md5(content.encode()).hexdigest()

    def is_cacheable(self, content, model_type="sonnet"):
        """Return True when *content* meets the minimum cacheable prompt size for *model_type*."""
        # Minimum cacheable prompt length (tokens) per model family.
        token_limits = {
            "opus": 1024,
            "sonnet": 1024,
            "haiku": 2048,
        }

        # Rough estimate; a production integration should use the official tokenizer.
        estimated_tokens = len(content.split()) * 1.3
        min_tokens = token_limits.get(model_type, 1024)

        return estimated_tokens >= min_tokens

    def cache_prompt_section(self, content, section_type="system", model_type="sonnet"):
        """Register *content* as an ephemeral cached prompt section.

        Args:
            content: the prompt text to cache.
            section_type: label for the section (kept for API compatibility).
            model_type: model family used for the minimum-size check
                ("opus" | "sonnet" | "haiku"). New optional parameter; the
                default preserves the previous behavior.

        Returns:
            The content block dict carrying cache_control, or None when the
            content is below the model's minimum cacheable size.
        """
        if not self.is_cacheable(content, model_type):
            # Fix: report the threshold that actually applies — the old message
            # always claimed 1024, which is wrong for haiku's 2048 minimum.
            min_tokens = 2048 if model_type == "haiku" else 1024
            print(f"内容长度不足,无法缓存(需要至少{min_tokens}个token)")
            return None

        cache_key = self.create_cache_key(content)

        cached_content = {
            "type": "text",
            "text": content,
            "cache_control": {"type": "ephemeral"},
        }

        self.cache_registry[cache_key] = {
            "content": cached_content,
            "created_at": time.time(),
            "access_count": 0,
        }

        return cached_content
缓存生命周期
python
def manage_cache_lifecycle(cache_manager):
    """Evict registry entries older than the 5-minute TTL; return the eviction count."""
    now = time.time()

    # Collect keys first so the registry is never mutated while iterating it.
    stale_keys = [
        key
        for key, info in cache_manager.cache_registry.items()
        if now - info["created_at"] > 300  # 5-minute TTL in seconds
    ]

    for key in stale_keys:
        del cache_manager.cache_registry[key]
        print(f"缓存 {key} 已过期并被清理")

    return len(stale_keys)

缓存匹配策略

前缀匹配算法
python
def find_cache_match(new_prompt, cached_prompts):
    """Return the cached entry sharing the longest common prefix with *new_prompt*.

    Returns None when no entry shares any prefix; otherwise a dict with the
    cache key, the prefix length, and the fraction of *new_prompt* covered.
    """
    best = None
    best_length = 0

    for cached_key, cached_entry in cached_prompts.items():
        cached_text = cached_entry["content"]["text"]

        # Length of the shared character prefix between the two prompts.
        prefix_length = 0
        for new_char, cached_char in zip(new_prompt, cached_text):
            if new_char != cached_char:
                break
            prefix_length += 1

        if prefix_length > best_length and prefix_length > 0:
            best_length = prefix_length
            best = {
                "key": cached_key,
                "match_length": prefix_length,
                "cache_ratio": prefix_length / len(new_prompt),
            }

    return best

# Usage example: look up the best cached match for a fresh prompt.
cache_manager = PromptCacheManager()
new_prompt = "分析以下文档的主要内容和结构..."
match_result = find_cache_match(new_prompt, cache_manager.cache_registry)

# Treat >80% prefix coverage as a high-quality cache hit.
if match_result and match_result["cache_ratio"] > 0.8:
    print(f"找到高质量缓存匹配,匹配度:{match_result['cache_ratio']:.2%}")

实现方法详解

基础缓存实现

系统消息缓存
python
def create_cached_system_message(system_content):
    """Wrap *system_content* as a system block marked for ephemeral caching."""
    cached_block = {
        "type": "text",
        "text": system_content,
        "cache_control": {"type": "ephemeral"},
    }
    return {"system": [cached_block]}

# Usage example: a reusable analyst persona — static text worth caching.
system_prompt = """
你是一位专业的数据分析师,具有以下专长:
1. 统计分析和数据挖掘
2. 机器学习算法应用  
3. 数据可视化和报告生成
4. 业务洞察和决策支持

请始终提供:
- 准确的数据分析结果
- 清晰的解释和建议
- 实用的实施步骤
- 风险评估和注意事项
"""

cached_system = create_cached_system_message(system_prompt)
用户消息缓存
python
def create_cached_user_message(user_content, cache_parts=None):
    """Build a user message, caching either the whole text or selected parts.

    When *cache_parts* is None, the entire *user_content* becomes a single
    cached block. Otherwise each part dict supplies "text" plus an optional
    "cache" flag deciding whether that part carries cache_control.
    """
    if cache_parts is None:
        # Whole-message caching.
        whole_block = {
            "type": "text",
            "text": user_content,
            "cache_control": {"type": "ephemeral"},
        }
        return {"role": "user", "content": [whole_block]}

    # Partial caching: mark only the parts flagged with "cache": True.
    blocks = []
    for part in cache_parts:
        block = {"type": "text", "text": part["text"]}
        if part.get("cache", False):
            block["cache_control"] = {"type": "ephemeral"}
        blocks.append(block)

    return {"role": "user", "content": blocks}

# Partial-caching example: the static dataset description is cached, while the
# dated instruction changes daily and must stay uncached.
cache_parts = [
    {
        "text": "以下是需要分析的大型数据集描述...",
        "cache": True  # static part: cache it
    },
    {
        "text": f"请分析今天({time.strftime('%Y-%m-%d')})的数据。",
        "cache": False  # contains today's date, so it must not be cached
    }
]

cached_user_message = create_cached_user_message("", cache_parts)

高级缓存策略

智能缓存分割
python
def intelligent_cache_splitting(prompt_content, max_cache_size=4000):
    """Split *prompt_content* into cacheable (static) and non-cacheable parts.

    Static sections (system instructions, document bodies, background info,
    reference material) are detected by regex and kept as cache candidates
    only when they meet the ~1024-token minimum cache size.

    Note: *max_cache_size* is retained for backward compatibility but is not
    currently enforced.
    """
    import re

    # Headings that normally introduce static, reusable content.
    static_patterns = [
        r"系统说明:.*?(?=\n\n|\n用户)",
        r"文档内容:.*?(?=\n\n|\n问题)",
        r"背景信息:.*?(?=\n\n|\n任务)",
        r"参考资料:.*?(?=\n\n|\n要求)",
    ]

    cache_candidates = []
    for pattern in static_patterns:
        for match in re.finditer(pattern, prompt_content, re.DOTALL):
            content = match.group()
            start, end = match.span()

            # Rough token estimate (~1.3 tokens per word); only sections large
            # enough to satisfy the minimum caching threshold are worth marking.
            if len(content.split()) * 1.3 >= 1024:
                cache_candidates.append({
                    "content": content,
                    "start": start,
                    "end": end,
                    "cacheable": True,
                })

    return optimize_cache_strategy(cache_candidates, prompt_content)

def optimize_cache_strategy(candidates, full_content):
    """Interleave cacheable candidate spans with the non-cacheable gaps.

    Returns an ordered list of {"content", "cacheable"} parts covering
    *full_content* from start to finish.
    """
    # Process candidates in document order.
    candidates.sort(key=lambda x: x["start"])

    optimized_parts = []
    last_end = 0

    for candidate in candidates:
        # Text between the previous candidate and this one is dynamic.
        if candidate["start"] > last_end:
            optimized_parts.append({
                "content": full_content[last_end:candidate["start"]],
                "cacheable": False,
            })

        optimized_parts.append({
            "content": candidate["content"],
            "cacheable": True,
        })

        last_end = candidate["end"]

    # Trailing dynamic text after the last candidate.
    if last_end < len(full_content):
        optimized_parts.append({
            "content": full_content[last_end:],
            "cacheable": False,
        })

    return optimized_parts
多层缓存架构
python
class MultiLevelCacheManager:
    """Three-tier in-memory cache that places entries by content size."""

    def __init__(self):
        self.l1_cache = {}  # small, frequently accessed items (< 1 KB)
        self.l2_cache = {}  # medium-sized items (< 10 KB)
        self.l3_cache = {}  # large documents and datasets

    def determine_cache_level(self, content):
        """Map the size of *content* to a tier name ("l1" | "l2" | "l3")."""
        size = len(content)
        if size < 1000:
            return "l1"
        if size < 10000:
            return "l2"
        return "l3"

    def cache_content(self, key, content, metadata=None):
        """Store *content* under *key* in the tier matching its size; return the entry."""
        level = self.determine_cache_level(content)

        entry = {
            "content": content,
            "metadata": metadata or {},
            "created_at": time.time(),
            "access_count": 0,
            "cache_level": level,
        }

        tier = {"l1": self.l1_cache, "l2": self.l2_cache, "l3": self.l3_cache}[level]
        tier[key] = entry
        return entry

    def get_cached_content(self, key):
        """Return the entry for *key* (searching l1 → l3) or None; updates access stats."""
        for tier in (self.l1_cache, self.l2_cache, self.l3_cache):
            entry = tier.get(key)
            if entry is not None:
                entry["access_count"] += 1
                entry["last_accessed"] = time.time()
                return entry
        return None

    def get_cache_statistics(self):
        """Return per-tier entry counts and total content sizes."""
        def _tier_stats(tier):
            # Size is measured in characters of stored content.
            return {
                "count": len(tier),
                "total_size": sum(len(e["content"]) for e in tier.values()),
            }

        return {
            "l1_cache": _tier_stats(self.l1_cache),
            "l2_cache": _tier_stats(self.l2_cache),
            "l3_cache": _tier_stats(self.l3_cache),
        }

成本优化策略

成本计算模型

基础成本分析
python
class CostOptimizer:
    """Compare traditional vs. cache-assisted API call costs."""

    def __init__(self):
        # Illustrative per-1K-token pricing: cache writes carry a 25% premium
        # over normal input; cache reads cost 10% of the normal input rate.
        self.pricing = {
            "opus": {
                "input": 0.015,
                "output": 0.075,
                "cache_write": 0.01875,  # 25% premium
                "cache_read": 0.0015     # 10% of input rate
            },
            "sonnet": {
                "input": 0.003,
                "output": 0.015,
                "cache_write": 0.00375,
                "cache_read": 0.0003
            },
            "haiku": {
                "input": 0.00025,
                "output": 0.00125,
                "cache_write": 0.0003125,
                "cache_read": 0.000025
            }
        }

    def calculate_traditional_cost(self, model, input_tokens, output_tokens):
        """Return the cost of one uncached call (unknown models fall back to sonnet rates)."""
        rates = self.pricing.get(model, self.pricing["sonnet"])
        input_cost = (input_tokens / 1000) * rates["input"]
        output_cost = (output_tokens / 1000) * rates["output"]
        return input_cost + output_cost

    def calculate_cached_cost(self, model, cached_tokens, new_input_tokens, output_tokens, cache_hits=1):
        """Return total cost: one cache write plus *cache_hits* cache-assisted calls."""
        rates = self.pricing.get(model, self.pricing["sonnet"])

        # The cache is written once, then read on every hit.
        write_cost = (cached_tokens / 1000) * rates["cache_write"]
        read_cost = (cached_tokens / 1000) * rates["cache_read"] * cache_hits
        fresh_input_cost = (new_input_tokens / 1000) * rates["input"] * cache_hits
        output_cost = (output_tokens / 1000) * rates["output"] * cache_hits

        return write_cost + read_cost + fresh_input_cost + output_cost

    def calculate_breakeven_point(self, model, cached_tokens, new_input_tokens, output_tokens):
        """Return the call count after which caching is cheaper, or None if > 100 calls."""
        single_traditional = self.calculate_traditional_cost(
            model, cached_tokens + new_input_tokens, output_tokens
        )

        # Search hit counts 1..100 for the first point where caching wins.
        for hits in range(1, 101):
            with_cache = self.calculate_cached_cost(
                model, cached_tokens, new_input_tokens, output_tokens, hits
            )
            if with_cache < single_traditional * hits:
                return hits

        return None

# 成本分析示例
optimizer = CostOptimizer()

# 假设场景:大型文档分析
cached_tokens = 5000  # 5000个token的文档内容
new_input_tokens = 500  # 500个token的具体问题
output_tokens = 1000  # 1000个token的回答

breakeven = optimizer.calculate_breakeven_point(
    "sonnet", cached_tokens, new_input_tokens, output_tokens
)

print(f"收支平衡点:{breakeven} 次API调用")

# 计算10次调用的成本比较
traditional_cost = optimizer.calculate_traditional_cost(
    "sonnet", cached_tokens + new_input_tokens, output_tokens
) * 10

cached_cost = optimizer.calculate_cached_cost(
    "sonnet", cached_tokens, new_input_tokens, output_tokens, cache_hits=10
)

savings = traditional_cost - cached_cost
savings_percentage = (savings / traditional_cost) * 100

print(f"10次调用传统成本:${traditional_cost:.4f}")
print(f"10次调用缓存成本:${cached_cost:.4f}")
print(f"节省:${savings:.4f} ({savings_percentage:.1f}%)")

智能缓存策略

基于使用模式的优化
python
class UsagePatternOptimizer:
    """Derive caching recommendations from observed prompt-usage frequency."""

    def __init__(self):
        self.usage_history = []    # chronological usage records
        self.cache_performance = {}

    def record_usage(self, prompt_hash, tokens, timestamp=None):
        """Append one usage record; *timestamp* defaults to the current time."""
        self.usage_history.append({
            "prompt_hash": prompt_hash,
            "tokens": tokens,
            "timestamp": time.time() if timestamp is None else timestamp,
        })

    def analyze_usage_patterns(self, time_window=3600):
        """Summarize usage inside the last *time_window* seconds (default: 1 hour)."""
        cutoff = time.time() - time_window
        recent = [u for u in self.usage_history if u["timestamp"] >= cutoff]

        # Count requests per prompt hash.
        frequency_map = {}
        for usage in recent:
            prompt_hash = usage["prompt_hash"]
            frequency_map[prompt_hash] = frequency_map.get(prompt_hash, 0) + 1

        # Prompts seen 3+ times inside the window count as high-frequency.
        high_frequency_prompts = {
            prompt_hash: count
            for prompt_hash, count in frequency_map.items()
            if count >= 3
        }

        return {
            "total_requests": len(recent),
            "unique_prompts": len(frequency_map),
            "high_frequency_prompts": high_frequency_prompts,
            "cache_recommendation": self.generate_cache_recommendations(high_frequency_prompts),
        }

    def generate_cache_recommendations(self, high_frequency_prompts):
        """Translate frequency counts into per-prompt caching recommendations."""
        recommendations = []

        for prompt_hash, frequency in high_frequency_prompts.items():
            # Higher frequency justifies a more aggressive strategy.
            if frequency >= 10:
                strategy, cache_duration = "aggressive_caching", "extended"
            elif frequency >= 5:
                strategy, cache_duration = "selective_caching", "standard"
            else:
                strategy, cache_duration = "minimal_caching", "short"

            recommendations.append({
                "prompt_hash": prompt_hash,
                "frequency": frequency,
                "strategy": strategy,
                "cache_duration": cache_duration,
                "priority": "high" if frequency >= 10 else "medium",
            })

        return recommendations
动态缓存调整
python
def dynamic_cache_adjustment(cache_manager, performance_metrics):
    """Derive cache-strategy adjustments from observed performance metrics.

    *cache_manager* is currently unused but kept for interface compatibility.
    Returns the metrics, the triggered adjustments, and the next review time.
    """
    hit_rate = performance_metrics.get("cache_hit_rate", 0)
    avg_response_time = performance_metrics.get("avg_response_time", 0)
    cost_savings = performance_metrics.get("cost_savings", 0)

    # Each rule pairs a trigger condition with its recommended adjustment.
    rules = [
        (hit_rate < 0.3, {          # hit rate below 30%
            "action": "expand_cache_scope",
            "reason": "低缓存命中率",
            "recommendation": "增加可缓存内容的范围",
        }),
        (avg_response_time > 2.0, {  # responses slower than 2 seconds
            "action": "optimize_cache_structure",
            "reason": "响应时间过长",
            "recommendation": "优化缓存数据结构",
        }),
        (cost_savings < 0.2, {       # savings below 20%
            "action": "revise_cache_strategy",
            "reason": "成本效益不佳",
            "recommendation": "重新评估缓存策略",
        }),
    ]
    adjustments = [adjustment for triggered, adjustment in rules if triggered]

    return {
        "current_metrics": performance_metrics,
        "adjustments": adjustments,
        "next_review": time.time() + 3600,  # re-evaluate in one hour
    }

性能优化实践

响应时间优化

缓存预热策略
python
class CacheWarmupManager:
    """Pre-populate the prompt cache with frequently used content."""

    # Higher rank = processed first.
    _PRIORITY_RANK = {"high": 3, "normal": 2, "low": 1}

    def __init__(self, client):
        self.client = client
        self.warmup_queue = []

    def add_warmup_content(self, content, priority="normal"):
        """Queue *content* for warmup at the given priority ("high"|"normal"|"low")."""
        self.warmup_queue.append({
            "content": content,
            "priority": priority,
            "added_at": time.time(),
        })

    def execute_warmup(self, batch_size=5):
        """Warm up to *batch_size* queued items, highest priority first.

        Returns one status dict per processed item; processed items are
        removed from the queue.
        """
        self.warmup_queue.sort(
            key=lambda item: self._PRIORITY_RANK[item["priority"]],
            reverse=True,
        )

        batch = self.warmup_queue[:batch_size]
        results = []

        for item in batch:
            content_hash = hashlib.md5(item["content"].encode()).hexdigest()
            try:
                # Minimal request whose only purpose is to create the cache entry.
                response = self.client.messages.create(
                    model="claude-sonnet-4-20250514",
                    max_tokens=10,  # smallest useful output to keep warmup cost down
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": item["content"],
                                    "cache_control": {"type": "ephemeral"},
                                }
                            ],
                        }
                    ],
                )
                results.append({
                    "content_hash": content_hash,
                    "status": "warmed",
                    "tokens_cached": self.estimate_tokens(item["content"]),
                })
            except Exception as e:
                results.append({
                    "content_hash": content_hash,
                    "status": "failed",
                    "error": str(e),
                })

        # Drop the processed prefix of the queue.
        self.warmup_queue = self.warmup_queue[batch_size:]
        return results

    def estimate_tokens(self, content):
        """Rough whitespace-based token estimate (~1.3 tokens per word)."""
        return int(len(content.split()) * 1.3)

# Warmup usage example
# NOTE(review): `client` is not defined in this snippet — presumably an
# anthropic.Anthropic() instance created earlier; confirm before running.
warmup_manager = CacheWarmupManager(client)

# Queue commonly used system prompts for warmup
common_system_prompts = [
    "你是一位专业的数据分析师...",
    "你是一位经验丰富的软件工程师...",
    "你是一位资深的产品经理..."
]

for prompt in common_system_prompts:
    warmup_manager.add_warmup_content(prompt, priority="high")

# Run the warmup batch
warmup_results = warmup_manager.execute_warmup()
print(f"完成 {len(warmup_results)} 个内容的缓存预热")
并发处理优化
python
import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor

class ConcurrentCacheProcessor:
    """Process many API requests while reusing prompt caches.

    Requests sharing the same cache key (their system prompt) are grouped so
    the first request in each group creates the cache and the rest reuse it.
    """

    def __init__(self, max_workers=5):
        self.max_workers = max_workers
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    async def process_multiple_requests(self, requests):
        """Run *requests*, serializing cache-sharing groups; return results in request order."""
        cache_groups = self.group_by_cache_potential(requests)

        tasks = []
        for group in cache_groups:
            if group["cache_reusable"]:
                # Sequential so the first call can populate the shared cache.
                task = self.process_cached_group(group)
            else:
                task = self.process_independent_group(group)
            tasks.append(task)

        results = await asyncio.gather(*tasks)
        return self.merge_results(results)

    def group_by_cache_potential(self, requests):
        """Group requests by cache key and flag groups worth cache reuse."""
        cache_map = {}

        for i, request in enumerate(requests):
            cache_key = self.extract_cache_key(request)

            if cache_key in cache_map:
                cache_map[cache_key]["requests"].append((i, request))
            else:
                cache_map[cache_key] = {
                    "cache_key": cache_key,
                    "requests": [(i, request)],
                    # Heuristic: only reasonably large prompts are worth caching.
                    "cache_reusable": len(cache_key) > 100,
                }

        return list(cache_map.values())

    async def process_cached_group(self, group):
        """Process a cache-sharing group; the first request creates the cache."""
        results = []
        cache_created = False

        for request_index, request in group["requests"]:
            if not cache_created:
                request = self.add_cache_control(request)
                cache_created = True

            result = await self.process_single_request(request)
            results.append((request_index, result))

        return results

    async def process_independent_group(self, group):
        """Process a group of unrelated requests (awaited one at a time)."""
        tasks = []
        for request_index, request in group["requests"]:
            tasks.append((request_index, self.process_single_request(request)))

        results = []
        for request_index, task in tasks:
            results.append((request_index, await task))

        return results

    async def process_single_request(self, request):
        """Send one request to the API.

        Fix: this hook was called throughout the class but never defined,
        so every code path failed with AttributeError. It must be supplied
        by the concrete integration (e.g. wrapping client.messages.create).
        """
        raise NotImplementedError("process_single_request must be implemented by the integration")

    def merge_results(self, grouped_results):
        """Fix: was called but never defined. Flatten per-group (index, result)
        pairs back into original request order and return just the results."""
        flat = [pair for group in grouped_results for pair in group]
        flat.sort(key=lambda pair: pair[0])
        return [result for _, result in flat]

    def extract_cache_key(self, request):
        """Use the request's system prompt (if any) as the cache key."""
        if "system" in request:
            return request["system"]
        return ""

    def add_cache_control(self, request):
        """Mark the request's system prompt as an ephemeral cache block."""
        if "system" in request:
            request["system"] = [
                {
                    "type": "text",
                    "text": request["system"],
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        return request

高级应用场景

文档处理优化

大型文档分析
python
class DocumentCacheProcessor:
    """Run multiple analysis tasks against one document, caching it once."""

    def __init__(self, client):
        self.client = client
        self.document_cache = {}  # doc hash -> {content, cached_at, access_count}

    def process_large_document(self, document_content, analysis_tasks):
        """Cache *document_content* once, then run every task in *analysis_tasks* against it."""
        doc_hash = hashlib.md5(document_content.encode()).hexdigest()

        if doc_hash not in self.document_cache:
            self.cache_document(doc_hash, document_content)

        return [
            self.execute_analysis_task(doc_hash, document_content, task)
            for task in analysis_tasks
        ]

    def cache_document(self, doc_hash, content):
        """Record *content* in the local registry under *doc_hash*."""
        self.document_cache[doc_hash] = {
            "content": content,
            "cached_at": time.time(),
            "access_count": 0,
        }

    def execute_analysis_task(self, doc_hash, document_content, task):
        """Run one analysis task; the document block carries cache_control."""
        self.document_cache[doc_hash]["access_count"] += 1

        # The (large) document text is the cached block; the task text is fresh.
        document_block = {
            "type": "text",
            "text": f"文档内容:\n{document_content}",
            "cache_control": {"type": "ephemeral"},
        }
        task_block = {
            "type": "text",
            "text": f"\n\n分析任务:{task['description']}\n\n请按照以下要求进行分析:\n{task['requirements']}",
        }

        response = self.client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=2000,
            messages=[{"role": "user", "content": [document_block, task_block]}],
        )

        return {
            "task": task,
            "result": response.content[0].text,
            "doc_hash": doc_hash,
            "cached": True,
        }

# Usage example
# NOTE(review): `client` is not defined in this snippet — confirm it is an
# anthropic.Anthropic() instance created earlier in the document.
doc_processor = DocumentCacheProcessor(client)

# A large technical document (static content worth caching)
technical_document = """
# 系统架构设计文档

## 1. 系统概述
本系统采用微服务架构...

## 2. 技术栈选择
- 后端:Python/Django
- 前端:React/TypeScript
- 数据库:PostgreSQL
- 缓存:Redis
- 消息队列:RabbitMQ

## 3. 系统组件
### 3.1 用户服务
负责用户认证、权限管理...

### 3.2 订单服务  
处理订单创建、更新、查询...

[...继续更多技术细节...]
"""

# Define several analysis tasks over the same document
analysis_tasks = [
    {
        "description": "架构分析",
        "requirements": "分析系统架构的优缺点,提供改进建议"
    },
    {
        "description": "技术栈评估",
        "requirements": "评估技术栈选择的合理性,推荐替代方案"
    },
    {
        "description": "安全性审查",
        "requirements": "识别潜在的安全风险,提供加固建议"
    },
    {
        "description": "性能优化",
        "requirements": "分析性能瓶颈,提供优化策略"
    }
]

# Run the analyses (the document is cached once and reused for every task)
analysis_results = doc_processor.process_large_document(
    technical_document, 
    analysis_tasks
)

print(f"完成 {len(analysis_results)} 个分析任务")
for result in analysis_results:
    print(f"任务:{result['task']['description']}")
    print(f"使用缓存:{result['cached']}")

批量数据处理

数据集分析优化
python
class DatasetCacheProcessor:
    """Batch-analyze datasets while caching shared schema descriptions."""

    def __init__(self, client):
        self.client = client
        self.schema_cache = {}    # schema signature -> rendered description text
        self.metadata_cache = {}  # reserved for future metadata reuse

    def process_dataset_batch(self, datasets, analysis_type="comprehensive"):
        """Analyze every dataset, reusing a cached schema description when shapes repeat."""
        common_schemas = self.identify_common_schemas(datasets)

        results = []
        for dataset in datasets:
            schema_key = self.get_schema_key(dataset)

            if schema_key in common_schemas:
                # This schema appears more than once: worth caching its description.
                result = self.process_with_cached_schema(dataset, schema_key, analysis_type)
            else:
                result = self.process_individual_dataset(dataset, analysis_type)

            results.append(result)

        return results

    def identify_common_schemas(self, datasets):
        """Return {schema_signature: [dataset indices]} for signatures seen more than once."""
        schema_patterns = {}

        for i, dataset in enumerate(datasets):
            signature = self.extract_schema_signature(dataset)
            schema_patterns.setdefault(signature, []).append(i)

        # Only schemas that repeat are worth caching.
        return {
            signature: indices
            for signature, indices in schema_patterns.items()
            if len(indices) > 1
        }

    def extract_schema_signature(self, dataset):
        """Build an order-independent signature from the dataset's columns or schema keys.

        NOTE(review): this sorts dataset["columns"] directly (works when column
        entries are strings or consistently comparable), while
        create_schema_description expects column dicts with "name"/"type" keys —
        confirm which shape callers actually pass.
        """
        if isinstance(dataset, dict):
            if "columns" in dataset:
                return tuple(sorted(dataset["columns"]))
            elif "schema" in dataset:
                return tuple(sorted(dataset["schema"].keys()))

        return "unknown_schema"

    def get_schema_key(self, dataset):
        """Alias for extract_schema_signature, kept for readability at call sites."""
        return self.extract_schema_signature(dataset)

    def process_with_cached_schema(self, dataset, schema_key, analysis_type):
        """Analyze *dataset*, sending its shared schema description as a cached block."""
        if schema_key not in self.schema_cache:
            # The first dataset with this schema builds (and caches) the description.
            self.schema_cache[schema_key] = self.create_schema_description(dataset)

        schema_description = self.schema_cache[schema_key]

        request = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 2000,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            # Shared schema text: marked ephemeral so the API caches it.
                            "type": "text",
                            "text": f"数据schema描述:\n{schema_description}",
                            "cache_control": {"type": "ephemeral"}
                        },
                        {
                            "type": "text",
                            "text": f"\n\n当前数据集:\n{self.format_dataset_sample(dataset)}\n\n分析类型:{analysis_type}\n\n请进行数据分析。"
                        }
                    ]
                }
            ]
        }

        response = self.client.messages.create(**request)

        return {
            "dataset_id": dataset.get("id", "unknown"),
            "analysis": response.content[0].text,
            "schema_cached": True,
            "schema_key": schema_key
        }

    def process_individual_dataset(self, dataset, analysis_type):
        """Analyze a dataset whose schema is unique in the batch (nothing worth caching).

        Fix: this method was called by process_dataset_batch but never defined,
        so any batch containing a unique schema raised AttributeError. Mirrors
        process_with_cached_schema without the cached schema block.
        """
        request = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 2000,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"当前数据集:\n{self.format_dataset_sample(dataset)}\n\n分析类型:{analysis_type}\n\n请进行数据分析。"
                        }
                    ]
                }
            ]
        }

        response = self.client.messages.create(**request)

        return {
            "dataset_id": dataset.get("id", "unknown"),
            "analysis": response.content[0].text,
            "schema_cached": False
        }

    def create_schema_description(self, dataset):
        """Render the dataset's columns and metadata into a readable schema summary."""
        description = "数据schema信息:\n"

        if "columns" in dataset:
            description += "列信息:\n"
            for col in dataset["columns"]:
                description += f"- {col['name']}: {col.get('type', 'unknown')} - {col.get('description', '无描述')}\n"

        if "metadata" in dataset:
            description += "\n元数据:\n"
            for key, value in dataset["metadata"].items():
                description += f"- {key}: {value}\n"

        return description

    def format_dataset_sample(self, dataset):
        """Return a short printable sample/preview of the dataset."""
        if "sample_data" in dataset:
            return f"样本数据:\n{dataset['sample_data']}"
        elif "preview" in dataset:
            return f"数据预览:\n{dataset['preview']}"
        else:
            return "数据集基本信息:" + str(dataset.get("info", "无信息"))

最佳实践指南

缓存策略选择

内容分类与缓存决策
python
def determine_cache_strategy(content, usage_context):
    """Recommend a caching strategy from content characteristics and usage context."""
    content_analysis = analyze_content_characteristics(content)
    usage_analysis = analyze_usage_context(usage_context)

    # Weighted score: 40% static-content ratio, 30% size, 30% reuse potential.
    static_score = content_analysis["static_ratio"]
    size_score = min(content_analysis["size"] / 10000, 1.0)
    reuse_potential = usage_analysis["reuse_potential"]
    cache_score = static_score * 0.4 + size_score * 0.3 + reuse_potential * 0.3

    # Tiered recommendations: first threshold the score clears wins.
    tiers = [
        (0.8, {
            "strategy": "aggressive_caching",
            "cache_level": "full",
            "duration": "extended",
            "recommendation": "强烈建议缓存整个内容",
        }),
        (0.6, {
            "strategy": "selective_caching",
            "cache_level": "partial",
            "duration": "standard",
            "recommendation": "缓存静态部分,动态部分实时处理",
        }),
        (0.3, {
            "strategy": "minimal_caching",
            "cache_level": "key_parts",
            "duration": "short",
            "recommendation": "仅缓存关键可复用部分",
        }),
    ]
    for threshold, plan in tiers:
        if cache_score >= threshold:
            return plan

    return {
        "strategy": "no_caching",
        "cache_level": "none",
        "duration": "none",
        "recommendation": "不建议使用缓存",
    }

def analyze_content_characteristics(content):
    """Score how static (and therefore cacheable) *content* looks."""
    import re

    # Section markers that usually indicate static, reusable prompt content.
    static_patterns = [
        r"系统指令",
        r"角色定义",
        r"文档内容",
        r"参考资料",
        r"背景信息",
        r"规则说明",
    ]

    matched = sum(
        1 for pattern in static_patterns
        if re.search(pattern, content, re.IGNORECASE)
    )

    return {
        "size": len(content),
        "static_ratio": matched / len(static_patterns),
        # NOTE(review): estimate_content_complexity is not defined in this part
        # of the file — presumably provided elsewhere; confirm it exists.
        "complexity": estimate_content_complexity(content)
    }

def analyze_usage_context(context):
    """Summarize a usage context dict and estimate cache-reuse potential.

    Missing keys fall back to defaults: frequency 1, time span 300 seconds
    (five minutes), one concurrent user.
    """

    expected_frequency = context.get("expected_frequency", 1)
    usage_time_span = context.get("usage_time_span", 300)  # seconds; 5 min default
    concurrent_users = context.get("concurrent_users", 1)

    # Requests per minute across all users, capped at 1.0, as a proxy
    # for how likely a cached prompt is to be reused before it expires.
    per_minute_demand = (expected_frequency * concurrent_users) / (usage_time_span / 60)

    return {
        "reuse_potential": min(per_minute_demand, 1.0),
        "frequency": expected_frequency,
        "time_span": usage_time_span,
        "user_count": concurrent_users
    }

监控和调优

缓存性能监控
python 复制代码
class CachePerformanceMonitor:
    """Collects per-request cache metrics and derives performance summaries."""

    def __init__(self):
        # Aggregate counters; response_times gains one entry per request.
        self.metrics = {
            "cache_hits": 0,
            "cache_misses": 0,
            "total_requests": 0,
            "cache_size": 0,
            "response_times": [],
            "cost_savings": 0
        }
        # One structured entry per recorded request, for offline analysis.
        self.detailed_logs = []

    def record_request(self, request_info):
        """Record one request's cache outcome, latency and cost saving.

        ``request_info`` must carry "cache_hit" and "response_time";
        "request_id", "tokens_processed" and "cost_saving" are optional.
        """
        stats = self.metrics
        stats["total_requests"] += 1

        hit = request_info["cache_hit"]
        if hit:
            stats["cache_hits"] += 1
        else:
            stats["cache_misses"] += 1

        latency = request_info["response_time"]
        saving = request_info.get("cost_saving", 0)
        stats["response_times"].append(latency)
        stats["cost_savings"] += saving

        # Detailed per-request log entry.
        self.detailed_logs.append({
            "timestamp": time.time(),
            "request_id": request_info.get("request_id"),
            "cache_hit": hit,
            "response_time": latency,
            "tokens_processed": request_info.get("tokens_processed"),
            "cost_saving": saving
        })

    def get_performance_summary(self):
        """Return hit rate, mean latency, savings, a grade and advice."""
        count = self.metrics["total_requests"]
        if count == 0:
            return {"error": "没有记录的请求"}

        hit_rate = self.metrics["cache_hits"] / count
        times = self.metrics["response_times"]
        mean_latency = sum(times) / len(times)

        return {
            "cache_hit_rate": hit_rate,
            "total_requests": count,
            "avg_response_time": mean_latency,
            "total_cost_savings": self.metrics["cost_savings"],
            "performance_grade": self.calculate_performance_grade(hit_rate, mean_latency),
            "recommendations": self.generate_recommendations(hit_rate, mean_latency)
        }

    def calculate_performance_grade(self, hit_rate, avg_response_time):
        """Map hit rate and mean latency onto a letter grade A-F.

        Hit rate and latency each contribute up to 50 points; latency
        loses 10 points per second above a 1-second baseline.
        """
        score = hit_rate * 50 + max(0, 50 - (avg_response_time - 1) * 10)

        for floor, grade in ((90, "A"), (80, "B"), (70, "C"), (60, "D")):
            if score >= floor:
                return grade
        return "F"

    def generate_recommendations(self, hit_rate, avg_response_time):
        """Suggest tuning actions based on hit rate and latency thresholds."""
        advice = []

        if hit_rate < 0.5:
            advice.append("缓存命中率偏低,建议扩大缓存范围或优化缓存策略")
        if avg_response_time > 3.0:
            advice.append("响应时间较长,建议优化缓存结构或增加预热")
        if hit_rate > 0.8 and avg_response_time < 1.5:
            advice.append("缓存性能良好,可以考虑处理更复杂的任务")

        return advice

# Example: feed simulated requests into the monitor and print a summary.
monitor = CachePerformanceMonitor()

# Simulated request records (cache outcome, latency in seconds, cost saving).
sample_requests = [
    {"response_time": 0.8, "cache_hit": True, "cost_saving": 0.02},
    {"response_time": 2.1, "cache_hit": False, "cost_saving": 0},
    {"response_time": 0.6, "cache_hit": True, "cost_saving": 0.03},
    {"response_time": 0.9, "cache_hit": True, "cost_saving": 0.025}
]

for sample in sample_requests:
    monitor.record_request(sample)

performance_summary = monitor.get_performance_summary()
print(f"缓存命中率:{performance_summary['cache_hit_rate']:.2%}")
print(f"平均响应时间:{performance_summary['avg_response_time']:.2f}秒")
print(f"性能等级:{performance_summary['performance_grade']}")

故障排除指南

常见问题诊断
python 复制代码
class CacheTroubleshooter:
    """Diagnoses why a prompt-cache request may fail and suggests fixes.

    Bug fix: ``__init__`` registered ``self.check_content_suitability`` and
    ``self.check_api_configuration`` in ``diagnostic_checks``, but neither
    method was defined anywhere in the class, so instantiating the class
    raised ``AttributeError``. Both checks are now implemented.
    """

    def __init__(self):
        # Ordered diagnostic callables; each takes (request, error_info) and
        # returns a dict with at least "check", "passed" and "message".
        self.diagnostic_checks = [
            self.check_token_requirements,
            self.check_model_support,
            self.check_cache_format,
            self.check_content_suitability,
            self.check_api_configuration
        ]

    def diagnose_cache_issues(self, cache_request, error_info=None):
        """Run every diagnostic check and summarize the findings.

        Returns a dict with overall status, the failing check results,
        and actionable fix recommendations.
        """
        diagnosis_results = [
            check(cache_request, error_info) for check in self.diagnostic_checks
        ]

        # Keep only the checks that failed.
        issues_found = [r for r in diagnosis_results if not r["passed"]]

        return {
            "overall_status": "healthy" if len(issues_found) == 0 else "issues_found",
            "issues_count": len(issues_found),
            "issues": issues_found,
            "recommendations": self.generate_fix_recommendations(issues_found)
        }

    def check_token_requirements(self, request, error_info):
        """Check that the cacheable content meets the minimum token count.

        Haiku models require 2048 tokens; other models require 1024.
        Token count is a rough word-based estimate, not a real tokenizer.
        """
        content = self.extract_cacheable_content(request)
        estimated_tokens = len(content.split()) * 1.3  # rough estimate

        model = request.get("model", "")
        min_tokens = 2048 if "haiku" in model else 1024

        if estimated_tokens >= min_tokens:
            return {
                "check": "token_requirements",
                "passed": True,
                "message": f"Token数量满足要求 ({estimated_tokens:.0f} >= {min_tokens})"
            }
        return {
            "check": "token_requirements",
            "passed": False,
            "message": f"Token数量不足 ({estimated_tokens:.0f} < {min_tokens})",
            "fix": f"内容至少需要 {min_tokens} 个token才能缓存"
        }

    def check_model_support(self, request, error_info):
        """Check whether the request's model family supports caching."""
        model = request.get("model", "")
        supported_models = [
            "claude-opus-4", "claude-sonnet-4", "claude-sonnet-3-7",
            "claude-sonnet-3-5", "claude-haiku-3-5", "claude-haiku-3",
            "claude-opus-3"
        ]

        # Match by family prefix so dated model names also pass.
        model_supported = any(supported in model for supported in supported_models)

        if model_supported:
            return {
                "check": "model_support",
                "passed": True,
                "message": f"模型 {model} 支持缓存"
            }
        return {
            "check": "model_support",
            "passed": False,
            "message": f"模型 {model} 不支持缓存",
            "fix": "请使用支持缓存的模型版本"
        }

    def check_cache_format(self, request, error_info):
        """Check that at least one content part carries a cache_control marker."""
        has_cache_control = self.find_cache_control_in_request(request)

        if has_cache_control:
            return {
                "check": "cache_format",
                "passed": True,
                "message": "发现正确的缓存控制格式"
            }
        return {
            "check": "cache_format",
            "passed": False,
            "message": "未找到缓存控制标记",
            "fix": "添加 cache_control: {type: 'ephemeral'} 到需要缓存的内容"
        }

    def check_content_suitability(self, request, error_info):
        """Check that the request actually contains cacheable content.

        Previously referenced from ``diagnostic_checks`` but missing;
        implemented here so the troubleshooter can run.
        """
        content = self.extract_cacheable_content(request)

        if content.strip():
            return {
                "check": "content_suitability",
                "passed": True,
                "message": "找到可缓存的内容"
            }
        return {
            "check": "content_suitability",
            "passed": False,
            "message": "未找到可缓存的内容",
            "fix": "将静态内容放入 system 提示或标记 cache_control 的消息部分"
        }

    def check_api_configuration(self, request, error_info):
        """Check basic request configuration (a model name must be present).

        Previously referenced from ``diagnostic_checks`` but missing;
        implemented here so the troubleshooter can run.
        """
        if request.get("model"):
            return {
                "check": "api_configuration",
                "passed": True,
                "message": "API请求配置完整"
            }
        return {
            "check": "api_configuration",
            "passed": False,
            "message": "请求缺少 model 字段",
            "fix": "在请求中指定支持缓存的模型名称"
        }

    def extract_cacheable_content(self, request):
        """Concatenate the system prompt and all cache-marked message parts."""
        content = ""

        if "system" in request:
            content += str(request["system"])

        if "messages" in request:
            for message in request["messages"]:
                # Only structured (list) content can carry cache_control parts.
                if isinstance(message.get("content"), list):
                    for part in message["content"]:
                        if part.get("cache_control"):
                            content += part.get("text", "")

        return content

    def find_cache_control_in_request(self, request):
        """Return True if any system or message part carries cache_control."""
        # System prompt given as structured blocks.
        if isinstance(request.get("system"), list):
            for item in request["system"]:
                if item.get("cache_control"):
                    return True

        # Structured user/assistant message parts.
        if "messages" in request:
            for message in request["messages"]:
                if isinstance(message.get("content"), list):
                    for part in message["content"]:
                        if part.get("cache_control"):
                            return True

        return False

    def generate_fix_recommendations(self, issues):
        """Turn failing check results into actionable recommendations."""
        if not issues:
            return ["缓存配置正常,无需修复"]

        recommendations = [
            f"{issue['check']}: {issue['fix']}" for issue in issues if "fix" in issue
        ]

        # Generic operational advice appended to every failing diagnosis.
        recommendations.extend([
            "确保API密钥有效且有足够的配额",
            "检查网络连接是否稳定",
            "考虑实施重试机制处理临时错误"
        ])

        return recommendations

# Example: run the troubleshooter against a request that cannot be cached.
troubleshooter = CacheTroubleshooter()

# A deliberately problematic request: the message content is far too
# short to reach the minimum cacheable token count.
problematic_request = {
    "model": "claude-haiku-3-5-20241022",
    "messages": [
        {
            "role": "user",
            "content": "简短的问题"  # too short to be cached
        }
    ]
}

diagnosis = troubleshooter.diagnose_cache_issues(problematic_request)

print("缓存诊断结果:")
print(f"状态:{diagnosis['overall_status']}")
print(f"发现问题:{diagnosis['issues_count']} 个")

for found_issue in diagnosis['issues']:
    print(f"- {found_issue['check']}: {found_issue['message']}")

print("\n修复建议:")
for suggestion in diagnosis['recommendations']:
    print(f"- {suggestion}")

通过实施这些缓存策略和优化技术,可以显著提升Claude API的性能表现,降低使用成本,并提供更流畅的用户体验。