# Prompt Caching and Performance Optimization
## Caching Technology Overview
### What Is Prompt Caching
Prompt caching is an optimization feature of the Claude API that lets you cache specific portions of a prompt for reuse, significantly reducing processing time and API cost.
### Core Benefits
#### Lower Cost
- Write cost: writing to the cache costs 25% more than regular input tokens
- Read cost: cache reads cost only 10% of the regular input price
- Overall savings: frequent reuse can cut costs by up to 90%
#### Better Performance
- Response speed: cache hits significantly reduce response latency
- Processing efficiency: avoids recomputing identical prompt content
- System load: reduces server-side compute pressure
#### Flexible Application
- Smart caching: automatically identifies cacheable prompt sections
- Dynamic management: 5-minute default cache lifetime
- Incremental updates: supports partial cache updates
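To make this concrete, here is a minimal sketch of a cached request. The `cache_control` marker on the system block is the actual API mechanism; the prompt text is an illustrative placeholder.

```python
import anthropic

client = anthropic.Anthropic()

# Mark the long, static system prompt as cacheable; the short user
# question stays uncached because it changes on every call.
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are a professional data analyst..." + " (long static instructions)" * 200,
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Summarize yesterday's sales numbers."}],
)
# usage reports cache_creation_input_tokens / cache_read_input_tokens
# on cache writes and hits respectively
print(response.usage)
```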
### Supported Models
```python
SUPPORTED_MODELS = [
    "claude-opus-4-20250514",
    "claude-sonnet-4-20250514",
    "claude-3-7-sonnet-20250219",
    "claude-3-5-sonnet-20240620",
    "claude-3-5-haiku-20241022",
    "claude-3-haiku-20240307",
    "claude-3-opus-20240229"
]

def check_cache_support(model_name):
    """Check whether a model supports prompt caching."""
    return model_name in SUPPORTED_MODELS

# Example check
if check_cache_support("claude-sonnet-4-20250514"):
    print("This model supports prompt caching")
```
## How Caching Works
### The Caching Mechanism in Detail
#### Cache Creation Flow
1. Prompt analysis: the system analyzes prompt content to identify cacheable sections
2. Cache write: the designated content is written to cache storage
3. Cache identification: a unique cache identifier is generated
4. Lifecycle management: an initial 5-minute lifetime is set
#### Cache Matching Logic
```python
import anthropic
import hashlib
import time

class PromptCacheManager:
    """Prompt cache manager."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.cache_registry = {}

    def create_cache_key(self, content):
        """Create a cache key."""
        return hashlib.md5(content.encode()).hexdigest()

    def is_cacheable(self, content, model_type="sonnet"):
        """Check whether content meets the minimum cacheable size."""
        token_limits = {
            "opus": 1024,
            "sonnet": 1024,
            "haiku": 2048
        }
        # Simplified token estimate (use the official tokenizer in practice)
        estimated_tokens = len(content.split()) * 1.3
        min_tokens = token_limits.get(model_type, 1024)
        return estimated_tokens >= min_tokens

    def cache_prompt_section(self, content, section_type="system"):
        """Cache a prompt section."""
        if not self.is_cacheable(content):
            print("Content too short to cache (at least 1024 tokens required; 2048 for Haiku)")
            return None
        cache_key = self.create_cache_key(content)
        cached_content = {
            "type": "text",
            "text": content,
            "cache_control": {"type": "ephemeral"}
        }
        self.cache_registry[cache_key] = {
            "content": cached_content,
            "created_at": time.time(),
            "access_count": 0
        }
        return cached_content
```
#### Cache Lifecycle
```python
def manage_cache_lifecycle(cache_manager):
    """Expire and clean up cache entries."""
    current_time = time.time()
    expired_keys = []
    for key, cache_info in cache_manager.cache_registry.items():
        # Check whether the entry is older than 5 minutes
        if current_time - cache_info["created_at"] > 300:  # 5 minutes = 300 seconds
            expired_keys.append(key)
    # Remove expired entries
    for key in expired_keys:
        del cache_manager.cache_registry[key]
        print(f"Cache entry {key} expired and was removed")
    return len(expired_keys)
```
### Cache Matching Strategies
#### Prefix Matching Algorithm
```python
def find_cache_match(new_prompt, cached_prompts):
    """Find the best cached prefix match for a new prompt."""
    best_match = None
    max_match_length = 0
    for cached_key, cached_content in cached_prompts.items():
        cached_text = cached_content["content"]["text"]
        # Compute the length of the common prefix
        match_length = 0
        min_length = min(len(new_prompt), len(cached_text))
        for i in range(min_length):
            if new_prompt[i] == cached_text[i]:
                match_length += 1
            else:
                break
        # Track the best match
        if match_length > max_match_length and match_length > 0:
            max_match_length = match_length
            best_match = {
                "key": cached_key,
                "match_length": match_length,
                "cache_ratio": match_length / len(new_prompt)
            }
    return best_match

# Usage example
cache_manager = PromptCacheManager()
new_prompt = "Analyze the main content and structure of the following document..."
match_result = find_cache_match(new_prompt, cache_manager.cache_registry)
if match_result and match_result["cache_ratio"] > 0.8:
    print(f"High-quality cache match found, match ratio: {match_result['cache_ratio']:.2%}")
```
## Implementation Methods in Detail
### Basic Caching
#### Caching the System Message
```python
def create_cached_system_message(system_content):
    """Create a system message with caching enabled."""
    return {
        "system": [
            {
                "type": "text",
                "text": system_content,
                "cache_control": {"type": "ephemeral"}
            }
        ]
    }

# Usage example
system_prompt = """
You are a professional data analyst with the following expertise:
1. Statistical analysis and data mining
2. Applied machine learning algorithms
3. Data visualization and report generation
4. Business insight and decision support

Always provide:
- Accurate analysis results
- Clear explanations and recommendations
- Practical implementation steps
- Risk assessment and caveats
"""
cached_system = create_cached_system_message(system_prompt)
```
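The dictionary returned above can be unpacked straight into a `messages.create` call. A minimal sketch, assuming the `client` and `system_prompt` from earlier:

```python
# The cached system block is sent on every call; after the first call,
# matching calls within the cache lifetime read it at the reduced rate.
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1000,
    messages=[{"role": "user", "content": "What is a p-value?"}],
    **cached_system,
)
print(response.content[0].text)
```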
#### Caching the User Message
```python
def create_cached_user_message(user_content, cache_parts=None):
    """Create a user message with caching enabled."""
    if cache_parts is None:
        # Cache the entire message
        return {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_content,
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        }
    else:
        # Cache selected parts only
        content_parts = []
        for part in cache_parts:
            if part.get("cache", False):
                content_parts.append({
                    "type": "text",
                    "text": part["text"],
                    "cache_control": {"type": "ephemeral"}
                })
            else:
                content_parts.append({
                    "type": "text",
                    "text": part["text"]
                })
        return {
            "role": "user",
            "content": content_parts
        }

# Partial caching example
cache_parts = [
    {
        "text": "Here is the description of the large dataset to analyze...",
        "cache": True  # This part is cached
    },
    {
        "text": f"Please analyze the data for today ({time.strftime('%Y-%m-%d')}).",
        "cache": False  # This part contains dynamic content; do not cache
    }
]
cached_user_message = create_cached_user_message("", cache_parts)
```
### Advanced Caching Strategies
#### Intelligent Cache Splitting
```python
import re

def intelligent_cache_splitting(prompt_content, max_cache_size=4000):
    """Split a prompt into cacheable (static) and non-cacheable parts."""
    # Patterns that identify typically static sections
    static_patterns = [
        r"System instructions:.*?(?=\n\n|\nUser)",
        r"Document content:.*?(?=\n\n|\nQuestion)",
        r"Background:.*?(?=\n\n|\nTask)",
        r"Reference material:.*?(?=\n\n|\nRequirements)"
    ]
    cache_candidates = []
    for pattern in static_patterns:
        matches = re.finditer(pattern, prompt_content, re.DOTALL)
        for match in matches:
            content = match.group()
            start, end = match.span()
            if len(content.split()) * 1.3 >= 1024:  # Meets the minimum cache size
                cache_candidates.append({
                    "content": content,
                    "start": start,
                    "end": end,
                    "cacheable": True
                })
    return optimize_cache_strategy(cache_candidates, prompt_content)

def optimize_cache_strategy(candidates, full_content):
    """Interleave cacheable and non-cacheable parts in document order."""
    # Sort candidates by position
    candidates.sort(key=lambda x: x["start"])
    optimized_parts = []
    last_end = 0
    for candidate in candidates:
        # Add the non-cached gap before this candidate
        if candidate["start"] > last_end:
            optimized_parts.append({
                "content": full_content[last_end:candidate["start"]],
                "cacheable": False
            })
        # Add the cacheable part
        optimized_parts.append({
            "content": candidate["content"],
            "cacheable": True
        })
        last_end = candidate["end"]
    # Add any trailing non-cached content
    if last_end < len(full_content):
        optimized_parts.append({
            "content": full_content[last_end:],
            "cacheable": False
        })
    return optimized_parts
```
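A quick usage sketch; the section labels in the sample prompt are chosen to match the illustrative regex patterns above:

```python
sample_prompt = (
    "Document content: " + "background material " * 900 + "\n\n"
    "Question: what are the key risks?"
)
parts = intelligent_cache_splitting(sample_prompt)
for part in parts:
    print(part["cacheable"], len(part["content"]))
```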
#### Multi-Level Cache Architecture
```python
class MultiLevelCacheManager:
    """Multi-level cache manager."""

    def __init__(self):
        self.l1_cache = {}  # Small, frequently accessed content
        self.l2_cache = {}  # Medium-sized content
        self.l3_cache = {}  # Large documents and datasets

    def determine_cache_level(self, content):
        """Pick a cache level based on content size."""
        content_size = len(content)
        if content_size < 1000:
            return "l1"
        elif content_size < 10000:
            return "l2"
        else:
            return "l3"

    def cache_content(self, key, content, metadata=None):
        """Store content in the appropriate cache level."""
        cache_level = self.determine_cache_level(content)
        cache_entry = {
            "content": content,
            "metadata": metadata or {},
            "created_at": time.time(),
            "access_count": 0,
            "cache_level": cache_level
        }
        if cache_level == "l1":
            self.l1_cache[key] = cache_entry
        elif cache_level == "l2":
            self.l2_cache[key] = cache_entry
        else:
            self.l3_cache[key] = cache_entry
        return cache_entry

    def get_cached_content(self, key):
        """Look up cached content."""
        # Search the levels in order
        for cache in [self.l1_cache, self.l2_cache, self.l3_cache]:
            if key in cache:
                cache[key]["access_count"] += 1
                cache[key]["last_accessed"] = time.time()
                return cache[key]
        return None

    def get_cache_statistics(self):
        """Return per-level cache statistics."""
        return {
            "l1_cache": {
                "count": len(self.l1_cache),
                "total_size": sum(len(entry["content"]) for entry in self.l1_cache.values())
            },
            "l2_cache": {
                "count": len(self.l2_cache),
                "total_size": sum(len(entry["content"]) for entry in self.l2_cache.values())
            },
            "l3_cache": {
                "count": len(self.l3_cache),
                "total_size": sum(len(entry["content"]) for entry in self.l3_cache.values())
            }
        }
```
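A brief usage sketch of the manager above, assuming the imports from earlier blocks:

```python
mlc = MultiLevelCacheManager()
mlc.cache_content("greeting", "short system preamble")               # lands in L1
mlc.cache_content("doc", "x" * 50_000, metadata={"kind": "spec"})    # lands in L3
entry = mlc.get_cached_content("doc")
print(entry["cache_level"], mlc.get_cache_statistics())
```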
## Cost Optimization Strategies
### Cost Model
#### Basic Cost Analysis
```python
class CostOptimizer:
    """Cache cost optimizer."""

    def __init__(self):
        # Illustrative pricing model (per 1,000 tokens)
        self.pricing = {
            "opus": {
                "input": 0.015,
                "output": 0.075,
                "cache_write": 0.01875,  # 25% premium over input
                "cache_read": 0.0015     # 10% of the input price
            },
            "sonnet": {
                "input": 0.003,
                "output": 0.015,
                "cache_write": 0.00375,
                "cache_read": 0.0003
            },
            "haiku": {
                "input": 0.00025,
                "output": 0.00125,
                "cache_write": 0.0003125,
                "cache_read": 0.000025
            }
        }

    def calculate_traditional_cost(self, model, input_tokens, output_tokens):
        """Cost of a conventional API call."""
        model_pricing = self.pricing.get(model, self.pricing["sonnet"])
        input_cost = (input_tokens / 1000) * model_pricing["input"]
        output_cost = (output_tokens / 1000) * model_pricing["output"]
        return input_cost + output_cost

    def calculate_cached_cost(self, model, cached_tokens, new_input_tokens, output_tokens, cache_hits=1):
        """Cost of the same workload using caching."""
        model_pricing = self.pricing.get(model, self.pricing["sonnet"])
        # One-time cache write on the first call
        cache_write_cost = (cached_tokens / 1000) * model_pricing["cache_write"]
        # Cache reads on the subsequent calls (the first call writes, not reads)
        cache_read_cost = (cached_tokens / 1000) * model_pricing["cache_read"] * max(cache_hits - 1, 0)
        # Uncached new input on every call
        new_input_cost = (new_input_tokens / 1000) * model_pricing["input"] * cache_hits
        # Output cost on every call
        output_cost = (output_tokens / 1000) * model_pricing["output"] * cache_hits
        return cache_write_cost + cache_read_cost + new_input_cost + output_cost

    def calculate_breakeven_point(self, model, cached_tokens, new_input_tokens, output_tokens):
        """Number of calls after which caching is cheaper than not caching."""
        traditional_single_cost = self.calculate_traditional_cost(
            model, cached_tokens + new_input_tokens, output_tokens
        )
        # Count how many calls it takes for caching to pay off
        cache_hits = 1
        while True:
            cached_cost = self.calculate_cached_cost(
                model, cached_tokens, new_input_tokens, output_tokens, cache_hits
            )
            traditional_cost = traditional_single_cost * cache_hits
            if cached_cost < traditional_cost:
                return cache_hits
            cache_hits += 1
            # Guard against an infinite loop
            if cache_hits > 100:
                return None

# Cost analysis example
optimizer = CostOptimizer()

# Scenario: large-document analysis
cached_tokens = 5000      # 5,000-token document content
new_input_tokens = 500    # 500-token question
output_tokens = 1000      # 1,000-token answer

breakeven = optimizer.calculate_breakeven_point(
    "sonnet", cached_tokens, new_input_tokens, output_tokens
)
print(f"Break-even point: {breakeven} API calls")

# Compare the cost of 10 calls
traditional_cost = optimizer.calculate_traditional_cost(
    "sonnet", cached_tokens + new_input_tokens, output_tokens
) * 10
cached_cost = optimizer.calculate_cached_cost(
    "sonnet", cached_tokens, new_input_tokens, output_tokens, cache_hits=10
)
savings = traditional_cost - cached_cost
savings_percentage = (savings / traditional_cost) * 100
print(f"Traditional cost for 10 calls: ${traditional_cost:.4f}")
print(f"Cached cost for 10 calls: ${cached_cost:.4f}")
print(f"Savings: ${savings:.4f} ({savings_percentage:.1f}%)")
```
### Intelligent Caching Strategies
#### Optimization Based on Usage Patterns
```python
class UsagePatternOptimizer:
    """Cache optimizer driven by observed usage patterns."""

    def __init__(self):
        self.usage_history = []
        self.cache_performance = {}

    def record_usage(self, prompt_hash, tokens, timestamp=None):
        """Record one use of a prompt."""
        if timestamp is None:
            timestamp = time.time()
        self.usage_history.append({
            "prompt_hash": prompt_hash,
            "tokens": tokens,
            "timestamp": timestamp
        })

    def analyze_usage_patterns(self, time_window=3600):  # 1-hour window
        """Analyze recent usage patterns."""
        current_time = time.time()
        recent_usage = [
            usage for usage in self.usage_history
            if current_time - usage["timestamp"] <= time_window
        ]
        # Count frequency per prompt
        frequency_map = {}
        for usage in recent_usage:
            prompt_hash = usage["prompt_hash"]
            frequency_map[prompt_hash] = frequency_map.get(prompt_hash, 0) + 1
        # Identify high-frequency prompts
        high_frequency_prompts = {
            prompt_hash: count
            for prompt_hash, count in frequency_map.items()
            if count >= 3  # 3+ uses counts as high frequency
        }
        return {
            "total_requests": len(recent_usage),
            "unique_prompts": len(frequency_map),
            "high_frequency_prompts": high_frequency_prompts,
            "cache_recommendation": self.generate_cache_recommendations(high_frequency_prompts)
        }

    def generate_cache_recommendations(self, high_frequency_prompts):
        """Turn frequency data into caching recommendations."""
        recommendations = []
        for prompt_hash, frequency in high_frequency_prompts.items():
            # Recommend a strategy based on frequency
            if frequency >= 10:
                strategy = "aggressive_caching"
                cache_duration = "extended"
            elif frequency >= 5:
                strategy = "selective_caching"
                cache_duration = "standard"
            else:
                strategy = "minimal_caching"
                cache_duration = "short"
            recommendations.append({
                "prompt_hash": prompt_hash,
                "frequency": frequency,
                "strategy": strategy,
                "cache_duration": cache_duration,
                "priority": "high" if frequency >= 10 else "medium"
            })
        return recommendations
```
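A short usage sketch, feeding in a few synthetic records:

```python
upo = UsagePatternOptimizer()
for _ in range(6):
    upo.record_usage("prompt-abc", tokens=1500)
upo.record_usage("prompt-xyz", tokens=800)
report = upo.analyze_usage_patterns()
print(report["high_frequency_prompts"])   # {'prompt-abc': 6}
print(report["cache_recommendation"])     # selective_caching for prompt-abc
```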
#### Dynamic Cache Adjustment
```python
def dynamic_cache_adjustment(cache_manager, performance_metrics):
    """Adjust the caching strategy based on measured performance."""
    # Inspect cache hit rate, latency, and savings
    hit_rate = performance_metrics.get("cache_hit_rate", 0)
    avg_response_time = performance_metrics.get("avg_response_time", 0)
    cost_savings = performance_metrics.get("cost_savings", 0)
    adjustments = []
    if hit_rate < 0.3:  # Hit rate below 30%
        adjustments.append({
            "action": "expand_cache_scope",
            "reason": "low cache hit rate",
            "recommendation": "widen the range of content that is cached"
        })
    if avg_response_time > 2.0:  # Latency above 2 seconds
        adjustments.append({
            "action": "optimize_cache_structure",
            "reason": "response time too long",
            "recommendation": "optimize the cache data structures"
        })
    if cost_savings < 0.2:  # Savings below 20%
        adjustments.append({
            "action": "revise_cache_strategy",
            "reason": "poor cost effectiveness",
            "recommendation": "re-evaluate the caching strategy"
        })
    return {
        "current_metrics": performance_metrics,
        "adjustments": adjustments,
        "next_review": time.time() + 3600  # Re-evaluate in 1 hour
    }
```
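For example, a low hit rate combined with acceptable latency and savings yields a single "expand scope" adjustment (`cache_manager` is unused by the function, so `None` is fine here):

```python
metrics = {"cache_hit_rate": 0.25, "avg_response_time": 1.2, "cost_savings": 0.35}
plan = dynamic_cache_adjustment(cache_manager=None, performance_metrics=metrics)
for adj in plan["adjustments"]:
    print(adj["action"], "-", adj["reason"])
# expand_cache_scope - low cache hit rate
```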
## Performance Optimization in Practice
### Response Time Optimization
#### Cache Warmup Strategy
```python
class CacheWarmupManager:
    """Cache warmup manager."""

    def __init__(self, client):
        self.client = client
        self.warmup_queue = []

    def add_warmup_content(self, content, priority="normal"):
        """Queue content for warmup."""
        self.warmup_queue.append({
            "content": content,
            "priority": priority,
            "added_at": time.time()
        })

    def execute_warmup(self, batch_size=5):
        """Run cache warmup for the next batch."""
        # Sort by priority
        self.warmup_queue.sort(
            key=lambda x: {"high": 3, "normal": 2, "low": 1}[x["priority"]],
            reverse=True
        )
        batch = self.warmup_queue[:batch_size]
        results = []
        for item in batch:
            try:
                # Build the warmup request
                warmup_request = {
                    "model": "claude-sonnet-4-20250514",
                    "max_tokens": 10,  # Minimal output to keep warmup cheap
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": item["content"],
                                    "cache_control": {"type": "ephemeral"}
                                }
                            ]
                        }
                    ]
                }
                response = self.client.messages.create(**warmup_request)
                results.append({
                    "content_hash": hashlib.md5(item["content"].encode()).hexdigest(),
                    "status": "warmed",
                    "tokens_cached": self.estimate_tokens(item["content"])
                })
            except Exception as e:
                results.append({
                    "content_hash": hashlib.md5(item["content"].encode()).hexdigest(),
                    "status": "failed",
                    "error": str(e)
                })
        # Drop the processed items from the queue
        self.warmup_queue = self.warmup_queue[batch_size:]
        return results

    def estimate_tokens(self, content):
        """Estimate the token count."""
        # Simplified token estimate
        return int(len(content.split()) * 1.3)

# Warmup usage example
client = anthropic.Anthropic()
warmup_manager = CacheWarmupManager(client)

# Queue common system prompts for warmup
common_system_prompts = [
    "You are a professional data analyst...",
    "You are an experienced software engineer...",
    "You are a senior product manager..."
]
for prompt in common_system_prompts:
    warmup_manager.add_warmup_content(prompt, priority="high")

# Execute the warmup
warmup_results = warmup_manager.execute_warmup()
print(f"Warmed up {len(warmup_results)} cache entries")
```
#### Concurrent Processing Optimization
```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

class ConcurrentCacheProcessor:
    """Concurrent cache-aware request processor."""

    def __init__(self, max_workers=5):
        self.max_workers = max_workers
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    async def process_multiple_requests(self, requests):
        """Process multiple requests concurrently."""
        # Group requests that can share a cache entry
        cache_groups = self.group_by_cache_potential(requests)
        tasks = []
        for group in cache_groups:
            if group["cache_reusable"]:
                # Requests that share a cache run serially, so the first
                # one creates the cache and the rest hit it
                task = self.process_cached_group(group)
            else:
                # Independent requests run in parallel
                task = self.process_independent_group(group)
            tasks.append(task)
        results = await asyncio.gather(*tasks)
        return self.merge_results(results)

    def group_by_cache_potential(self, requests):
        """Group requests by shared cache key."""
        cache_map = {}
        for i, request in enumerate(requests):
            cache_key = self.extract_cache_key(request)
            if cache_key in cache_map:
                cache_map[cache_key]["requests"].append((i, request))
            else:
                cache_map[cache_key] = {
                    "cache_key": cache_key,
                    "requests": [(i, request)],
                    "cache_reusable": len(cache_key) > 100  # Simplified heuristic
                }
        return list(cache_map.values())

    async def process_cached_group(self, group):
        """Process a group that shares a cache entry."""
        results = []
        cache_created = False
        for request_index, request in group["requests"]:
            if not cache_created:
                # The first request creates the cache
                request = self.add_cache_control(request)
                cache_created = True
            result = await self.process_single_request(request)
            results.append((request_index, result))
        return results

    async def process_independent_group(self, group):
        """Process a group of independent requests."""
        tasks = []
        for request_index, request in group["requests"]:
            task = self.process_single_request(request)
            tasks.append((request_index, task))
        results = []
        for request_index, task in tasks:
            result = await task
            results.append((request_index, result))
        return results

    async def process_single_request(self, request):
        """Send one request (placeholder: call the API client here)."""
        # In a real implementation this would call the Anthropic API,
        # e.g. via an async client; here it simply echoes the request.
        return {"request": request, "status": "processed"}

    def merge_results(self, grouped_results):
        """Flatten grouped results back into original request order."""
        flat = [item for group in grouped_results for item in group]
        return [result for _, result in sorted(flat, key=lambda x: x[0])]

    def extract_cache_key(self, request):
        """Extract the cache key (simplified: the system message)."""
        if "system" in request:
            return request["system"]
        return ""

    def add_cache_control(self, request):
        """Attach cache control to the system message."""
        if "system" in request:
            request["system"] = [
                {
                    "type": "text",
                    "text": request["system"],
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        return request
```
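A usage sketch of the processor; the `system` text is padded so it crosses the illustrative 100-character reuse threshold, and the placeholder `process_single_request` above means nothing hits a live API:

```python
shared_system = "You are a meticulous code reviewer. " * 5  # > 100 chars
requests = [
    {"system": shared_system, "messages": [{"role": "user", "content": "Review file A"}]},
    {"system": shared_system, "messages": [{"role": "user", "content": "Review file B"}]},
    {"messages": [{"role": "user", "content": "Unrelated question"}]},
]
processor = ConcurrentCacheProcessor()
results = asyncio.run(processor.process_multiple_requests(requests))
print(len(results))  # 3, in original request order
```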
## Advanced Application Scenarios
### Document Processing Optimization
#### Large-Document Analysis
```python
class DocumentCacheProcessor:
    """Document cache processor."""

    def __init__(self, client):
        self.client = client
        self.document_cache = {}

    def process_large_document(self, document_content, analysis_tasks):
        """Run multiple analysis tasks against one large document."""
        # Cache the document content
        doc_hash = hashlib.md5(document_content.encode()).hexdigest()
        if doc_hash not in self.document_cache:
            self.cache_document(doc_hash, document_content)
        results = []
        for task in analysis_tasks:
            result = self.execute_analysis_task(doc_hash, document_content, task)
            results.append(result)
        return results

    def cache_document(self, doc_hash, content):
        """Register a document in the local cache index."""
        self.document_cache[doc_hash] = {
            "content": content,
            "cached_at": time.time(),
            "access_count": 0
        }

    def execute_analysis_task(self, doc_hash, document_content, task):
        """Execute one analysis task."""
        # Update the access count
        self.document_cache[doc_hash]["access_count"] += 1
        # Build a cache-enabled request
        request = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 2000,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Document content:\n{document_content}",
                            "cache_control": {"type": "ephemeral"}
                        },
                        {
                            "type": "text",
                            "text": f"\n\nAnalysis task: {task['description']}\n\nPlease follow these requirements:\n{task['requirements']}"
                        }
                    ]
                }
            ]
        }
        response = self.client.messages.create(**request)
        return {
            "task": task,
            "result": response.content[0].text,
            "doc_hash": doc_hash,
            "cached": True
        }

# Usage example
doc_processor = DocumentCacheProcessor(client)

# A large technical document
technical_document = """
# System Architecture Design Document
## 1. System Overview
The system uses a microservice architecture...
## 2. Technology Stack
- Backend: Python/Django
- Frontend: React/TypeScript
- Database: PostgreSQL
- Cache: Redis
- Message queue: RabbitMQ
## 3. System Components
### 3.1 User Service
Handles authentication and authorization...
### 3.2 Order Service
Handles order creation, updates, and queries...
[...more technical detail...]
"""

# Define multiple analysis tasks
analysis_tasks = [
    {
        "description": "Architecture analysis",
        "requirements": "Analyze the strengths and weaknesses of the architecture and suggest improvements"
    },
    {
        "description": "Technology stack review",
        "requirements": "Assess the technology choices and recommend alternatives"
    },
    {
        "description": "Security review",
        "requirements": "Identify potential security risks and suggest hardening measures"
    },
    {
        "description": "Performance optimization",
        "requirements": "Analyze performance bottlenecks and propose optimization strategies"
    }
]

# Run the document analysis (the document is cached once and reused for all tasks)
analysis_results = doc_processor.process_large_document(
    technical_document,
    analysis_tasks
)
print(f"Completed {len(analysis_results)} analysis tasks")
for result in analysis_results:
    print(f"Task: {result['task']['description']}")
    print(f"Used cache: {result['cached']}")
```
### Batch Data Processing
#### Dataset Analysis Optimization
```python
class DatasetCacheProcessor:
    """Dataset cache processor."""

    def __init__(self, client):
        self.client = client
        self.schema_cache = {}
        self.metadata_cache = {}

    def process_dataset_batch(self, datasets, analysis_type="comprehensive"):
        """Process a batch of datasets."""
        # Identify schemas shared by multiple datasets
        common_schemas = self.identify_common_schemas(datasets)
        results = []
        for dataset in datasets:
            schema_key = self.get_schema_key(dataset)
            if schema_key in common_schemas:
                # Analyze using the cached schema description
                result = self.process_with_cached_schema(dataset, schema_key, analysis_type)
            else:
                # Analyze independently
                result = self.process_individual_dataset(dataset, analysis_type)
            results.append(result)
        return results

    def identify_common_schemas(self, datasets):
        """Identify schema patterns shared across datasets."""
        schema_patterns = {}
        for i, dataset in enumerate(datasets):
            schema_signature = self.extract_schema_signature(dataset)
            if schema_signature in schema_patterns:
                schema_patterns[schema_signature].append(i)
            else:
                schema_patterns[schema_signature] = [i]
        # Keep schemas that occur more than once (worth caching)
        return {
            signature: indices
            for signature, indices in schema_patterns.items()
            if len(indices) > 1
        }

    def extract_schema_signature(self, dataset):
        """Extract a schema signature."""
        # Simplified schema extraction (columns are dicts, so sort by name)
        if isinstance(dataset, dict):
            if "columns" in dataset:
                return tuple(sorted(col["name"] for col in dataset["columns"]))
            elif "schema" in dataset:
                return tuple(sorted(dataset["schema"].keys()))
        return "unknown_schema"

    def get_schema_key(self, dataset):
        """Return the schema key for a dataset."""
        return self.extract_schema_signature(dataset)

    def process_with_cached_schema(self, dataset, schema_key, analysis_type):
        """Analyze a dataset using a cached schema description."""
        if schema_key not in self.schema_cache:
            # Create and cache the schema description
            schema_description = self.create_schema_description(dataset)
            self.schema_cache[schema_key] = schema_description
        schema_description = self.schema_cache[schema_key]
        # Build a cache-enabled request
        request = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 2000,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Data schema description:\n{schema_description}",
                            "cache_control": {"type": "ephemeral"}
                        },
                        {
                            "type": "text",
                            "text": f"\n\nCurrent dataset:\n{self.format_dataset_sample(dataset)}\n\nAnalysis type: {analysis_type}\n\nPlease analyze the data."
                        }
                    ]
                }
            ]
        }
        response = self.client.messages.create(**request)
        return {
            "dataset_id": dataset.get("id", "unknown"),
            "analysis": response.content[0].text,
            "schema_cached": True,
            "schema_key": schema_key
        }

    def process_individual_dataset(self, dataset, analysis_type):
        """Analyze a dataset whose schema is not shared (placeholder).

        Reuses the cached-schema path but flags the result as uncached.
        """
        result = self.process_with_cached_schema(dataset, self.get_schema_key(dataset), analysis_type)
        result["schema_cached"] = False
        return result

    def create_schema_description(self, dataset):
        """Create a schema description."""
        description = "Schema information:\n"
        if "columns" in dataset:
            description += "Columns:\n"
            for col in dataset["columns"]:
                description += f"- {col['name']}: {col.get('type', 'unknown')} - {col.get('description', 'no description')}\n"
        if "metadata" in dataset:
            description += "\nMetadata:\n"
            for key, value in dataset["metadata"].items():
                description += f"- {key}: {value}\n"
        return description

    def format_dataset_sample(self, dataset):
        """Format a dataset sample."""
        if "sample_data" in dataset:
            return f"Sample data:\n{dataset['sample_data']}"
        elif "preview" in dataset:
            return f"Data preview:\n{dataset['preview']}"
        else:
            return "Dataset info: " + str(dataset.get("info", "none"))
```
## Best Practices Guide
### Choosing a Caching Strategy
#### Content Classification and Cache Decisions
```python
import re

def determine_cache_strategy(content, usage_context):
    """Decide on a caching strategy for a piece of content."""
    content_analysis = analyze_content_characteristics(content)
    usage_analysis = analyze_usage_context(usage_context)
    # Score the content characteristics
    static_score = content_analysis["static_ratio"]
    size_score = min(content_analysis["size"] / 10000, 1.0)
    reuse_potential = usage_analysis["reuse_potential"]
    # Combined score
    cache_score = (static_score * 0.4 + size_score * 0.3 + reuse_potential * 0.3)
    if cache_score >= 0.8:
        return {
            "strategy": "aggressive_caching",
            "cache_level": "full",
            "duration": "extended",
            "recommendation": "strongly recommend caching the entire content"
        }
    elif cache_score >= 0.6:
        return {
            "strategy": "selective_caching",
            "cache_level": "partial",
            "duration": "standard",
            "recommendation": "cache the static parts; process dynamic parts live"
        }
    elif cache_score >= 0.3:
        return {
            "strategy": "minimal_caching",
            "cache_level": "key_parts",
            "duration": "short",
            "recommendation": "cache only key reusable parts"
        }
    else:
        return {
            "strategy": "no_caching",
            "cache_level": "none",
            "duration": "none",
            "recommendation": "caching not recommended"
        }

def estimate_content_complexity(content):
    """Rough complexity estimate (placeholder: unique-word ratio)."""
    words = content.split()
    return len(set(words)) / len(words) if words else 0.0

def analyze_content_characteristics(content):
    """Analyze content characteristics."""
    # Detect how many static-section markers are present
    static_patterns = [
        r"system instructions",
        r"role definition",
        r"document content",
        r"reference material",
        r"background",
        r"rules"
    ]
    static_matches = 0
    for pattern in static_patterns:
        if re.search(pattern, content, re.IGNORECASE):
            static_matches += 1
    static_ratio = static_matches / len(static_patterns)
    return {
        "size": len(content),
        "static_ratio": static_ratio,
        "complexity": estimate_content_complexity(content)
    }

def analyze_usage_context(context):
    """Analyze the usage context."""
    frequency = context.get("expected_frequency", 1)
    time_span = context.get("usage_time_span", 300)  # 5-minute default
    user_count = context.get("concurrent_users", 1)
    # Estimate the reuse potential
    reuse_potential = min((frequency * user_count) / (time_span / 60), 1.0)
    return {
        "reuse_potential": reuse_potential,
        "frequency": frequency,
        "time_span": time_span,
        "user_count": user_count
    }
```
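A quick sketch of the decision function on a large, frequently reused document:

```python
context = {"expected_frequency": 20, "usage_time_span": 600, "concurrent_users": 3}
content = "Document content: " + "reference material " * 2000
decision = determine_cache_strategy(content, context)
print(decision["strategy"], "-", decision["recommendation"])
```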
### Monitoring and Tuning
#### Cache Performance Monitoring
```python
class CachePerformanceMonitor:
    """Cache performance monitor."""

    def __init__(self):
        self.metrics = {
            "cache_hits": 0,
            "cache_misses": 0,
            "total_requests": 0,
            "cache_size": 0,
            "response_times": [],
            "cost_savings": 0
        }
        self.detailed_logs = []

    def record_request(self, request_info):
        """Record one request."""
        self.metrics["total_requests"] += 1
        if request_info["cache_hit"]:
            self.metrics["cache_hits"] += 1
        else:
            self.metrics["cache_misses"] += 1
        self.metrics["response_times"].append(request_info["response_time"])
        self.metrics["cost_savings"] += request_info.get("cost_saving", 0)
        # Detailed log entry
        self.detailed_logs.append({
            "timestamp": time.time(),
            "request_id": request_info.get("request_id"),
            "cache_hit": request_info["cache_hit"],
            "response_time": request_info["response_time"],
            "tokens_processed": request_info.get("tokens_processed"),
            "cost_saving": request_info.get("cost_saving", 0)
        })

    def get_performance_summary(self):
        """Summarize performance."""
        total_requests = self.metrics["total_requests"]
        if total_requests == 0:
            return {"error": "no recorded requests"}
        hit_rate = self.metrics["cache_hits"] / total_requests
        avg_response_time = sum(self.metrics["response_times"]) / len(self.metrics["response_times"])
        total_cost_savings = self.metrics["cost_savings"]
        return {
            "cache_hit_rate": hit_rate,
            "total_requests": total_requests,
            "avg_response_time": avg_response_time,
            "total_cost_savings": total_cost_savings,
            "performance_grade": self.calculate_performance_grade(hit_rate, avg_response_time),
            "recommendations": self.generate_recommendations(hit_rate, avg_response_time)
        }

    def calculate_performance_grade(self, hit_rate, avg_response_time):
        """Compute a letter grade."""
        hit_score = hit_rate * 50  # Up to 50 points
        time_score = min(50, max(0, 50 - (avg_response_time - 1) * 10))  # Up to 50 points
        total_score = hit_score + time_score
        if total_score >= 90:
            return "A"
        elif total_score >= 80:
            return "B"
        elif total_score >= 70:
            return "C"
        elif total_score >= 60:
            return "D"
        else:
            return "F"

    def generate_recommendations(self, hit_rate, avg_response_time):
        """Generate tuning recommendations."""
        recommendations = []
        if hit_rate < 0.5:
            recommendations.append("Cache hit rate is low; widen the cache scope or revise the caching strategy")
        if avg_response_time > 3.0:
            recommendations.append("Response time is high; optimize the cache structure or add warmup")
        if hit_rate > 0.8 and avg_response_time < 1.5:
            recommendations.append("Cache performance is good; consider taking on more complex workloads")
        return recommendations

# Monitoring usage example
monitor = CachePerformanceMonitor()

# Simulated request records
sample_requests = [
    {"cache_hit": True, "response_time": 0.8, "cost_saving": 0.02},
    {"cache_hit": False, "response_time": 2.1, "cost_saving": 0},
    {"cache_hit": True, "response_time": 0.6, "cost_saving": 0.03},
    {"cache_hit": True, "response_time": 0.9, "cost_saving": 0.025}
]
for request in sample_requests:
    monitor.record_request(request)

performance_summary = monitor.get_performance_summary()
print(f"Cache hit rate: {performance_summary['cache_hit_rate']:.2%}")
print(f"Average response time: {performance_summary['avg_response_time']:.2f}s")
print(f"Performance grade: {performance_summary['performance_grade']}")
```
## Troubleshooting Guide
### Diagnosing Common Issues
```python
class CacheTroubleshooter:
    """Cache troubleshooter."""

    def __init__(self):
        self.diagnostic_checks = [
            self.check_token_requirements,
            self.check_model_support,
            self.check_cache_format,
            self.check_content_suitability,
            self.check_api_configuration
        ]

    def diagnose_cache_issues(self, cache_request, error_info=None):
        """Run all diagnostic checks against a request."""
        diagnosis_results = []
        for check in self.diagnostic_checks:
            result = check(cache_request, error_info)
            diagnosis_results.append(result)
        # Aggregate the results
        issues_found = [r for r in diagnosis_results if not r["passed"]]
        return {
            "overall_status": "healthy" if len(issues_found) == 0 else "issues_found",
            "issues_count": len(issues_found),
            "issues": issues_found,
            "recommendations": self.generate_fix_recommendations(issues_found)
        }

    def check_token_requirements(self, request, error_info):
        """Check the minimum token requirement."""
        content = self.extract_cacheable_content(request)
        estimated_tokens = len(content.split()) * 1.3  # Simplified estimate
        model = request.get("model", "")
        min_tokens = 2048 if "haiku" in model else 1024
        if estimated_tokens >= min_tokens:
            return {
                "check": "token_requirements",
                "passed": True,
                "message": f"Token count meets the requirement ({estimated_tokens:.0f} >= {min_tokens})"
            }
        else:
            return {
                "check": "token_requirements",
                "passed": False,
                "message": f"Token count too low ({estimated_tokens:.0f} < {min_tokens})",
                "fix": f"Content needs at least {min_tokens} tokens to be cached"
            }

    def check_model_support(self, request, error_info):
        """Check model support."""
        model = request.get("model", "")
        supported_models = [
            "claude-opus-4", "claude-sonnet-4", "claude-3-7-sonnet",
            "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku",
            "claude-3-opus"
        ]
        model_supported = any(supported in model for supported in supported_models)
        if model_supported:
            return {
                "check": "model_support",
                "passed": True,
                "message": f"Model {model} supports caching"
            }
        else:
            return {
                "check": "model_support",
                "passed": False,
                "message": f"Model {model} does not support caching",
                "fix": "Use a model version that supports caching"
            }

    def check_cache_format(self, request, error_info):
        """Check the cache control format."""
        has_cache_control = self.find_cache_control_in_request(request)
        if has_cache_control:
            return {
                "check": "cache_format",
                "passed": True,
                "message": "Found a correctly formatted cache control marker"
            }
        else:
            return {
                "check": "cache_format",
                "passed": False,
                "message": "No cache control marker found",
                "fix": "Add cache_control: {type: 'ephemeral'} to the content to cache"
            }

    def check_content_suitability(self, request, error_info):
        """Placeholder: content suitability check (always passes in this sketch)."""
        return {"check": "content_suitability", "passed": True,
                "message": "Content suitability check not implemented in this sketch"}

    def check_api_configuration(self, request, error_info):
        """Placeholder: API configuration check (always passes in this sketch)."""
        return {"check": "api_configuration", "passed": True,
                "message": "API configuration check not implemented in this sketch"}

    def extract_cacheable_content(self, request):
        """Collect the content marked for caching."""
        content = ""
        if "system" in request:
            content += str(request["system"])
        if "messages" in request:
            for message in request["messages"]:
                if isinstance(message.get("content"), list):
                    for part in message["content"]:
                        if part.get("cache_control"):
                            content += part.get("text", "")
        return content

    def find_cache_control_in_request(self, request):
        """Search a request for cache control markers."""
        # Check the system message
        if isinstance(request.get("system"), list):
            for item in request["system"]:
                if item.get("cache_control"):
                    return True
        # Check the user messages
        if "messages" in request:
            for message in request["messages"]:
                if isinstance(message.get("content"), list):
                    for part in message["content"]:
                        if part.get("cache_control"):
                            return True
        return False

    def generate_fix_recommendations(self, issues):
        """Generate fix recommendations."""
        if not issues:
            return ["Cache configuration looks healthy; nothing to fix"]
        recommendations = []
        for issue in issues:
            if "fix" in issue:
                recommendations.append(f"{issue['check']}: {issue['fix']}")
        # Add general advice
        recommendations.extend([
            "Make sure the API key is valid and has sufficient quota",
            "Check that the network connection is stable",
            "Consider adding a retry mechanism for transient errors"
        ])
        return recommendations

# Troubleshooting usage example
troubleshooter = CacheTroubleshooter()

# Example of a problematic request
problematic_request = {
    "model": "claude-3-5-haiku-20241022",
    "messages": [
        {
            "role": "user",
            "content": "A short question"  # Too short to cache
        }
    ]
}

diagnosis = troubleshooter.diagnose_cache_issues(problematic_request)
print("Cache diagnosis:")
print(f"Status: {diagnosis['overall_status']}")
print(f"Issues found: {diagnosis['issues_count']}")
for issue in diagnosis['issues']:
    print(f"- {issue['check']}: {issue['message']}")
print("\nFix recommendations:")
for rec in diagnosis['recommendations']:
    print(f"- {rec}")
```
By applying these caching strategies and optimization techniques, you can significantly improve Claude API performance, reduce usage costs, and deliver a smoother user experience.