前言:本文用到两种设计模式:策略模式与中介者模式。策略模式:由核心决策层(LLM)根据输入信息决定需要使用的分支(单一 agent、skill 或 MCP 等);具体的执行分支则通过一个中介(agent)分发给不同的执行者 agent(即中介者模式),最终由中介收集执行者的反馈并进入下一步。
简化版:中介 agent 接收用户输入或执行者 agent 的反馈,并转发给 LLM;LLM 分析、识别意图后,决定由哪个执行者 agent 处理;中介 agent 再把任务转发给对应的执行者,收集其反馈后回传给 LLM。

中介者agent职责:接收用户输入;管理会话上下文;分发决策给执行者;接收执行者反馈
python
class ConversationContext:
    """Per-session conversation state kept by the mediator agent.

    Holds the list of dialogue turns, the id of the turn in progress, a
    bounded working memory of key entities taken from LLM decisions, and a
    scratch cache with the latest result returned by each executor.
    """

    # Upper bound on working-memory entries (the "7 +/- 2" rule of thumb).
    WORKING_MEMORY_CAPACITY = 7

    def __init__(self):
        self.turns = []  # dialogue turns, oldest first
        self.current_turn_id = None
        # Working memory (short-term): entity type -> value. Dict order is
        # kept as "least recently written first" so eviction is trivial.
        self.working_memory = {}
        # "last_<executor>_result" -> most recent result of that executor.
        # Created here because add_executor_result() writes to it.
        self.temp_data = {}

    def start_turn(self, user_input):
        """Start a new dialogue turn and make it the current one.

        Args:
            user_input: The raw user input that opens the turn.

        Returns:
            The newly created turn record (a dict).
        """
        turn_id = generate_turn_id()
        self.current_turn_id = turn_id
        turn = {
            "id": turn_id,
            "user_input": user_input,
            "timestamp": now(),
            "state": "started",
            "llm_decisions": [],
            "executor_results": [],
            "final_response": None,
        }
        self.turns.append(turn)
        return turn

    def add_llm_decision(self, decision):
        """Record an LLM decision on the current turn.

        Args:
            decision: The decision object; if it contains "key_entities",
                those entities are copied into working memory.

        Raises:
            RuntimeError: If no turn is in progress.
        """
        turn = self._get_current_turn()
        turn["llm_decisions"].append({
            "decision": decision,
            "timestamp": now(),
            "decision_id": generate_id(),
        })
        # Keep working memory in sync with the latest decision.
        self._update_working_memory(decision)

    def add_executor_result(self, executor_name, result):
        """Record a result returned by an executor on the current turn.

        Args:
            executor_name: Name of the executor that produced the result.
            result: The executor's result payload.

        Raises:
            RuntimeError: If no turn is in progress.
        """
        turn = self._get_current_turn()
        turn["executor_results"].append({
            "executor": executor_name,
            "result": result,
            "timestamp": now(),
        })
        # Cache the result so later decisions can look it up directly.
        self.temp_data[f"last_{executor_name}_result"] = result

    def _get_current_turn(self):
        """Return the record of the turn identified by current_turn_id.

        Raises:
            RuntimeError: If no turn has been started, or the current id
                does not match any recorded turn.
        """
        if self.current_turn_id is not None:
            # The current turn is normally the last one, so search backwards.
            for turn in reversed(self.turns):
                if turn["id"] == self.current_turn_id:
                    return turn
        raise RuntimeError("no active turn; call start_turn() first")

    def _update_working_memory(self, decision):
        """Store the decision's key entities and enforce the capacity limit.

        Each entity is stored as ``entity["type"] -> entity["value"]``. When
        the memory exceeds WORKING_MEMORY_CAPACITY, the least recently
        written entries are dropped until it fits again.

        Args:
            decision: A container that may hold a "key_entities" list of
                dicts with "type" and "value" keys.
        """
        if "key_entities" in decision:
            for entity in decision["key_entities"]:
                key = entity["type"]
                # Remove before re-inserting so the key moves to the end of
                # the dict, i.e. becomes the most recently written entry.
                self.working_memory.pop(key, None)
                self.working_memory[key] = entity["value"]
        # A single decision may add several entities, so evict in a loop.
        while len(self.working_memory) > self.WORKING_MEMORY_CAPACITY:
            oldest_key = next(iter(self.working_memory))
            del self.working_memory[oldest_key]
任务队列和依赖管理
python
class TaskManager:
    """Priority queue of executor tasks with dependency tracking.

    Tasks without unfinished dependencies go straight onto ``task_queue``;
    the others are parked in ``waiting_tasks`` until every dependency has
    reported a result through resolve_dependencies().

    Queue entries are ``(priority, sequence, task)`` tuples. The sequence
    number breaks ties between equal priorities so the task dicts themselves
    are never compared (dicts do not support ordering).
    """

    def __init__(self):
        self.task_queue = PriorityQueue()
        self.task_dependencies = {}  # task id -> dependency ids still unfinished
        self.task_results = {}  # completed task id -> result
        self.waiting_tasks = {}  # task id -> task record blocked on dependencies
        self._enqueue_count = 0  # monotonically increasing tie-breaker

    def create_task(self, executor, params, dependencies=None, priority=0):
        """Create a task and either queue it or park it on its dependencies.

        Args:
            executor: The executor that should run the task.
            params: Parameters handed to the executor.
            dependencies: Optional iterable of task ids that must complete
                first. The caller's object is copied, never mutated.
            priority: Queue priority; lower values are dequeued first.

        Returns:
            The id of the newly created task.
        """
        task_id = generate_task_id()
        # De-duplicate while keeping order; a private copy keeps later
        # bookkeeping from touching the caller's list.
        dependency_ids = list(dict.fromkeys(dependencies or []))
        task = {
            "id": task_id,
            "executor": executor,
            "params": params,
            "status": "pending",
            "priority": priority,
            "dependencies": dependency_ids,
            "retry_count": 0,
            "max_retries": 3,
            "created_at": now(),
        }
        # Dependencies that already finished must not block the task.
        unfinished = [dep for dep in dependency_ids if dep not in self.task_results]
        if unfinished:
            # Still blocked: park the task until its dependencies resolve.
            self.task_dependencies[task_id] = unfinished
            self._add_to_waiting_queue(task)
        else:
            # Nothing to wait for: the task is runnable right away.
            self._release(task)
        return task_id

    def resolve_dependencies(self, completed_task_id, result):
        """Record a finished task and release tasks that were waiting on it.

        Args:
            completed_task_id: Id of the task that just completed.
            result: The result produced by that task.
        """
        self.task_results[completed_task_id] = result
        # Iterate over a snapshot because resolved entries are deleted below.
        for task_id, deps in list(self.task_dependencies.items()):
            if completed_task_id not in deps:
                continue
            deps.remove(completed_task_id)
            if deps:
                continue  # still waiting on something else
            del self.task_dependencies[task_id]
            self._release(self._get_waiting_task(task_id))

    def _release(self, task):
        """Inject available dependency results into ``task`` and enqueue it."""
        if task["dependencies"]:
            results = {
                dep: self.task_results[dep]
                for dep in task["dependencies"]
                if dep in self.task_results
            }
            task["params"] = self._inject_dependency_results(task["params"], results)
        self._enqueue(task)

    def _enqueue(self, task):
        """Put ``task`` on the run queue as ``(priority, sequence, task)``."""
        self.task_queue.put((task["priority"], self._enqueue_count, task))
        self._enqueue_count += 1

    def _add_to_waiting_queue(self, task):
        """Park ``task`` until its dependencies are resolved."""
        self.waiting_tasks[task["id"]] = task

    def _get_waiting_task(self, task_id):
        """Remove and return the parked task with the given id.

        Raises:
            KeyError: If no task with that id is waiting.
        """
        return self.waiting_tasks.pop(task_id)

    def _inject_dependency_results(self, params, results):
        """Return a copy of ``params`` extended with dependency results.

        Default behaviour: the results are attached under the
        "dependency_results" key as ``{dependency id: result}``. Override
        this method if executors expect a different layout.
        """
        merged = dict(params or {})
        merged["dependency_results"] = dict(results)
        return merged
3.3 对话状态持久化
python
class SessionPersistence:
    """Saves and restores session checkpoints through a storage backend.

    A checkpoint written by save_session_checkpoint() is also kept in an
    in-memory cache, which restore_session() consults before the backend.
    """

    def __init__(self, storage_backend):
        self.cache = {}  # session id -> last checkpoint saved for it
        self.storage = storage_backend

    async def save_session_checkpoint(self, session_id, context):
        """Persist a checkpoint of ``context`` so the chat can be resumed.

        Args:
            session_id: Identifier of the session being saved.
            context: Session context; this method reads ``to_dict()``,
                ``task_manager`` and ``conversation.turns`` from it.
        """
        # Values are computed in the same order the checkpoint lists them.
        saved_at = now()
        context_dump = context.to_dict()
        task_state = self._serialize_task_manager(context.task_manager)
        recent_turns = context.conversation.turns[-10:]  # last 10 turns only
        metadata = {
            "version": "1.0",
            "checksum": self._calculate_checksum(context),
        }
        checkpoint = {
            "session_id": session_id,
            "timestamp": saved_at,
            "context": context_dump,
            "task_manager_state": task_state,
            "conversation_history": recent_turns,
            "metadata": metadata,
        }
        # Write to the backend first, then refresh the cache.
        storage_key = f"session_{session_id}"
        await self.storage.save(storage_key, checkpoint)
        self.cache[session_id] = checkpoint

    async def restore_session(self, session_id):
        """Rebuild the context of a previously saved session.

        Args:
            session_id: Identifier of the session to restore.

        Returns:
            The context rebuilt from the checkpoint.

        Raises:
            SessionCorruptionError: If the checkpoint fails checksum
                verification.
        """
        try:
            checkpoint = self.cache[session_id]
        except KeyError:
            # Not cached: fall back to the storage backend.
            checkpoint = await self.storage.load(f"session_{session_id}")
        # Integrity check before anything is rebuilt from the checkpoint.
        if self._verify_checksum(checkpoint):
            return self._rebuild_context(checkpoint)
        raise SessionCorruptionError()
上下文窗口管理
python
class ContextWindowManager:
    """Keeps a token-bounded window of context entries.

    ``window`` and ``token_counts`` are parallel lists in insertion
    (chronological) order: ``token_counts[i]`` is the token count of
    ``window[i]``. Token counting and importance scoring are delegated to
    ``_count_tokens`` and ``_calculate_importance``, which are not defined
    in this class and must be supplied elsewhere.
    """

    def __init__(self, max_tokens=4000):
        self.max_tokens = max_tokens
        self.window = []
        self.token_counts = []

    def add_to_window(self, content, content_type):
        """Append ``content`` to the window, evicting entries to make room.

        Args:
            content: The content to add.
            content_type: A label stored with the entry under "type".

        Raises:
            ValueError: If ``content`` alone needs more than ``max_tokens``
                tokens. Nothing is evicted in that case.
        """
        tokens = self._count_tokens(content)
        if tokens > self.max_tokens:
            # Evicting everything would still not make this fit, so fail
            # before destroying the existing window.
            raise ValueError(
                f"content needs {tokens} tokens, window limit is {self.max_tokens}"
            )
        # Over budget: drop entries until the new content fits.
        while self.window and sum(self.token_counts) + tokens > self.max_tokens:
            self._evict_oldest()
        self.window.append({
            "content": content,
            "type": content_type,
            "tokens": tokens,
            "timestamp": now(),
            "importance": self._calculate_importance(content),
        })
        self.token_counts.append(tokens)

    def _evict_oldest(self):
        """Remove and return the least important, oldest entry.

        The entry with the lowest "importance" is chosen; among equals the
        oldest one goes first. The matching element of ``token_counts`` is
        removed together with it, so both lists stay aligned.

        Returns:
            The removed entry, or None when the window is empty.
        """
        if not self.window:
            return None
        # The window is in insertion order, so a smaller index means an
        # older entry; using the index as tie-breaker avoids re-sorting
        # the window (which would break alignment with token_counts).
        victim = min(
            range(len(self.window)),
            key=lambda i: (self.window[i]["importance"], i),
        )
        self.token_counts.pop(victim)
        return self.window.pop(victim)
意图跟踪和消歧
python
class IntentTracker:
    """Tracks nested user intents and open clarification questions."""

    def __init__(self):
        self.intent_stack = []  # top-level intent frames, oldest first
        self.pending_clarifications = []  # questions awaiting an answer

    def track_intent_flow(self, intent, parent_intent=None):
        """Register ``intent``, nesting it under ``parent_intent`` if found.

        When ``parent_intent`` is given and an active frame for it exists,
        the new frame becomes one of its sub-intents. Otherwise (no parent
        given, or the parent cannot be found) the frame is pushed onto the
        stack as a top-level intent so it is never lost.

        Args:
            intent: The intent being started.
            parent_intent: Optional intent this one belongs to.

        Returns:
            The newly created intent frame (a dict).
        """
        intent_frame = {
            "intent": intent,
            "parent": parent_intent,
            "status": "active",
            "start_time": now(),
            "sub_intents": [],
        }
        parent_frame = None
        if parent_intent:
            parent_frame = self._find_active_intent(parent_intent)
        if parent_frame:
            parent_frame["sub_intents"].append(intent_frame)
        else:
            self.intent_stack.append(intent_frame)
        return intent_frame

    def _find_active_intent(self, intent):
        """Return the most recent active frame for ``intent``, or None.

        Searches the stack and all nested sub-intents depth-first, visiting
        the most recently added frames first.
        """
        pending = list(self.intent_stack)
        while pending:
            frame = pending.pop()
            if frame["intent"] == intent and frame["status"] == "active":
                return frame
            pending.extend(frame["sub_intents"])
        return None

    def needs_clarification(self, ambiguity):
        """Queue a clarification request for an ambiguous intent.

        Args:
            ambiguity: Description of what is unclear.

        Returns:
            The id of the queued clarification.
        """
        clarification = {
            "id": generate_id(),
            "ambiguity": ambiguity,
            "asked": False,
            "response": None,
        }
        self.pending_clarifications.append(clarification)
        return clarification["id"]
异常处理与会话恢复
python
class SessionRecovery:
    """Chooses and runs a recovery strategy after a session error.

    Several helpers used here (``_default_recovery``, ``_fallback_recovery``,
    ``_validate_recovery``, ``_use_cached_decision``,
    ``_try_alternative_executor``, ``_generate_summary``) are not defined in
    this class and must be supplied elsewhere, e.g. by a subclass.
    """

    # error type -> name of the method that handles it
    _STRATEGY_METHODS = {
        "timeout": "_handle_timeout",
        "executor_failure": "_handle_executor_failure",
        "context_overflow": "_handle_context_overflow",
        "llm_error": "_handle_llm_error",
    }

    def __init__(self, session_manager):
        self.session_manager = session_manager
        # Register only the handlers that actually exist on this object.
        # Referencing a missing handler directly would make construction
        # fail; unregistered error types go to the default recovery instead.
        self.recovery_strategies = {
            error_type: getattr(self, method_name)
            for error_type, method_name in self._STRATEGY_METHODS.items()
            if hasattr(self, method_name)
        }

    async def recover_session(self, session_id, error_type, error_info):
        """Recover a session after an error.

        Args:
            session_id: Identifier of the failed session.
            error_type: Key used to pick the recovery strategy.
            error_info: Details about the error, passed to the strategy.

        Returns:
            The recovered state if it validates, otherwise the result of
            the fallback recovery.
        """
        # 1. Capture the state at the time of the error.
        snapshot = await self.session_manager.create_snapshot(session_id)
        # 2. Pick a strategy; unknown error types use the default one.
        strategy = self.recovery_strategies.get(error_type)
        if not strategy:
            strategy = self._default_recovery
        # 3. Run it.
        recovered_state = await strategy(snapshot, error_info)
        # 4. Validate the outcome.
        if self._validate_recovery(recovered_state):
            return recovered_state
        # Degraded recovery: restart the current turn.
        return await self._fallback_recovery(snapshot)

    async def _handle_timeout(self, snapshot, error_info):
        """Handle a timeout according to the stage that timed out.

        Args:
            snapshot: Session snapshot taken when the error occurred.
            error_info: Mapping whose "point" entry names the stage that
                timed out; may be None or empty.

        Returns:
            The recovered state, or None when the timeout point is not
            recognised (recover_session() then validates that None).
        """
        timeout_point = (error_info or {}).get("point")
        if timeout_point == "llm_decision":
            # LLM decision timed out: reuse a cached decision or simplify.
            return await self._use_cached_decision(snapshot)
        if timeout_point == "executor":
            # Executor timed out: try an alternative executor.
            return await self._try_alternative_executor(snapshot)
        return None

    async def _handle_context_overflow(self, snapshot, error_info):
        """Shrink an overflowing context to a summary plus recent turns.

        Everything except the last five turns is summarised; the last five
        turns are kept verbatim.

        Args:
            snapshot: Snapshot whose "context" holds "conversation_history"
                and "active_intent".
            error_info: Unused.

        Returns:
            A dict with "summary", "recent_history" and "active_intent".
            "summary" is None when there are no earlier turns to summarise.
        """
        context = snapshot["context"]
        history = context["conversation_history"]
        early_turns = history[:-5]  # everything but the last 5 turns
        # Skip the summariser when there is nothing to summarise.
        summary = await self._generate_summary(early_turns) if early_turns else None
        # New context: summary + last 5 turns in full.
        return {
            "summary": summary,
            "recent_history": history[-5:],
            "active_intent": context["active_intent"],
        }