假设我要实现一个agent群体

前言：这里用到两种设计模式——策略模式和中介者模式。策略模式：由一个核心决策层（LLM）根据输入信息决定需要用到哪个分支（单一 agent，或者 skill、MCP 等）；中介者模式：具体的执行分支通过一个中介（agent）分发给不同的执行者 agent，最终由中介收集执行者的反馈，再进入下一步。

简略版：中介 agent 接收用户输入或执行者 agent 的反馈，并将其发送给 LLM；LLM 分析并识别意图后，决策出由哪个执行者 agent 处理；中介 agent 再把任务转发给对应的执行者，收集其反馈并回传给 LLM。

中介者agent职责：接收用户输入；管理会话上下文；分发决策给执行者；接收执行者反馈

python 复制代码

class ConversationContext:
    """Per-session conversation state kept by the mediator agent.

    Holds the ordered dialogue turns, the id of the turn in progress, a small
    bounded working memory of key entities, and a scratch cache with the
    latest result returned by each executor.
    """

    # Upper bound on working-memory entries (the "7 +/- 2" rule of thumb).
    WORKING_MEMORY_CAPACITY = 7

    def __init__(self):
        self.turns = []  # dialogue turns, oldest first
        self.current_turn_id = None  # id of the turn in progress, if any
        # Working memory (akin to human short-term memory). Key order is
        # recency of write: least recently written first.
        self.working_memory = {}
        # Latest result per executor, keyed "last_<executor_name>_result".
        self.temp_data = {}

    def start_turn(self, user_input):
        """Open a new dialogue turn and make it the current one.

        Args:
            user_input: The input that triggered this turn.

        Returns:
            The new turn record, which is also appended to ``self.turns``.
        """
        turn_id = generate_turn_id()
        self.current_turn_id = turn_id

        turn = {
            "id": turn_id,
            "user_input": user_input,
            "timestamp": now(),
            "state": "started",
            "llm_decisions": [],
            "executor_results": [],
            "final_response": None,
        }
        self.turns.append(turn)
        return turn

    def add_llm_decision(self, decision):
        """Record an LLM decision on the current turn.

        Args:
            decision: The decision to record. If it contains a
                ``"key_entities"`` entry, those entities are also copied into
                working memory.

        Raises:
            RuntimeError: If no turn is currently active.
        """
        turn = self._get_current_turn()
        turn["llm_decisions"].append({
            "decision": decision,
            "timestamp": now(),
            "decision_id": generate_id(),
        })

        # Keep working memory in step with the latest decision.
        self._update_working_memory(decision)

    def add_executor_result(self, executor_name, result):
        """Record a result returned by an executor on the current turn.

        Args:
            executor_name: Name of the executor that produced ``result``.
            result: The value the executor returned.

        Raises:
            RuntimeError: If no turn is currently active.
        """
        turn = self._get_current_turn()
        turn["executor_results"].append({
            "executor": executor_name,
            "result": result,
            "timestamp": now(),
        })

        # Cache the result so later decisions can refer to it.
        self.temp_data[f"last_{executor_name}_result"] = result

    def _get_current_turn(self):
        """Return the turn record whose id equals ``current_turn_id``.

        Raises:
            RuntimeError: If no recorded turn matches ``current_turn_id``.
        """
        # Search newest-first: the current turn is normally the last one.
        for turn in reversed(self.turns):
            if turn["id"] == self.current_turn_id:
                return turn
        raise RuntimeError("no active turn; call start_turn() first")

    def _update_working_memory(self, decision):
        """Copy key entities from ``decision`` into working memory.

        Each entity is stored as ``working_memory[entity["type"]] =
        entity["value"]``. When the memory grows past
        ``WORKING_MEMORY_CAPACITY`` the least recently written entries are
        dropped until it fits again.
        """
        if "key_entities" in decision:
            for entity in decision["key_entities"]:
                key = entity["type"]
                # Remove first so re-insertion moves the key to the end;
                # dict order then reflects recency of write.
                self.working_memory.pop(key, None)
                self.working_memory[key] = entity["value"]

        # One decision may add several entities, so evict in a loop.
        while len(self.working_memory) > self.WORKING_MEMORY_CAPACITY:
            oldest_key = next(iter(self.working_memory))
            del self.working_memory[oldest_key]

任务队列和依赖管理

python 复制代码

class TaskManager:
    """Queue of executor tasks with simple dependency tracking.

    Tasks without outstanding dependencies go straight onto ``task_queue``;
    the others wait until ``resolve_dependencies`` has been called for every
    task they depend on.

    NOTE(review): ``_add_to_waiting_queue``, ``_get_waiting_task`` and
    ``_inject_dependency_results`` are called here but are not defined in this
    snippet - they must be supplied before dependent tasks can be used.
    """

    def __init__(self):
        # Entries are (priority, sequence, task); lowest priority value first.
        self.task_queue = PriorityQueue()
        # Dependency graph: task id -> ids of dependencies still outstanding.
        self.task_dependencies = {}
        self.task_results = {}  # completed task id -> its result
        # Monotonic tie-breaker so equal-priority entries never fall through
        # to comparing the task dicts (dicts do not support "<").
        self._sequence = 0

    def create_task(self, executor, params, dependencies=None, priority=0):
        """Create a task and either queue it or park it behind its dependencies.

        Args:
            executor: Executor that should run the task.
            params: Parameters for the executor.
            dependencies: Optional ids of tasks that must complete first.
            priority: Queue priority; lower values are dequeued first.

        Returns:
            The id of the new task.
        """
        task_id = generate_task_id()
        # Private, de-duplicated copy: the caller's list is never mutated and
        # a repeated id cannot keep the task waiting forever.
        required = list(dict.fromkeys(dependencies or []))
        task = {
            "id": task_id,
            "executor": executor,
            "params": params,
            "status": "pending",
            "priority": priority,
            "dependencies": required,
            "retry_count": 0,
            "max_retries": 3,
            "created_at": now(),
        }

        # Dependencies that already completed must not block the task.
        outstanding = [dep for dep in required if dep not in self.task_results]
        if outstanding:
            # Still blocked: park the task until its dependencies resolve.
            self.task_dependencies[task_id] = outstanding
            self._add_to_waiting_queue(task)
        else:
            if required:
                task["params"] = self._inject_dependency_results(
                    task["params"],
                    self.task_results,
                )
            # Nothing outstanding: ready to run.
            self._enqueue(task)

        return task_id

    def resolve_dependencies(self, completed_task_id, result):
        """Record a finished task and release any tasks that were waiting on it.

        Args:
            completed_task_id: Id of the task that just completed.
            result: The result it produced; stored in ``task_results``.
        """
        self.task_results[completed_task_id] = result

        # Iterate over a snapshot because resolved entries are deleted below.
        for task_id, deps in list(self.task_dependencies.items()):
            if completed_task_id not in deps:
                continue
            deps[:] = [dep for dep in deps if dep != completed_task_id]
            if deps:
                continue  # still waiting on other tasks

            # Every dependency is done: hand their results over and queue it.
            task = self._get_waiting_task(task_id)
            task["params"] = self._inject_dependency_results(
                task["params"],
                self.task_results,
            )
            self._enqueue(task)
            del self.task_dependencies[task_id]

    def _enqueue(self, task):
        """Put ``task`` on the run queue, FIFO among equal priorities."""
        self.task_queue.put((task.get("priority", 0), self._sequence, task))
        self._sequence += 1

对话状态持久化

python 复制代码

class SessionPersistence:
    """Save and restore session checkpoints through a storage backend.

    The most recent checkpoint saved for each session id is also kept in an
    in-memory cache, which ``restore_session`` consults before the backend.
    """

    def __init__(self, storage_backend):
        self.cache = {}  # session id -> last checkpoint saved for it
        self.storage = storage_backend

    async def save_session_checkpoint(self, session_id, context):
        """Persist a checkpoint of ``context`` so the session can be resumed.

        Args:
            session_id: Identifier of the session being saved.
            context: Object providing ``to_dict()``, ``task_manager`` and
                ``conversation.turns``.
        """
        saved_at = now()
        context_state = context.to_dict()
        task_state = self._serialize_task_manager(context.task_manager)
        recent_turns = context.conversation.turns[-10:]  # last 10 turns only
        integrity = {
            "version": "1.0",
            "checksum": self._calculate_checksum(context),
        }
        record = {
            "session_id": session_id,
            "timestamp": saved_at,
            "context": context_state,
            "task_manager_state": task_state,
            "conversation_history": recent_turns,
            "metadata": integrity,
        }

        # Write to the backend first; the cache is refreshed only afterwards.
        storage_key = f"session_{session_id}"
        await self.storage.save(storage_key, record)
        self.cache[session_id] = record

    async def restore_session(self, session_id):
        """Rebuild a session context from its cached or stored checkpoint.

        Args:
            session_id: Identifier of the session to restore.

        Returns:
            The context produced by ``_rebuild_context`` for the checkpoint.

        Raises:
            SessionCorruptionError: If the checkpoint fails checksum
                verification.
        """
        record = (
            self.cache[session_id]
            if session_id in self.cache
            else await self.storage.load(f"session_{session_id}")
        )

        # Only rebuild from a checkpoint that passes the integrity check.
        if self._verify_checksum(record):
            return self._rebuild_context(record)
        raise SessionCorruptionError()

上下文窗口管理

python 复制代码

class ContextWindowManager:
    """Keep a list of context entries within a fixed token budget.

    ``window`` holds the entries in insertion order and ``token_counts`` holds
    the token count of the entry at the same index.
    """

    def __init__(self, max_tokens=4000):
        self.max_tokens = max_tokens
        self.window = []  # context entries, in insertion order
        self.token_counts = []  # token_counts[i] belongs to window[i]

    def add_to_window(self, content, content_type):
        """Append ``content`` to the window, evicting entries to make room.

        Args:
            content: The content to add.
            content_type: Label stored with the entry under ``"type"``.

        Raises:
            ValueError: If ``content`` alone needs more than ``max_tokens``
                tokens. The window is left untouched in that case.
        """
        tokens = self._count_tokens(content)
        if tokens > self.max_tokens:
            # No amount of eviction can make this fit; fail before dropping
            # any existing context.
            raise ValueError(
                f"content needs {tokens} tokens but the window holds at most "
                f"{self.max_tokens}"
            )

        # Over budget: evict until the new entry fits.
        while self.window and sum(self.token_counts) + tokens > self.max_tokens:
            self._evict_oldest()

        self.window.append({
            "content": content,
            "type": content_type,
            "tokens": tokens,
            "timestamp": now(),
            "importance": self._calculate_importance(content),
        })
        self.token_counts.append(tokens)

    def _evict_oldest(self):
        """Remove and return the least important entry, oldest first on ties.

        The remaining entries keep their order, and the matching element of
        ``token_counts`` is removed together with the entry.

        Raises:
            IndexError: If the window is empty.
        """
        if not self.window:
            raise IndexError("cannot evict from an empty context window")

        # Choose the victim by index rather than sorting, so chronological
        # order is preserved and both lists stay aligned.
        victim = min(
            range(len(self.window)),
            key=lambda i: (
                self.window[i]["importance"],
                self.window[i]["timestamp"].timestamp(),
            ),
        )
        removed = self.window.pop(victim)
        self.token_counts.pop(victim)
        return removed

意图跟踪和消歧

python 复制代码

class IntentTracker:
    """Track the intents in play and the clarifications still outstanding.

    NOTE(review): ``_find_active_intent`` is called here but is not defined in
    this snippet - it must be supplied before nested intents can be tracked.
    """

    def __init__(self):
        self.intent_stack = []  # top-level intent frames
        self.pending_clarifications = []  # clarification requests not yet answered

    def track_intent_flow(self, intent, parent_intent=None):
        """Record ``intent``, nesting it under ``parent_intent`` when possible.

        Args:
            intent: The intent to start tracking.
            parent_intent: Optional intent this one is a sub-intent of.
        """
        intent_frame = {
            "intent": intent,
            "parent": parent_intent,
            "status": "active",
            "start_time": now(),
            "sub_intents": [],
        }

        parent_frame = (
            self._find_active_intent(parent_intent) if parent_intent else None
        )
        if parent_frame:
            # Attach as a child of the active parent frame.
            parent_frame["sub_intents"].append(intent_frame)
        else:
            # Top-level intent, or the named parent has no active frame. In
            # the latter case the frame is kept at top level instead of being
            # silently dropped; its "parent" field still names the parent.
            self.intent_stack.append(intent_frame)

    def needs_clarification(self, ambiguity):
        """Queue a clarification request for an ambiguous intent.

        Args:
            ambiguity: Description of what is unclear.

        Returns:
            The id of the new clarification entry.
        """
        clarification = {
            "id": generate_id(),
            "ambiguity": ambiguity,
            "asked": False,
            "response": None,
        }
        self.pending_clarifications.append(clarification)
        return clarification["id"]

异常处理与会话恢复

python 复制代码

class SessionRecovery:
    """Recover a session after a failure by dispatching on the error type.

    NOTE(review): ``__init__`` binds ``_handle_executor_failure`` and
    ``_handle_llm_error``, which are not visible in this part of the class -
    confirm they are defined, otherwise construction raises AttributeError.
    """

    def __init__(self, session_manager):
        self.session_manager = session_manager
        # Error type -> coroutine method that handles it.
        strategies = {}
        strategies["timeout"] = self._handle_timeout
        strategies["executor_failure"] = self._handle_executor_failure
        strategies["context_overflow"] = self._handle_context_overflow
        strategies["llm_error"] = self._handle_llm_error
        self.recovery_strategies = strategies

    async def recover_session(self, session_id, error_type, error_info):
        """Snapshot the session, run the matching strategy, validate the result.

        Args:
            session_id: Identifier of the session to recover.
            error_type: Key into ``recovery_strategies``; unknown types use
                ``_default_recovery``.
            error_info: Details of the error, passed on to the strategy.

        Returns:
            The recovered state if it passes ``_validate_recovery``, otherwise
            the result of ``_fallback_recovery`` on the snapshot.
        """
        # Capture the failing state before anything tries to repair it.
        snapshot = await self.session_manager.create_snapshot(session_id)

        handler = self.recovery_strategies.get(error_type) or self._default_recovery
        outcome = await handler(snapshot, error_info)

        if not self._validate_recovery(outcome):
            # Degraded path: restart the current turn from the snapshot.
            return await self._fallback_recovery(snapshot)
        return outcome

    async def _handle_timeout(self, snapshot, error_info):
        """Recover from a timeout according to the stage that timed out.

        Reads the stage from ``error_info["point"]``. Returns ``None`` when
        the stage is neither ``"llm_decision"`` nor ``"executor"``.
        """
        stage = error_info.get("point")

        if stage == "llm_decision":
            # The LLM decision timed out: reuse a cached decision instead.
            return await self._use_cached_decision(snapshot)
        if stage == "executor":
            # The executor timed out: try an alternative executor.
            return await self._try_alternative_executor(snapshot)
        return None

    async def _handle_context_overflow(self, snapshot, error_info):
        """Shrink an overflowing context to a summary plus the last 5 turns.

        Returns:
            A dict with ``"summary"``, ``"recent_history"`` and
            ``"active_intent"`` entries.
        """
        ctx = snapshot["context"]

        # Everything except the 5 most recent turns gets summarised.
        older_turns = ctx["conversation_history"][:-5]
        digest = await self._generate_summary(older_turns)

        return {
            "summary": digest,
            "recent_history": ctx["conversation_history"][-5:],
            "active_intent": ctx["active_intent"],
        }