垂直Agent才是未来:详解让大模型"专业对口"的三大核心技术

1. 引言

在通用大模型快速发展的今天,垂直领域的专业化应用正成为AI落地的关键方向。垂直Agent通过结合领域专业知识,为大模型提供"专业对口"的能力,在医疗、金融、法律等专业领域展现出巨大价值。本文将详细解析实现垂直Agent的三大核心技术:RAG、ReAct和CoT,并提供完整的实操指南。

2. RAG技术详解与实现

2.1 RAG技术原理

检索增强生成(Retrieval-Augmented Generation,RAG)通过将外部知识库与LLM结合,缓解大模型的幻觉问题和知识滞后性。其核心流程包括文档处理、向量检索和生成增强三个环节。

2.1.1 RAG系统架构

创建文件:rag_architecture.py

python 复制代码
class RAGSystem:
    """Skeleton of a retrieval-augmented generation pipeline.

    The class only wires four collaborators together (document processor,
    vector store, retriever, generator); all real work is delegated to them.
    """

    def __init__(self):
        self.document_processor = DocumentProcessor()
        self.vector_store = VectorStore()
        self.retriever = Retriever()
        self.generator = Generator()

    def process_documents(self, documents):
        """Index *documents*: split into chunks, embed them, store the vectors."""
        processor = self.document_processor
        pieces = processor.split_documents(documents)
        vectors = processor.generate_embeddings(pieces)
        self.vector_store.store_vectors(pieces, vectors)

    def query(self, question, top_k=5):
        """Answer *question* using the ``top_k`` most similar chunks as context."""
        encoded_question = self.retriever.encode_query(question)
        hits = self.retriever.retrieve_similar(encoded_question, top_k)
        context = self._build_context(hits)
        return self.generator.generate(question, context)

    def _build_context(self, chunks):
        """Concatenate the ``text`` of each chunk, separated by blank lines."""
        texts = (piece.text for piece in chunks)
        return "\n\n".join(texts)

2.2 RAG完整实现步骤

步骤1:环境准备与依赖安装

创建文件:requirements_rag.txt

txt 复制代码
langchain==0.0.346
langchain-community==0.0.7
openai==1.3.0
chromadb==0.4.15
sentence-transformers==2.2.2
pypdf==3.17.0
tiktoken==0.5.1
faiss-cpu==1.7.4
numpy==1.24.3
python-dotenv==1.0.0

创建文件:setup_environment.py

python 复制代码
import os
import subprocess
import sys
from dotenv import load_dotenv

def setup_environment():
    """Install the pinned dependencies and verify the required environment.

    Runs ``pip install -r requirements_rag.txt`` with the current interpreter,
    calls ``load_dotenv()``, then checks that ``OPENAI_API_KEY`` is set.

    Returns:
        bool: True when installation succeeded and every required variable
        is present, False otherwise.
    """
    pip_command = [
        sys.executable, "-m", "pip", "install", "-r", "requirements_rag.txt"
    ]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError:
        print("❌ 依赖安装失败")
        return False
    else:
        print("✅ 依赖安装成功")

    # Pull variables from a .env file into the process environment.
    load_dotenv()

    # Collect every required variable that is unset or empty.
    missing_vars = []
    for name in ('OPENAI_API_KEY',):
        if not os.getenv(name):
            missing_vars.append(name)

    if not missing_vars:
        print("✅ 环境设置完成")
        return True

    print(f"❌ 缺少环境变量: {missing_vars}")
    print("请在 .env 文件中设置这些变量")
    return False

# Script entry point: run the setup only when the file is executed directly.
# NOTE: the boolean returned by setup_environment() is not inspected here.
if __name__ == "__main__":
    setup_environment()

步骤2:文档处理模块

创建文件:document_processor.py

python 复制代码
import os
from typing import List, Dict, Any
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    PyPDFLoader, TextLoader, Docx2txtLoader
)
from sentence_transformers import SentenceTransformer
import numpy as np

class DocumentProcessor:
    """Load documents, split them into chunks and embed the chunks.

    Args:
        chunk_size: Maximum chunk length handed to the text splitter
            (measured with ``len``, i.e. in characters).
        chunk_overlap: Overlap between consecutive chunks.
    """

    def __init__(self, chunk_size=1000, chunk_overlap=200):
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            # The list already contains CJK punctuation, so the full-width
            # forms of ! ? ; , (U+FF01, U+FF1F, U+FF1B, U+FF0C) are listed next
            # to their ASCII counterparts; otherwise Chinese sentences would
            # only ever be split on the ideographic full stop.
            separators=[
                "\n\n", "\n", "。",
                "!", "\uff01", "?", "\uff1f", ";", "\uff1b", ",", "\uff0c",
                "、", " ",
            ]
        )
        self.embedding_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

    def load_documents(self, file_paths: List[str]) -> List[Any]:
        """Load PDF, text and Word files into loader ``Document`` objects.

        Missing files, unsupported extensions and files whose loader raises
        are reported on stdout and skipped, so one bad file does not abort
        the whole batch.

        Args:
            file_paths: Paths of the files to load.

        Returns:
            The loaded documents, each with ``metadata['source']`` set to the
            path it came from.
        """
        documents = []

        for file_path in file_paths:
            if not os.path.exists(file_path):
                print(f"⚠️ 文件不存在: {file_path}")
                continue

            file_ext = os.path.splitext(file_path)[1].lower()

            try:
                if file_ext == '.pdf':
                    loader = PyPDFLoader(file_path)
                elif file_ext == '.txt':
                    loader = TextLoader(file_path, encoding='utf-8')
                elif file_ext in ['.docx', '.doc']:
                    # NOTE(review): legacy binary .doc files may not be readable
                    # by this loader; if so they end up in the except below.
                    loader = Docx2txtLoader(file_path)
                else:
                    print(f"⚠️ 不支持的文件格式: {file_ext}")
                    continue

                loaded_docs = loader.load()
                for doc in loaded_docs:
                    doc.metadata['source'] = file_path
                documents.extend(loaded_docs)
                print(f"✅ 成功加载: {file_path}")

            except Exception as e:
                # Deliberate best-effort: report the failure and keep going.
                print(f"❌ 加载文件失败 {file_path}: {str(e)}")

        return documents

    def split_documents(self, documents: List[Any]) -> List[Dict]:
        """Split documents into chunk dictionaries.

        Args:
            documents: Objects exposing ``page_content`` and ``metadata``.

        Returns:
            One dict per chunk with the keys ``text``, ``source``,
            ``chunk_id`` and ``page``. ``chunk_id`` is ``"<source>_<n>"``
            where ``n`` counts chunks per source, so ids stay unique even
            when one file is delivered as several documents (e.g. one per
            PDF page).
        """
        chunks = []
        next_index = {}  # source -> next free chunk number for that source

        for doc in documents:
            source = doc.metadata.get('source', 'unknown')
            page = doc.metadata.get('page', 0)

            for piece in self.text_splitter.split_text(doc.page_content):
                index = next_index.get(source, 0)
                next_index[source] = index + 1
                chunks.append({
                    'text': piece,
                    'source': source,
                    'chunk_id': f"{source}_{index}",
                    'page': page
                })

        print(f"✅ 文档分割完成,共生成 {len(chunks)} 个chunk")
        return chunks

    def generate_embeddings(self, chunks: List[Dict]) -> np.ndarray:
        """Embed the ``text`` of every chunk with the sentence-transformer model.

        Returns:
            Whatever ``embedding_model.encode`` returns for the list of texts
            (its ``shape`` attribute is printed).
        """
        texts = [chunk['text'] for chunk in chunks]
        embeddings = self.embedding_model.encode(texts)
        print(f"✅ 嵌入向量生成完成,维度: {embeddings.shape}")
        return embeddings

步骤3:向量存储与检索

创建文件:vector_store.py

python 复制代码
import faiss
import numpy as np
import json
import pickle
from typing import List, Dict, Any

class VectorStore:
    """FAISS-backed store for chunk embeddings plus their texts and metadata.

    Vectors are L2-normalised before they are added to an inner-product
    index, so the scores returned by ``search`` are cosine similarities.
    """

    def __init__(self, dimension=384):  # default is the MiniLM embedding size
        self.dimension = dimension
        self.index = faiss.IndexFlatIP(dimension)  # inner-product similarity
        self.chunks = []
        self.metadata = []

    @staticmethod
    def _unit_rows(vectors) -> np.ndarray:
        """Return a float32, L2-normalised *copy* of a 2-D array of row vectors.

        Working on a copy keeps the caller's array untouched; all-zero rows
        are left as zeros instead of producing NaNs.
        """
        rows = np.array(vectors, dtype='float32', order='C')
        norms = np.linalg.norm(rows, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        return (rows / norms).astype('float32', copy=False)

    def store_vectors(self, chunks: List[Dict], embeddings: np.ndarray):
        """Add embeddings and their chunk records to the store.

        Args:
            chunks: Chunk dicts (each must contain ``'text'``), one per row
                of ``embeddings``.
            embeddings: Array of shape ``(len(chunks), dimension)``.

        Raises:
            ValueError: If ``embeddings`` is empty, its row count differs
                from ``len(chunks)``, or its shape does not match the index
                dimension.
        """
        if len(embeddings) == 0:
            raise ValueError("嵌入向量不能为空")
        if len(chunks) != len(embeddings):
            raise ValueError(
                f"chunk数量({len(chunks)})与嵌入向量数量({len(embeddings)})不一致"
            )

        vectors = np.asarray(embeddings)
        if vectors.ndim != 2 or vectors.shape[1] != self.dimension:
            raise ValueError(
                f"嵌入向量形状应为 (n, {self.dimension}),实际为 {vectors.shape}"
            )

        # Normalise a float32 copy so inner product == cosine similarity.
        self.index.add(self._unit_rows(vectors))

        # Keep texts and metadata aligned with the index row order.
        self.chunks.extend([chunk['text'] for chunk in chunks])
        self.metadata.extend(chunks)

        print(f"✅ 向量存储完成,当前索引大小: {self.index.ntotal}")

    def search(self, query_embedding: np.ndarray, top_k: int = 5) -> List[Dict]:
        """Return up to ``top_k`` most similar chunks for a query vector.

        Args:
            query_embedding: A single query vector (any shape that flattens
                to one row).
            top_k: Maximum number of results; values <= 0 yield ``[]``.

        Returns:
            Dicts with ``text``, ``metadata`` and ``similarity``, in the
            order returned by the index.
        """
        if self.index.ntotal == 0 or top_k <= 0:
            return []

        query = self._unit_rows(np.asarray(query_embedding).reshape(1, -1))

        # Never ask for more neighbours than there are stored vectors.
        k = min(top_k, self.index.ntotal)
        similarities, indices = self.index.search(query, k)

        results = []
        for score, idx in zip(similarities[0], indices[0]):
            idx = int(idx)
            # FAISS pads missing neighbours with -1; a plain upper-bound check
            # would let -1 through and silently return the last chunk.
            if not 0 <= idx < len(self.metadata):
                continue
            results.append({
                'text': self.chunks[idx],
                'metadata': self.metadata[idx],
                'similarity': float(score)
            })

        return results

    def save(self, filepath: str):
        """Persist the store as ``<filepath>_data.pkl`` and ``<filepath>_index.faiss``."""
        data = {
            'chunks': self.chunks,
            'metadata': self.metadata
        }

        with open(f"{filepath}_data.pkl", 'wb') as f:
            pickle.dump(data, f)

        faiss.write_index(self.index, f"{filepath}_index.faiss")

        print(f"✅ 向量库保存到: {filepath}")

    def load(self, filepath: str):
        """Load a store previously written by ``save``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        that this application wrote itself.
        """
        with open(f"{filepath}_data.pkl", 'rb') as f:
            data = pickle.load(f)

        self.chunks = data['chunks']
        self.metadata = data['metadata']

        self.index = faiss.read_index(f"{filepath}_index.faiss")
        # Keep the advertised dimension in sync with the loaded index.
        self.dimension = self.index.d

        print(f"✅ 向量库从 {filepath} 加载完成")

步骤4:完整的RAG系统集成

创建文件:complete_rag_system.py

python 复制代码
import os
from openai import OpenAI
from document_processor import DocumentProcessor
from vector_store import VectorStore
from typing import List, Dict, Any

class CompleteRAGSystem:
    """End-to-end RAG pipeline: ingest documents, retrieve chunks, ask the LLM.

    Args:
        openai_api_key: API key for the OpenAI client; when falsy the
            ``OPENAI_API_KEY`` environment variable is used instead.
    """

    def __init__(self, openai_api_key: str = None):
        self.document_processor = DocumentProcessor()
        self.vector_store = VectorStore()
        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)

    def build_knowledge_base(self, document_paths: List[str]):
        """Load, split, embed and index the given files.

        Raises:
            ValueError: If none of the files could be loaded.
        """
        print("📚 开始构建知识库...")
        processor = self.document_processor

        loaded = processor.load_documents(document_paths)
        if not loaded:
            raise ValueError("没有成功加载任何文档")

        pieces = processor.split_documents(loaded)
        vectors = processor.generate_embeddings(pieces)
        self.vector_store.store_vectors(pieces, vectors)

        print("✅ 知识库构建完成")

    def retrieve_relevant_info(self, query: str, top_k: int = 5) -> List[Dict]:
        """Embed *query* and return the ``top_k`` hits from the vector store."""
        encoder = self.document_processor.embedding_model
        return self.vector_store.search(encoder.encode([query]), top_k)

    def generate_answer(self, query: str, context: List[Dict]) -> str:
        """Ask the chat model to answer *query* from the retrieved *context*.

        Each context item must provide ``item['metadata']['source']`` and
        ``item['text']``. Any exception raised by the API call is turned
        into an error string instead of propagating.
        """
        sections = []
        for item in context:
            sections.append(
                f"来源: {item['metadata']['source']}\n内容: {item['text']}"
            )
        context_text = "\n\n".join(sections)

        prompt = (
            "基于以下上下文信息,请回答问题。如果上下文没有提供足够信息,请明确说明。\n"
            "\n"
            "上下文:\n"
            f"{context_text}\n"
            "\n"
            f"问题: {query}\n"
            "\n"
            "请提供准确、专业的回答,并注明信息来源。"
        )

        try:
            completion = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "你是一个专业的助手,基于提供的上下文信息回答问题。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=1000
            )
        except Exception as e:
            return f"生成回答时出错: {str(e)}"
        return completion.choices[0].message.content

    def query(self, question: str, top_k: int = 5) -> Dict[str, Any]:
        """Run retrieval followed by generation and bundle the outcome.

        Returns:
            Dict with ``question``, ``answer``, ``sources`` (the retrieved
            items) and ``retrieved_count``.
        """
        print(f"🔍 处理查询: {question}")

        hits = self.retrieve_relevant_info(question, top_k)
        hit_count = len(hits)
        print(f"📄 检索到 {hit_count} 个相关文档片段")

        reply = self.generate_answer(question, hits)

        return {
            'question': question,
            'answer': reply,
            'sources': hits,
            'retrieved_count': hit_count
        }

# 使用示例
def main():
    """Demo run: build a knowledge base, save it, then answer sample questions."""
    # Placeholder paths - replace them with real documents before running.
    document_paths = [
        "documents/technical_manual.pdf",
        "documents/faq.txt",
        "documents/procedures.docx"
    ]
    sample_questions = [
        "产品的技术规格是什么?",
        "如何解决常见问题?",
        "操作步骤有哪些?"
    ]

    rag_system = CompleteRAGSystem()
    rag_system.build_knowledge_base(document_paths)

    # Persist the index so it can be reloaded without re-embedding.
    rag_system.vector_store.save("my_knowledge_base")

    divider = "-" * 50
    for sample in sample_questions:
        outcome = rag_system.query(sample)
        print(f"\n❓ 问题: {outcome['question']}")
        print(f"🤖 回答: {outcome['answer']}")
        print(f"📚 来源数: {outcome['retrieved_count']}")
        print(divider)

# Script entry point: run the RAG demo only when executed directly.
if __name__ == "__main__":
    main()

2.3 RAG系统流程图

创建文件:rag_flowchart.mermaid

flowchart TD
    A[用户输入查询] --> B[查询嵌入向量化]
    B --> C[向量相似度搜索]
    D[文档库] --> E[文档加载与解析]
    E --> F[文本分割与块处理]
    F --> G[文本嵌入向量化]
    G --> H[向量存储索引]
    C --> I[检索相关文档块]
    I --> J[构建上下文提示]
    J --> K[LLM生成回答]
    K --> L[返回最终结果]

    subgraph 知识库构建
        D --> E --> F --> G --> H
    end

    subgraph 查询处理
        A --> B --> C --> I --> J --> K --> L
    end

    style A fill:#4CAF50,stroke:#388E3C,color:white
    style L fill:#2196F3,stroke:#1976D2,color:white
    style D fill:#FF9800,stroke:#F57C00,color:white
    style H fill:#9C27B0,stroke:#7B1FA2,color:white
    style K fill:#607D8B,stroke:#455A64,color:white

3. ReAct技术详解与实现

3.1 ReAct技术原理

ReAct(Reasoning + Acting)框架让LLM交替进行推理(Thought)和行动(Action),并根据每次行动返回的观察结果(Observation)动态调整后续步骤,从而解决复杂问题。

3.1.1 ReAct核心组件

创建文件:react_framework.py

python 复制代码
from typing import List, Dict, Any, Tuple
from enum import Enum
import re
import json

class ActionType(Enum):
    """Enumeration of action names; each value is the lowercase action string."""
    # NOTE(review): ReActAgent below dispatches on the keys of its ``tools``
    # dict and does not reference this enum in the visible code.
    SEARCH = "search"
    CALCULATE = "calculate"
    LOOKUP = "lookup"
    FINISH = "finish"  # the action name that ends the ReAct loop

class ReActAgent:
    """Minimal ReAct (reason + act) loop around a pluggable LLM call.

    Each round asks the LLM for a ``Thought:`` / ``Action: <name>: <input>``
    pair, runs the named tool and feeds the result back as an
    ``Observation:`` until the LLM issues ``Action: finish: <answer>`` or
    ``max_steps`` rounds have been used.

    Args:
        tools: Mapping of action name -> callable.
        max_steps: Upper bound on LLM round-trips per ``run``.
    """

    # A new transcript section starts at a line beginning with one of these.
    _NEXT_SECTION = r"\n[ \t]*(?:Thought|Action|Observation):"

    def __init__(self, tools: Dict, max_steps: int = 10):
        self.tools = tools
        self.max_steps = max_steps
        self.conversation_history = []

    def parse_thought_action(self, response: str) -> Tuple[str, str, str]:
        """Extract ``(thought, action_type, action_input)`` from an LLM reply.

        Missing parts are returned as empty strings. A section extends to
        the next ``Thought:``/``Action:``/``Observation:`` line or to the
        end of the text, so a reply that ends directly after the action
        (no trailing newline) and multi-line inputs such as JSON are parsed.
        """
        thought_pattern = r"Thought:[ \t]*(.*?)(?=" + self._NEXT_SECTION + r"|\Z)"
        action_pattern = (
            r"Action:[ \t]*(\w+)[ \t]*:[ \t]*(.*?)(?=" + self._NEXT_SECTION + r"|\Z)"
        )

        thought_match = re.search(thought_pattern, response, re.DOTALL)
        action_match = re.search(action_pattern, response, re.DOTALL)

        thought = thought_match.group(1).strip() if thought_match else ""
        action_type = action_match.group(1) if action_match else ""
        action_input = action_match.group(2).strip() if action_match else ""

        return thought, action_type, action_input

    def execute_action(self, action_type: str, action_input: str) -> str:
        """Run the tool registered under *action_type*.

        A JSON object input is expanded into keyword arguments; any other
        input is passed as the tool's single positional argument, so tools
        are free to name their parameter as they like.

        Returns:
            ``str(result)`` of the tool, or an error message string when the
            action is unknown or the tool raises.
        """
        if action_type not in self.tools:
            return f"错误: 未知动作类型 '{action_type}'"

        tool = self.tools[action_type]
        text = action_input.strip()

        params = None
        if text.startswith('{'):
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                parsed = None  # not valid JSON: fall back to plain text
            if isinstance(parsed, dict):
                params = parsed

        try:
            result = tool(**params) if params is not None else tool(text)
            return str(result)
        except Exception as e:
            # Tool failures are reported to the LLM as an observation.
            return f"执行动作时出错: {str(e)}"

    def run(self, query: str) -> Dict[str, Any]:
        """Run the ReAct loop for *query*.

        Returns:
            Dict with ``query``, ``final_answer`` (the input of the
            ``finish`` action, or a fixed "not found" message when the loop
            ends without one), ``steps`` and ``total_steps``.
        """
        steps = []
        final_answer = None
        prompt = self._build_initial_prompt(query)

        for step_number in range(1, self.max_steps + 1):
            print(f"🔄 步骤 {step_number}")

            llm_response = self._call_llm(prompt)
            thought, action_type, action_input = self.parse_thought_action(llm_response)

            step_data = {
                "step": step_number,
                "thought": thought,
                "action_type": action_type,
                "action_input": action_input,
                "observation": ""
            }

            if action_type.lower() == "finish":
                step_data["observation"] = "任务完成"
                steps.append(step_data)
                final_answer = action_input
                break

            if action_type and action_type in self.tools:
                observation = self.execute_action(action_type, action_input)
            else:
                observation = f"无效动作: {action_type}"
            step_data["observation"] = observation
            steps.append(step_data)

            # Append this round to the transcript for the next LLM call.
            prompt += f"\n{llm_response}\nObservation: {observation}\n"

        return {
            "query": query,
            "final_answer": final_answer if final_answer is not None else "未找到明确答案",
            "steps": steps,
            "total_steps": len(steps)
        }

    def _build_initial_prompt(self, query: str) -> str:
        """Build the first prompt: format instructions, tool list and the question."""
        # Tool docstrings are shown to the LLM; collapse their whitespace and
        # tolerate tools without a docstring.
        tools_description = "\n".join([
            f"- {tool_name}: {' '.join((tool.__doc__ or '').split())}"
            for tool_name, tool in self.tools.items()
        ])

        prompt = f"""你是一个智能助手,使用ReAct框架解决问题。按照以下格式响应:

Thought: 你的思考过程
Action: 动作类型: 动作输入
Observation: 动作执行结果
...(这个循环重复)

当你有最终答案时:
Thought: 我认为我有答案了
Action: finish: 最终答案

可用工具:
{tools_description}

开始!

问题: {query}
"""
        return prompt

    def _call_llm(self, prompt: str) -> str:
        """Return the LLM reply for *prompt*.

        This base implementation is a stub that returns a fixed thought with
        no action; subclasses are expected to override it with a real call.
        """
        return "Thought: 我需要分析这个问题并决定使用哪个工具。"

    def _extract_final_answer(self, response: str) -> str:
        """Return the input of the first ``finish`` action found in a transcript."""
        finish_pattern = (
            r"Action:[ \t]*(?i:finish)[ \t]*:[ \t]*(.*?)(?=" + self._NEXT_SECTION + r"|\Z)"
        )
        match = re.search(finish_pattern, response, re.DOTALL)
        return match.group(1).strip() if match else "未找到明确答案"

3.2 ReAct完整实现

步骤1:工具函数实现

创建文件:react_tools.py

python 复制代码
import math
import requests
from typing import Dict, Any
import json

class ReActTools:
    """Collection of demo tools for the ReAct agent.

    The per-tool docstrings are kept in Chinese on purpose: the agent reads
    ``tool.__doc__`` and places it in the (Chinese) prompt sent to the LLM.
    """

    @staticmethod
    def search(query: str) -> str:
        """
        搜索工具:模拟网络搜索
        """
        # Canned results standing in for a real web search.
        mock_data = {
            "python编程": "Python是一种高级编程语言,以简洁易读著称。",
            "机器学习": "机器学习是人工智能的一个分支,专注于算法开发。",
            "深度学习": "深度学习使用神经网络进行模式识别和预测。",
            "自然语言处理": "NLP使计算机能够理解、解释和生成人类语言。"
        }

        term = query.strip()
        return mock_data.get(term.lower(), f"未找到关于 '{term}' 的信息")

    @staticmethod
    def calculate(expression: str) -> str:
        """
        计算工具:执行数学计算
        """
        # The expression comes from LLM output, so it is never passed to
        # eval(). It is parsed and only numeric literals, + - * / // ** and
        # unary +/- are evaluated. Integer powers are size-capped because a
        # short input such as 9**9**9**9 would otherwise exhaust CPU/memory.
        import ast
        import operator

        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return "错误: 表达式包含不安全字符"

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        max_result_bits = 10_000  # cap for int ** int results

        def evaluate(node):
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(left, int)
                    and isinstance(right, int)
                    and right > 0
                    and left.bit_length() * right > max_result_bits
                ):
                    raise ValueError("指数运算结果过大")
                return binary_ops[type(node.op)](left, right)
            raise ValueError("不支持的表达式")

        try:
            tree = ast.parse(expression.strip(), mode="eval")
            result = evaluate(tree.body)
            return f"计算结果: {expression} = {result}"
        except Exception as e:
            return f"计算错误: {str(e)}"

    @staticmethod
    def lookup(keyword: str) -> str:
        """
        查找工具:在知识库中查找信息
        """
        knowledge_base = {
            "openai": "OpenAI是一家AI研究公司,开发了GPT系列模型。",
            "transformer": "Transformer是一种基于自注意力机制的神经网络架构。",
            "rag": "RAG(检索增强生成)结合检索系统和生成模型。",
            "react": "ReAct框架结合推理和行动来解决复杂问题。"
        }

        term = keyword.strip()
        return knowledge_base.get(term.lower(), f"未找到关于 '{term}' 的信息")

    @staticmethod
    def get_weather(city: str) -> str:
        """
        天气查询工具:获取城市天气信息
        """
        # Canned weather data standing in for a real weather API.
        weather_data = {
            "北京": "北京: 晴, 温度 25°C, 湿度 45%",
            "上海": "上海: 多云, 温度 28°C, 湿度 60%",
            "广州": "广州: 阵雨, 温度 30°C, 湿度 75%",
            "深圳": "深圳: 阴, 温度 29°C, 湿度 70%"
        }

        name = city.strip()
        return weather_data.get(name, f"未找到城市 '{name}' 的天气信息")

def create_tool_set() -> Dict[str, callable]:
    """Return the action-name -> callable mapping handed to the agent.

    The keys are the action names the LLM uses; each maps to the
    ``ReActTools`` method of the same name.
    """
    toolbox = ReActTools()
    tool_names = ("search", "calculate", "lookup", "get_weather")
    return {name: getattr(toolbox, name) for name in tool_names}

步骤2:集成OpenAI的ReAct系统

创建文件:openai_react_system.py

python 复制代码
import os
from openai import OpenAI
from react_framework import ReActAgent
from react_tools import create_tool_set
import json
from typing import Dict, Any

class OpenAIReActSystem(ReActAgent):
    """ReAct agent whose LLM calls go to the OpenAI chat completions API.

    Args:
        openai_api_key: API key; when falsy the ``OPENAI_API_KEY``
            environment variable is used.
        model: Chat model name passed to every request.
    """

    def __init__(self, openai_api_key: str = None, model: str = "gpt-3.5-turbo"):
        super().__init__(create_tool_set(), max_steps=8)

        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)
        self.model = model

    def _call_llm(self, prompt: str) -> str:
        """Send *prompt* to the chat model and return the reply text.

        Any exception is converted into a synthetic ``finish`` action that
        carries the error message, which makes the agent loop stop.
        """
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是一个使用ReAct框架的智能助手。严格按照指定格式响应。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,
                max_tokens=500
            )
        except Exception as e:
            return f"Thought: 调用API时出错\nAction: finish: API错误 - {str(e)}"
        return completion.choices[0].message.content

def demonstrate_react_system():
    """Run the OpenAI-backed ReAct agent on sample questions and print each trace."""
    sample_questions = [
        "计算一下 (15 + 27) * 3 等于多少?然后搜索一下机器学习的基本概念。",
        "查找RAG的信息,然后获取北京的天气情况。",
        "什么是深度学习?它和机器学习有什么关系?"
    ]
    banner = '=' * 60

    system = OpenAIReActSystem()

    for sample in sample_questions:
        print(f"\n{banner}")
        print(f"❓ 问题: {sample}")
        print(f"{banner}")

        outcome = system.run(sample)

        # One block of output per recorded reasoning step.
        for record in outcome['steps']:
            print(f"\n🔹 步骤 {record['step']}:")
            print(f"   💭 思考: {record['thought']}")
            print(f"   ⚡ 动作: {record['action_type']}: {record['action_input']}")
            print(f"   👀 观察: {record['observation']}")

        print(f"\n✅ 最终答案: {outcome['final_answer']}")
        print(f"📊 总步骤数: {outcome['total_steps']}")

# Script entry point: run the ReAct demo only when executed directly.
if __name__ == "__main__":
    demonstrate_react_system()

3.3 ReAct流程图

创建文件:react_flowchart.mermaid

flowchart TD
    A[用户输入问题] --> B[初始化ReAct代理]
    B --> C[构建初始提示]
    C --> D[LLM生成响应]
    D --> E{解析Thought/Action}
    E --> F{动作类型}
    F -->|工具动作| G[执行工具函数]
    G --> H[获取观察结果]
    H --> I[更新对话历史]
    I --> C
    F -->|Finish动作| J[提取最终答案]
    J --> K[返回完整结果]
    F -->|无效动作| L[记录错误观察]
    L --> I
    M[工具集合] --> G

    style A fill:#4CAF50,stroke:#388E3C,color:white
    style K fill:#2196F3,stroke:#1976D2,color:white
    style D fill:#FF9800,stroke:#F57C00,color:white
    style G fill:#9C27B0,stroke:#7B1FA2,color:white
    style M fill:#607D8B,stroke:#455A64,color:white

4. CoT技术详解与实现

4.1 CoT技术原理

思维链(Chain-of-Thought,CoT)通过引导模型显式写出中间推理步骤,显著提升其解决复杂问题的能力。

4.1.1 CoT提示工程

创建文件:cot_prompt_engineering.py

python 复制代码
from typing import List, Dict, Any
import re

class CoTPromptEngineer:
    """Build chain-of-thought prompts and parse the model's step-wise replies.

    Four prompt templates are available, selected by category: ``math``,
    ``logic``, ``analysis`` and ``general`` (the fallback).
    """

    # Labels that introduce the final answer, tried in this order.
    _ANSWER_LABELS = ("最终答案", "最终结论", "结论", "最终观点")
    # ASCII colon or full-width colon (U+FF1A); Chinese replies often use the latter.
    _COLON = "[:\uff1a]"

    def __init__(self):
        self.cot_templates = {
            "math": self._math_cot_template,
            "logic": self._logic_cot_template,
            "analysis": self._analysis_cot_template,
            "general": self._general_cot_template
        }

    def create_cot_prompt(self, question: str, category: str = "general") -> str:
        """Return the CoT prompt for *question*; unknown categories use ``general``."""
        template = self.cot_templates.get(category, self._general_cot_template)
        return template(question)

    def _math_cot_template(self, question: str) -> str:
        """Prompt template for math problems (numbered steps + final answer)."""
        return f"""请逐步解决以下数学问题。展示你的完整推理过程。

问题: {question}

请按照以下格式回答:
步骤1: [第一步推理]
步骤2: [第二步推理]
...
最终答案: [答案]

现在开始:"""

    def _logic_cot_template(self, question: str) -> str:
        """Prompt template for logic problems (analysis/assumption/reasoning/check/conclusion)."""
        return f"""请逻辑推理以下问题。展示你的思考链条。

问题: {question}

请按照以下格式回答:
分析: [问题分析]
假设: [作出的假设]
推理: [逻辑推理过程]
验证: [验证推理]
结论: [最终结论]

现在开始:"""

    def _analysis_cot_template(self, question: str) -> str:
        """Prompt template for analytical questions (three layers + insight + view)."""
        return f"""请深入分析以下问题。展示你的多层次思考。

问题: {question}

请按照以下格式回答:
第一层思考: [表面分析]
第二层思考: [深入分析] 
第三层思考: [综合考量]
关键洞察: [核心发现]
最终观点: [总结观点]

现在开始:"""

    def _general_cot_template(self, question: str) -> str:
        """Generic step-by-step prompt template."""
        return f"""请仔细思考并逐步回答以下问题。展示你的完整推理过程。

问题: {question}

让我们一步一步思考:

第一步:"""

    def parse_cot_response(self, response: str) -> Dict[str, Any]:
        """Parse numbered steps and the final answer out of a CoT reply.

        Steps are sections introduced by ``步骤N:`` or ``Step N:``. A step
        ends only where a *new line* starts the next step header or a
        final-answer label, so the words "步骤"/"step" inside a sentence no
        longer cut the step short.

        Returns:
            Dict with ``steps`` (list of ``{"step", "content"}``),
            ``final_answer`` (``""`` when no label is found),
            ``reasoning_length`` and ``step_count``.
        """
        colon = self._COLON
        labels = "|".join(self._ANSWER_LABELS)

        step_pattern = (
            rf"(?:步骤|Step)\s*(\d+)\s*{colon}\s*(.*?)"
            rf"(?=\n\s*(?:步骤|Step)\s*\d+\s*{colon}"
            rf"|\n\s*(?:{labels})\s*{colon}"
            rf"|\Z)"
        )
        step_matches = re.findall(step_pattern, response, re.IGNORECASE | re.DOTALL)
        steps = [
            {"step": int(step_num), "content": step_content.strip()}
            for step_num, step_content in step_matches
        ]

        # The first label (in priority order) that occurs wins; the answer is
        # everything after it up to the end of the reply.
        final_answer = ""
        for label in self._ANSWER_LABELS:
            match = re.search(rf"{label}\s*{colon}\s*(.*)\Z", response, re.DOTALL)
            if match:
                final_answer = match.group(1).strip()
                break

        return {
            "steps": steps,
            "final_answer": final_answer,
            "reasoning_length": len(response),
            "step_count": len(steps)
        }

4.2 CoT完整实现

步骤1:CoT推理系统

创建文件:cot_reasoning_system.py

python 复制代码
import os
from openai import OpenAI
from cot_prompt_engineering import CoTPromptEngineer
from typing import Dict, Any, List
import time

class CoTReasoningSystem:
    """Chain-of-thought question answering on top of the OpenAI chat API.

    Args:
        openai_api_key: API key; when falsy the ``OPENAI_API_KEY``
            environment variable is used.
        model: Chat model name passed to every request.
    """

    def __init__(self, openai_api_key: str = None, model: str = "gpt-3.5-turbo"):
        self.client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))
        self.model = model
        self.prompt_engineer = CoTPromptEngineer()
        self.conversation_history = []

    def analyze_question_type(self, question: str) -> str:
        """Classify *question* by keyword; math is checked first, then logic, then analysis."""
        math_keywords = ["计算", "等于", "数学", "方程", "公式", "多少"]
        logic_keywords = ["推理", "逻辑", "如果", "那么", "因为", "所以"]
        analysis_keywords = ["分析", "评价", "看法", "观点", "讨论"]

        if any(keyword in question for keyword in math_keywords):
            return "math"
        elif any(keyword in question for keyword in logic_keywords):
            return "logic"
        elif any(keyword in question for keyword in analysis_keywords):
            return "analysis"
        else:
            return "general"

    def single_shot_cot(self, question: str, category: str = None) -> Dict[str, Any]:
        """Run one CoT request and return the parsed reply.

        Args:
            question: The question to answer.
            category: Template category; detected from the question when None.

        Returns:
            On success the parsed reply (``steps``, ``final_answer``,
            ``reasoning_length``, ``step_count``) plus ``question``,
            ``category``, ``processing_time`` and ``full_reasoning``.
            On failure a dict containing an ``error`` key.
        """
        if category is None:
            category = self.analyze_question_type(question)

        prompt = self.prompt_engineer.create_cot_prompt(question, category)

        try:
            start_time = time.time()

            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是一个优秀的推理助手,擅长逐步分析和解决问题。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=1500
            )

            reasoning_text = response.choices[0].message.content
            processing_time = time.time() - start_time

            parsed_result = self.prompt_engineer.parse_cot_response(reasoning_text)
            parsed_result.update({
                "question": question,
                "category": category,
                "processing_time": processing_time,
                "full_reasoning": reasoning_text
            })

            return parsed_result

        except Exception as e:
            # Callers detect failure through the presence of the "error" key.
            return {
                "question": question,
                "error": str(e),
                "steps": [],
                "final_answer": f"处理问题时出错: {str(e)}"
            }

    def multi_step_cot(self, question: str, max_iterations: int = 3) -> Dict[str, Any]:
        """Repeat CoT rounds until a final answer appears or the budget is used.

        Each round without a final answer re-asks the question, seeded with
        the last step of the previous round when there is one.

        Returns:
            The error dict of ``single_shot_cot`` if a round fails, otherwise
            a dict with ``question``, ``steps`` (all rounds), ``final_answer``
            (``""`` if none was found), ``iterations`` (rounds actually run),
            ``total_steps`` and ``processing_time`` (summed over the rounds).
        """
        current_question = question
        all_steps = []
        final_answer = ""
        total_time = 0
        iterations = 0

        for _ in range(max_iterations):
            print(f"🔄 CoT迭代 {iterations + 1}")

            result = self.single_shot_cot(current_question)

            if "error" in result:
                return result

            iterations += 1
            all_steps.extend(result["steps"])
            total_time += result.get("processing_time", 0)
            final_answer = result["final_answer"]

            if final_answer and final_answer != "未找到明确答案":
                break

            # No answer yet: formulate the follow-up question for the next round.
            if result["steps"]:
                last_step = result["steps"][-1]["content"]
                current_question = f"基于之前的推理: '{last_step}',请继续深入分析原问题: {question}"
            else:
                current_question = f"请从不同角度重新分析: {question}"

        return {
            "question": question,
            "steps": all_steps,
            "final_answer": final_answer,
            "iterations": iterations,
            "total_steps": len(all_steps),
            "processing_time": total_time
        }

def demonstrate_cot_system():
    """Run single-shot CoT on sample questions and print steps, answer and timing."""
    sample_questions = [
        "如果3个人3天能完成3个项目,那么9个人9天能完成多少个项目?",
        "分析人工智能对未来就业市场的影响",
        "推理: 所有猫都是动物,有些动物会飞,那么猫会飞吗?为什么?",
        "计算: 一个游泳池长20米,宽10米,深2米,如果每小时注水100立方米,需要多少小时注满?"
    ]
    banner = '=' * 70

    system = CoTReasoningSystem()

    for sample in sample_questions:
        print(f"\n{banner}")
        print(f"❓ 问题: {sample}")
        print(f"{banner}")

        outcome = system.single_shot_cot(sample)

        # A failed request is reported and the next question is tried.
        if "error" in outcome:
            print(f"❌ 错误: {outcome['error']}")
            continue

        print(f"📝 推理过程 ({outcome['step_count']} 个步骤):")
        for record in outcome["steps"]:
            print(f"   🔸 步骤 {record['step']}: {record['content']}")

        print(f"\n✅ 最终答案: {outcome['final_answer']}")
        print(f"⏱️ 处理时间: {outcome['processing_time']:.2f}秒")
        print(f"📊 推理长度: {outcome['reasoning_length']} 字符")

# Script entry point: run the CoT demo only when executed directly.
if __name__ == "__main__":
    demonstrate_cot_system()

步骤2:高级CoT技术

创建文件:advanced_cot_techniques.py

python 复制代码
import json
from typing import Dict, List, Any
from openai import OpenAI
import os

class AdvancedCoTTechniques:
    """Self-consistency and step-back variants of chain-of-thought prompting.

    Args:
        openai_api_key: API key; when falsy the ``OPENAI_API_KEY``
            environment variable is used.
        model: Chat model name used for every request.
    """

    # Placeholder recorded when no answer line can be found in a reasoning path.
    _NO_ANSWER = "未明确提取答案"

    def __init__(self, openai_api_key: str = None, model: str = "gpt-3.5-turbo"):
        self.client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))
        self.model = model

    def _chat(self, system: str, user: str, temperature: float, max_tokens: int) -> str:
        """Send one system+user exchange and return the reply text."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response.choices[0].message.content

    def self_consistency_cot(self, question: str, num_paths: int = 3) -> Dict[str, Any]:
        """Sample several reasoning paths and pick the most frequent answer.

        Returns:
            Dict with ``question``, ``reasoning_paths``, ``answers`` (one per
            path), ``final_answer`` and ``num_paths``.
        """
        reasoning_paths = []

        for i in range(num_paths):
            print(f"🔄 生成推理路径 {i+1}/{num_paths}")
            reasoning_paths.append(self._chat(
                "你是一个推理专家。请提供详细的逐步推理过程。",
                f"请逐步推理以下问题:{question}",
                temperature=0.7,  # higher temperature for diverse paths
                max_tokens=800
            ))

        answers = self._extract_answers_from_reasoning(reasoning_paths)
        final_answer = self._select_most_consistent_answer(answers)

        return {
            "question": question,
            "reasoning_paths": reasoning_paths,
            "answers": answers,
            "final_answer": final_answer,
            "num_paths": num_paths
        }

    def step_back_cot(self, question: str) -> Dict[str, Any]:
        """Two-stage reasoning: abstract the problem first, then solve it.

        Returns:
            Dict with ``question``, ``abstraction``, ``concrete_reasoning``
            and ``full_response`` (both stages joined).
        """
        # Stage 1: reflect on the underlying concepts and principles.
        abstraction_prompt = f"""请先对这个问题的核心概念和基本原理进行抽象思考:

问题: {question}

抽象思考:"""

        abstraction = self._chat(
            "你擅长抽象思维和概念分析。",
            abstraction_prompt,
            temperature=0.3,
            max_tokens=400
        )

        # Stage 2: solve the concrete question with the abstraction as context.
        concrete_prompt = f"""基于以下抽象思考,请具体推理问题:

抽象思考: {abstraction}

原问题: {question}

具体推理:"""

        concrete_reasoning = self._chat(
            "你擅长具体推理和问题解决。",
            concrete_prompt,
            temperature=0.3,
            max_tokens=800
        )

        return {
            "question": question,
            "abstraction": abstraction,
            "concrete_reasoning": concrete_reasoning,
            "full_response": f"抽象思考:\n{abstraction}\n\n具体推理:\n{concrete_reasoning}"
        }

    def _extract_answers_from_reasoning(self, reasoning_paths: List[str]) -> List[str]:
        """Return, per path, the last non-empty line that contains an answer keyword.

        Paths without such a line yield the ``_NO_ANSWER`` placeholder.
        """
        keywords = ['答案', '结果', '结论', '因此', '所以', 'answer', 'result']
        answers = []

        for reasoning in reasoning_paths:
            for line in reversed(reasoning.split('\n')):
                line = line.strip()
                if line and any(keyword in line.lower() for keyword in keywords):
                    answers.append(line)
                    break
            else:
                answers.append(self._NO_ANSWER)

        return answers

    def _select_most_consistent_answer(self, answers: List[str]) -> str:
        """Majority vote over *answers* (ties go to the earliest answer).

        The "no answer extracted" placeholder does not take part in the vote
        unless every path produced it; otherwise two failed extractions
        could outvote one real answer.
        """
        if not answers:
            return "无法确定答案"

        from collections import Counter

        candidates = [answer for answer in answers if answer != self._NO_ANSWER]
        counts = Counter(candidates or answers)

        # max() returns the first key with the highest count (insertion order).
        return max(counts, key=counts.get)

def demonstrate_advanced_cot():
    """Run a console demo of the self-consistency and step-back CoT helpers.

    Creates an ``AdvancedCoTTechniques`` instance, runs both techniques on one
    sample word problem and prints the intermediate and final results.
    """
    techniques = AdvancedCoTTechniques()
    divider = "=" * 50

    test_question = "一个篮子里有5个苹果,拿走2个,又放进3个,然后拿走1个,最后剩下几个苹果?"

    print("🧠 自洽性CoT演示:")
    print(divider)

    # NOTE(review): the second argument (3) is presumably the number of
    # reasoning paths to sample; confirm against self_consistency_cot.
    consistency = techniques.self_consistency_cot(test_question, 3)

    print(f"问题: {consistency['question']}")
    print("\n生成的推理路径:")
    for number, path in enumerate(consistency['reasoning_paths'], start=1):
        print(f"\n路径 {number}:")
        # Long paths are cut to their first 200 characters plus an ellipsis.
        if len(path) > 200:
            print(path[:200] + "...")
        else:
            print(path)

    print(f"\n提取的答案: {consistency['answers']}")
    print(f"最终选择: {consistency['final_answer']}")

    print("\n" + divider)
    print("🔄 Step-back CoT演示:")

    step_back = techniques.step_back_cot(test_question)

    print(f"抽象思考:\n{step_back['abstraction']}")
    print(f"\n具体推理:\n{step_back['concrete_reasoning']}")

# Run the advanced CoT demo only when this file is executed as a script.
if __name__ == "__main__":
    demonstrate_advanced_cot()

4.3 CoT流程图

创建文件:cot_flowchart.mermaid

flowchart TD
    A[输入复杂问题] --> B[分析问题类型]
    B --> C[选择CoT模板]
    C --> D[生成CoT提示]
    D --> E[LLM逐步推理]
    E --> F{解析推理结果}
    F --> G[提取推理步骤]
    F --> H[提取最终答案]
    G --> I{答案是否明确?}
    I -->|是| J[返回完整结果]
    I -->|否| K[多轮推理迭代]
    K --> D
    H --> J

    subgraph 高级CoT技术
        L[自洽性CoT] --> M[生成多个推理路径]
        M --> N[投票选择最佳答案]
        O[Step-back CoT] --> P[抽象层次思考]
        P --> Q[具体层次推理]
        Q --> R[综合答案生成]
    end

    style A fill:#4CAF50,stroke:#388E3C,color:white
    style J fill:#2196F3,stroke:#1976D2,color:white
    style E fill:#FF9800,stroke:#F57C00,color:white
    style L fill:#9C27B0,stroke:#7B1FA2,color:white
    style O fill:#607D8B,stroke:#455A64,color:white

5. 三大技术集成应用

5.1 构建垂直领域智能Agent

创建文件:vertical_agent.py

python 复制代码
import os
from openai import OpenAI
from typing import Dict, List, Any
import json

class VerticalDomainAgent:
    """Domain agent that routes each query to one of four answering strategies.

    A keyword/length heuristic scores the query; the score selects a direct
    LLM call, the RAG system, the CoT system or the ReAct system. Every
    processed query is appended to ``conversation_history``.
    """

    def __init__(self, domain: str, openai_api_key: str = None):
        """Create the OpenAI client and one instance of each subsystem.

        Args:
            domain: Domain name interpolated into the system prompt used for
                direct answers.
            openai_api_key: API key; when falsy, the ``OPENAI_API_KEY``
                environment variable is used instead.
        """
        self.domain = domain
        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)
        self.conversation_history = []

        # The three technique modules are imported at construction time
        # rather than at module import time.
        from complete_rag_system import CompleteRAGSystem
        from openai_react_system import OpenAIReActSystem
        from cot_reasoning_system import CoTReasoningSystem

        self.rag_system = CompleteRAGSystem()
        self.react_system = OpenAIReActSystem()
        self.cot_system = CoTReasoningSystem()

    def analyze_query_complexity(self, query: str) -> Dict[str, Any]:
        """Score a query with simple heuristics and choose a strategy.

        Args:
            query: The user query text.

        Returns:
            A dict with ``complexity_score`` (number), ``strategy`` (one of
            ``"direct"``, ``"rag"``, ``"cot"``, ``"react"``) and ``features``
            (the raw feature values the score was built from).
        """
        def _contains_any(text, needles):
            return any(needle in text for needle in needles)

        complexity_score = 0

        # NOTE(review): split() counts whitespace-separated tokens, so text
        # written without spaces (e.g. Chinese) counts as a single token.
        features = {
            "length": len(query.split()),
            "has_technical_terms": _contains_any(
                query.lower(), ('如何', '步骤', '方法', '原理', '机制')
            ),
            "requires_calculation": _contains_any(
                query, ('+', '-', '*', '/', '等于', '计算')
            ),
            "requires_reasoning": _contains_any(
                query, ('为什么', '原因', '推理', '分析', '比较')
            ),
            "requires_external_knowledge": _contains_any(
                query, ('最新', '新闻', '更新', '当前')
            ),
        }

        # Length contributes at most 3 points; each flag adds a fixed weight.
        complexity_score += min(features["length"] / 5, 3)
        flag_weights = (
            ("has_technical_terms", 2),
            ("requires_calculation", 3),
            ("requires_reasoning", 3),
            ("requires_external_knowledge", 2),
        )
        for feature_name, weight in flag_weights:
            complexity_score += weight if features[feature_name] else 0

        # The first upper bound the score fits under wins; above all bounds
        # the query goes to ReAct.
        strategy = "react"
        for upper_bound, label in ((3, "direct"), (6, "rag"), (9, "cot")):
            if complexity_score <= upper_bound:
                strategy = label
                break

        return {
            "complexity_score": complexity_score,
            "strategy": strategy,
            "features": features
        }

    def process_query(self, query: str) -> Dict[str, Any]:
        """Answer a query with the strategy chosen by the complexity analysis.

        Prints progress lines, delegates to the selected subsystem and records
        the exchange in ``conversation_history``.

        Args:
            query: The user query text.

        Returns:
            A dict holding ``query``, ``strategy`` and ``complexity_analysis``
            plus the strategy-specific fields copied from the subsystem result.
        """
        print(f"🎯 处理查询: {query}")

        analysis = self.analyze_query_complexity(query)
        strategy = analysis["strategy"]

        print(f"📊 复杂度分数: {analysis['complexity_score']}")
        print(f"🎯 选择策略: {strategy}")

        result = {
            "query": query,
            "strategy": strategy,
            "complexity_analysis": analysis
        }

        # Collect the strategy-specific fields, then merge them in one step.
        if strategy == "direct":
            extra = self._direct_response(query)
        elif strategy == "rag":
            rag_result = self.rag_system.query(query)
            extra = {
                "answer": rag_result["answer"],
                "sources": rag_result["sources"],
                "retrieved_count": rag_result["retrieved_count"]
            }
        elif strategy == "cot":
            cot_result = self.cot_system.single_shot_cot(query)
            extra = {
                "answer": cot_result["final_answer"],
                "reasoning_steps": cot_result["steps"],
                "step_count": cot_result["step_count"]
            }
        elif strategy == "react":
            react_result = self.react_system.run(query)
            extra = {
                "answer": react_result["final_answer"],
                "execution_steps": react_result["steps"],
                "total_steps": react_result["total_steps"]
            }
        else:
            extra = {}
        result.update(extra)

        history_entry = {
            "query": query,
            "strategy": strategy,
            "response": result.get("answer", "")
        }
        self.conversation_history.append(history_entry)

        return result

    def _direct_response(self, query: str) -> Dict[str, Any]:
        """Answer a simple query with a single chat-completion call.

        Any exception raised while building or sending the request is turned
        into an error answer instead of propagating.

        Args:
            query: The user query text.

        Returns:
            A dict with ``answer`` and ``method`` (``"direct_generation"`` on
            success, ``"error"`` on failure).
        """
        try:
            chat_messages = [
                {"role": "system", "content": f"你是一个{self.domain}领域的专家助手。"},
                {"role": "user", "content": query}
            ]
            completion = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=chat_messages,
                temperature=0.3,
                max_tokens=500
            )
            answer_text = completion.choices[0].message.content
        except Exception as e:
            return {
                "answer": f"抱歉,处理问题时出现错误: {e!s}",
                "method": "error"
            }
        return {
            "answer": answer_text,
            "method": "direct_generation"
        }

    def get_conversation_summary(self) -> Dict[str, Any]:
        """Summarise the recorded conversation history.

        Returns:
            ``{"message": ...}`` when nothing has been recorded yet; otherwise
            a dict with ``total_queries``, ``strategy_distribution`` (count
            per strategy) and ``recent_queries`` (the last five entries).
        """
        history = self.conversation_history
        if not history:
            return {"message": "暂无对话历史"}

        distribution = {}
        for entry in history:
            used = entry["strategy"]
            if used in distribution:
                distribution[used] += 1
            else:
                distribution[used] = 1

        return {
            "total_queries": len(history),
            "strategy_distribution": distribution,
            "recent_queries": history[-5:]
        }

def demonstrate_vertical_agent():
    """Send four sample medical queries through a ``VerticalDomainAgent``.

    Prints the chosen strategy, complexity score and answer for each query,
    followed by a summary of the whole conversation.
    """
    medical_agent = VerticalDomainAgent("医疗健康")
    banner = '=' * 80

    # Trailing notes give the route the author intended for each query; the
    # route actually taken depends on analyze_query_complexity.
    test_queries = [
        "什么是糖尿病?",  # simple question - direct answer (intended)
        "最新的糖尿病治疗指南有哪些内容?",  # needs up-to-date knowledge - RAG (intended)
        "为什么糖尿病患者需要控制饮食?请详细分析原因。",  # needs reasoning - CoT (intended)
        "计算一个BMI为28的糖尿病患者的每日热量需求,并制定饮食计划"  # complex task - ReAct (intended)
    ]

    for query in test_queries:
        print(f"\n{banner}")
        print(f"🗣️ 用户查询: {query}")
        print(banner)

        result = medical_agent.process_query(query)

        print(f"🎯 使用策略: {result['strategy']}")
        print(f"📊 复杂度: {result['complexity_analysis']['complexity_score']}")
        print(f"🤖 回答: {result['answer']}")

        # Strategy-specific detail line, printed only when the matching
        # field is present in the result.
        chosen = result['strategy']
        if chosen == 'rag':
            if 'sources' in result:
                print(f"📚 参考来源: {len(result['sources'])} 个文档")
        elif chosen == 'cot':
            if 'reasoning_steps' in result:
                print(f"🔍 推理步骤: {result['step_count']} 步")
        elif chosen == 'react':
            if 'execution_steps' in result:
                print(f"⚡ 执行步骤: {result['total_steps']} 步")

    # Conversation summary
    print(f"\n{banner}")
    print("📈 对话摘要")
    print(banner)

    summary = medical_agent.get_conversation_summary()
    print(f"总查询数: {summary['total_queries']}")
    print(f"策略分布: {summary['strategy_distribution']}")

# Run the vertical-agent demo only when this file is executed as a script.
if __name__ == "__main__":
    demonstrate_vertical_agent()

6. 部署与优化

6.1 性能监控与优化

创建文件:performance_monitor.py

python 复制代码
import time
import psutil
import GPUtil
from datetime import datetime
from typing import Dict, List, Any
import json

class PerformanceMonitor:
    """Collect host resource metrics and keep a log of API-operation timings."""

    def __init__(self):
        """Start with an empty operation log and remember the creation time."""
        self.metrics_history = []
        self.start_time = time.time()

    def collect_system_metrics(self) -> Dict[str, Any]:
        """Take a snapshot of CPU, memory, disk and (if readable) GPU usage.

        Returns:
            A dict with an ISO-format timestamp, CPU/memory/disk usage
            percentages, memory used/total in GiB, and ``gpu_metrics``: one
            entry per GPU reported by GPUtil, or ``{"error": ...}`` when the
            GPU query raises.
        """
        # CPU usage, sampled with a one-second interval.
        cpu_percent = psutil.cpu_percent(interval=1)

        # Memory usage
        memory = psutil.virtual_memory()

        # Disk usage of the root path
        disk = psutil.disk_usage('/')

        # GPU usage is best-effort.
        gpu_metrics = {}
        try:
            gpus = GPUtil.getGPUs()
            for i, gpu in enumerate(gpus):
                gpu_metrics[f"gpu_{i}"] = {
                    "load": gpu.load * 100,
                    "memory_used": gpu.memoryUsed,
                    "memory_total": gpu.memoryTotal
                }
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            gpu_metrics = {"error": "GPU信息不可用"}

        return {
            "timestamp": datetime.now().isoformat(),
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "memory_used_gb": memory.used / (1024**3),
            "memory_total_gb": memory.total / (1024**3),
            "disk_percent": disk.percent,
            "gpu_metrics": gpu_metrics
        }

    def record_api_metrics(self, operation: str, duration: float,
                          success: bool, tokens_used: int = 0) -> Dict[str, Any]:
        """Append one API-operation record to the history and return it.

        Args:
            operation: Name of the operation.
            duration: Duration of the operation, stored as seconds.
            success: Whether the operation succeeded.
            tokens_used: Token count attributed to the operation.

        Returns:
            The stored record, which also carries an ISO-format timestamp.
        """
        metric = {
            "timestamp": datetime.now().isoformat(),
            "operation": operation,
            "duration_seconds": duration,
            "success": success,
            "tokens_used": tokens_used
        }

        self.metrics_history.append(metric)
        return metric

    def get_performance_summary(self) -> Dict[str, Any]:
        """Aggregate the recorded operations into summary statistics.

        Average duration and total tokens are computed over successful
        operations only; both are 0 when none succeeded.

        Returns:
            ``{"message": ...}`` when nothing has been recorded; otherwise a
            dict with operation counts, success rate in percent, average
            duration, total tokens and uptime in hours since construction.
        """
        if not self.metrics_history:
            return {"message": "暂无性能数据"}

        successful_ops = [m for m in self.metrics_history if m["success"]]
        failed_ops = [m for m in self.metrics_history if not m["success"]]

        if successful_ops:
            avg_duration = sum(m["duration_seconds"] for m in successful_ops) / len(successful_ops)
            total_tokens = sum(m.get("tokens_used", 0) for m in successful_ops)
        else:
            avg_duration = 0
            total_tokens = 0

        return {
            "total_operations": len(self.metrics_history),
            "successful_operations": len(successful_ops),
            "failed_operations": len(failed_ops),
            "success_rate": len(successful_ops) / len(self.metrics_history) * 100,
            "average_duration_seconds": avg_duration,
            "total_tokens_used": total_tokens,
            "uptime_hours": (time.time() - self.start_time) / 3600
        }

    def save_metrics_to_file(self, filename: str = "performance_metrics.json"):
        """Write a fresh system snapshot, the summary and recent records as JSON.

        Args:
            filename: Path of the JSON file to (over)write.
        """
        data = {
            "system_metrics": self.collect_system_metrics(),
            "performance_summary": self.get_performance_summary(),
            "recent_operations": self.metrics_history[-50:]  # last 50 operations
        }

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"✅ 性能指标已保存到: {filename}")

def monitor_demo():
    """Feed a few hard-coded sample operations into a ``PerformanceMonitor``.

    Records each sample with a half-second pause in between, prints the
    resulting summary and saves the metrics to the monitor's default file.
    """
    monitor = PerformanceMonitor()

    # (operation, duration, success, tokens) samples; the last one is a failure.
    sample_operations = (
        ("rag_query", 1.2, True, 450),
        ("cot_reasoning", 3.5, True, 1200),
        ("react_execution", 8.7, True, 2300),
        ("direct_response", 0.8, True, 300),
        ("api_call", 2.1, False, 0),
    )

    for sample in sample_operations:
        monitor.record_api_metrics(*sample)
        time.sleep(0.5)  # pause between the sample operations

    # Print the summary
    summary = monitor.get_performance_summary()
    print("📊 性能摘要:")
    for key in summary:
        print(f"  {key}: {summary[key]}")

    # Save the metrics
    monitor.save_metrics_to_file()

# Run the monitoring demo only when this file is executed as a script.
if __name__ == "__main__":
    monitor_demo()

7. 结论

通过本文详细的教程,我们深入探讨了构建垂直Agent的三大核心技术:RAG、ReAct和CoT。每种技术都有其独特的优势和适用场景:

  • RAG 解决了大模型的知识更新和事实准确性问题
  • ReAct 提供了复杂问题的动态规划和执行能力
  • CoT 显著提升了模型的推理和逻辑分析能力

本教程提供的完整代码可以根据具体业务需求进行调整和优化。

相关推荐
jay神2 分钟前
基于YOLOv8的传送带异物检测系统
人工智能·python·深度学习·yolo·可视化·计算机毕业设计
强风7944 分钟前
OpenCV基础入门
人工智能·opencv·计算机视觉
小超同学你好4 分钟前
Langgragh 19. Skills 4. SkillToolset 式设计 —— 工具化按需加载的 Skills(含代码示例)
人工智能·语言模型·langchain
人工智能培训5 分钟前
如何衔接知识图谱与图神经网络
人工智能·神经网络·知识图谱
火星资讯8 分钟前
Zenlayer Fabric Port 新加坡首发:城域免费,全球畅连
人工智能·科技
新缸中之脑8 分钟前
20个Nano Banana 2创意工作流
人工智能
智驱力人工智能10 分钟前
馆藏文物预防性保护依赖的图像分析技术 文物损害检测 文物破损检测 文物损害识别误报率优化方案 文物安全巡查AI系统案例 智慧文保AI监测
人工智能·算法·安全·yolo·边缘计算
tobias.b12 分钟前
机器学习 超清晰通俗讲解 + 核心算法全解(深度+易懂版)
人工智能·算法·机器学习
code_pgf13 分钟前
Jetson 上 OpenClaw + Ollama + llama.cpp 的联动配置模板部署大模型
服务器·数据库·人工智能·llama