1. 引言
在通用大模型快速发展的今天,垂直领域的专业化应用正成为AI落地的关键方向。垂直Agent通过结合领域专业知识,为大模型提供“专业对口”的能力,在医疗、金融、法律等专业领域展现出巨大价值。本文将详细解析实现垂直Agent的三大核心技术:RAG、ReAct和CoT,并提供完整的实操指南。
2. RAG技术详解与实现
2.1 RAG技术原理
检索增强生成(Retrieval-Augmented Generation,RAG)通过将外部知识库与LLM结合,缓解大模型的幻觉问题和知识滞后性。其核心流程包括文档处理、向量检索和生成增强三个环节。
2.1.1 RAG系统架构
创建文件:rag_architecture.py
python
class RAGSystem:
    """Architectural sketch of a retrieval-augmented generation pipeline.

    The class only wires four collaborators together; their concrete
    implementations (``DocumentProcessor``, ``VectorStore``, ``Retriever``,
    ``Generator``) are expected to be supplied elsewhere.
    """

    def __init__(self):
        self.document_processor = DocumentProcessor()
        self.vector_store = VectorStore()
        self.retriever = Retriever()
        self.generator = Generator()

    def process_documents(self, documents):
        """Split the documents, embed the pieces and hand both to the store."""
        pieces = self.document_processor.split_documents(documents)
        vectors = self.document_processor.generate_embeddings(pieces)
        self.vector_store.store_vectors(pieces, vectors)

    def query(self, question, top_k=5):
        """Answer ``question`` from the ``top_k`` most similar chunks."""
        encoded_question = self.retriever.encode_query(question)
        hits = self.retriever.retrieve_similar(encoded_question, top_k)
        return self.generator.generate(question, self._build_context(hits))

    def _build_context(self, chunks):
        """Join the ``text`` attribute of every chunk with blank lines."""
        return "\n\n".join(piece.text for piece in chunks)
2.2 RAG完整实现步骤
步骤1:环境准备与依赖安装
创建文件:requirements_rag.txt
txt
langchain==0.0.346
langchain-community==0.0.7
openai==1.3.0
chromadb==0.4.15
sentence-transformers==2.2.2
pypdf==3.17.0
tiktoken==0.5.1
faiss-cpu==1.7.4
numpy==1.24.3
python-dotenv==1.0.0
docx2txt
创建文件:setup_environment.py
python
import os
import subprocess
import sys
from dotenv import load_dotenv
def setup_environment():
    """Install the pinned requirements and verify required environment variables.

    Returns:
        bool: ``True`` when ``pip install`` succeeded and every required
        variable is set (after loading ``.env``); ``False`` otherwise.
    """
    pip_command = [
        sys.executable, "-m", "pip", "install", "-r", "requirements_rag.txt"
    ]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError:
        print("❌ 依赖安装失败")
        return False
    print("✅ 依赖安装成功")

    # Pull variables from a local .env file before checking them.
    load_dotenv()

    required_vars = ['OPENAI_API_KEY']
    missing_vars = [name for name in required_vars if not os.getenv(name)]
    if missing_vars:
        print(f"❌ 缺少环境变量: {missing_vars}")
        print("请在 .env 文件中设置这些变量")
        return False

    print("✅ 环境设置完成")
    return True


if __name__ == "__main__":
    setup_environment()
步骤2:文档处理模块
创建文件:document_processor.py
python
import os
from typing import List, Dict, Any
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
PyPDFLoader, TextLoader, Docx2txtLoader
)
from sentence_transformers import SentenceTransformer
import numpy as np
class DocumentProcessor:
    """Load files, split them into chunks and embed the chunks.

    Args:
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.
    """

    def __init__(self, chunk_size=1000, chunk_overlap=200):
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            # Chinese text normally uses full-width punctuation, so both the
            # full-width and the ASCII form of each mark are listed.
            separators=[
                "\n\n", "\n",
                "。", "!", "!", "?", "?", ";", ";", ",", ",", "、",
                " ",
            ],
        )
        self.embedding_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

    def load_documents(self, file_paths: List[str]) -> List[Any]:
        """Load every supported file and return the loader's document objects.

        Missing files, unsupported extensions and loader failures are
        reported on stdout and skipped; they never raise.

        Args:
            file_paths: Paths of ``.pdf``, ``.txt``, ``.docx`` or ``.doc`` files.

        Returns:
            The documents produced by the loaders, each with
            ``metadata['source']`` set to the originating path.
        """
        documents = []
        for file_path in file_paths:
            if not os.path.exists(file_path):
                print(f"⚠️ 文件不存在: {file_path}")
                continue
            file_ext = os.path.splitext(file_path)[1].lower()
            try:
                if file_ext == '.pdf':
                    loader = PyPDFLoader(file_path)
                elif file_ext == '.txt':
                    loader = TextLoader(file_path, encoding='utf-8')
                elif file_ext in ['.docx', '.doc']:
                    loader = Docx2txtLoader(file_path)
                else:
                    print(f"⚠️ 不支持的文件格式: {file_ext}")
                    continue
                loaded_docs = loader.load()
                for doc in loaded_docs:
                    doc.metadata['source'] = file_path
                documents.extend(loaded_docs)
                print(f"✅ 成功加载: {file_path}")
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                print(f"❌ 加载文件失败 {file_path}: {str(e)}")
        return documents

    def split_documents(self, documents: List[Any]) -> List[Dict[str, Any]]:
        """Split documents into chunk dictionaries.

        Args:
            documents: Objects exposing ``page_content`` and ``metadata``.

        Returns:
            One dict per chunk with the keys ``text``, ``source``,
            ``chunk_id`` and ``page``.
        """
        chunks = []
        # Several documents can share one source (the page metadata suggests
        # one document per page), so the counter runs per source rather than
        # per document; otherwise chunk ids would repeat.
        next_index_by_source: Dict[str, int] = {}
        for doc in documents:
            source = doc.metadata.get('source', 'unknown')
            page = doc.metadata.get('page', 0)
            for chunk in self.text_splitter.split_text(doc.page_content):
                index = next_index_by_source.get(source, 0)
                next_index_by_source[source] = index + 1
                chunks.append({
                    'text': chunk,
                    'source': source,
                    'chunk_id': f"{source}_{index}",
                    'page': page,
                })
        print(f"✅ 文档分割完成,共生成 {len(chunks)} 个chunk")
        return chunks

    def generate_embeddings(self, chunks: List[Dict]) -> np.ndarray:
        """Encode the ``text`` of every chunk with the embedding model.

        Returns:
            Whatever ``embedding_model.encode`` returns for the list of
            texts; the code reads its ``shape`` attribute.
        """
        texts = [chunk['text'] for chunk in chunks]
        embeddings = self.embedding_model.encode(texts)
        print(f"✅ 嵌入向量生成完成,维度: {embeddings.shape}")
        return embeddings
步骤3:向量存储与检索
创建文件:vector_store.py
python
import faiss
import numpy as np
import json
import pickle
from typing import List, Dict, Any
class VectorStore:
    """FAISS inner-product index plus the chunk texts and metadata it indexes.

    Vectors are L2-normalised before being added or searched, so the inner
    product returned by the index equals cosine similarity.

    Args:
        dimension: Embedding width; 384 matches the MiniLM model used by the
            document processor.
    """

    def __init__(self, dimension=384):
        self.dimension = dimension
        self.index = faiss.IndexFlatIP(dimension)
        self.chunks = []
        self.metadata = []

    @staticmethod
    def _normalized(vectors) -> np.ndarray:
        """Return a float32, C-contiguous, row-normalised COPY of ``vectors``.

        Working on a copy keeps the caller's array untouched, and casting
        first means float64 input is accepted as well.
        """
        matrix = np.array(vectors, dtype='float32', ndmin=2)
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # leave all-zero rows as they are
        return np.ascontiguousarray(matrix / norms)

    def store_vectors(self, chunks: List[Dict], embeddings: np.ndarray):
        """Add embeddings to the index and remember the matching chunks.

        Args:
            chunks: Chunk dicts; each must contain a ``'text'`` key.
            embeddings: One row per chunk.

        Raises:
            ValueError: If ``embeddings`` is empty or its row count differs
                from ``len(chunks)`` (which would misalign search results).
        """
        if len(embeddings) == 0:
            raise ValueError("嵌入向量不能为空")
        if len(embeddings) != len(chunks):
            raise ValueError("嵌入向量数量与chunk数量不一致")
        self.index.add(self._normalized(embeddings))
        self.chunks.extend([chunk['text'] for chunk in chunks])
        self.metadata.extend(chunks)
        print(f"✅ 向量存储完成,当前索引大小: {self.index.ntotal}")

    def search(self, query_embedding: np.ndarray, top_k: int = 5) -> List[Dict]:
        """Return up to ``top_k`` most similar chunks, best first.

        Args:
            query_embedding: A single query vector of any shape that
                flattens to one row.
            top_k: Maximum number of hits.

        Returns:
            Dicts with the keys ``text``, ``metadata`` and ``similarity``.
            Empty when the index is empty or ``top_k`` is not positive.
        """
        if self.index.ntotal == 0 or top_k <= 0:
            return []
        query = self._normalized(np.asarray(query_embedding).reshape(1, -1))
        # Never ask for more neighbours than exist.
        wanted = min(top_k, self.index.ntotal)
        similarities, indices = self.index.search(query, wanted)
        results = []
        for score, idx in zip(similarities[0], indices[0]):
            # FAISS reports -1 for "no neighbour"; a negative index would
            # otherwise silently select the LAST chunk.
            if 0 <= idx < len(self.metadata):
                results.append({
                    'text': self.chunks[idx],
                    'metadata': self.metadata[idx],
                    'similarity': float(score),
                })
        return results

    def save(self, filepath: str):
        """Write ``<filepath>_data.pkl`` and ``<filepath>_index.faiss``."""
        data = {
            'chunks': self.chunks,
            'metadata': self.metadata,
        }
        with open(f"{filepath}_data.pkl", 'wb') as f:
            pickle.dump(data, f)
        faiss.write_index(self.index, f"{filepath}_index.faiss")
        print(f"✅ 向量库保存到: {filepath}")

    def load(self, filepath: str):
        """Restore chunks, metadata and index written by :meth:`save`.

        SECURITY: ``pickle.load`` can execute arbitrary code. Only load
        files that this application wrote itself.
        """
        with open(f"{filepath}_data.pkl", 'rb') as f:
            data = pickle.load(f)
        self.chunks = data['chunks']
        self.metadata = data['metadata']
        self.index = faiss.read_index(f"{filepath}_index.faiss")
        # Keep the advertised dimension in sync with the loaded index.
        self.dimension = self.index.d
        print(f"✅ 向量库从 {filepath} 加载完成")
步骤4:完整的RAG系统集成
创建文件:complete_rag_system.py
python
import os
from openai import OpenAI
from document_processor import DocumentProcessor
from vector_store import VectorStore
from typing import List, Dict, Any
class CompleteRAGSystem:
    """End-to-end RAG pipeline: ingest documents, retrieve chunks, ask the LLM.

    Args:
        openai_api_key: API key for the OpenAI client; when omitted the
            ``OPENAI_API_KEY`` environment variable is used.
    """

    def __init__(self, openai_api_key: str = None):
        self.document_processor = DocumentProcessor()
        self.vector_store = VectorStore()
        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)

    def build_knowledge_base(self, document_paths: List[str]):
        """Load, split, embed and index the given files.

        Raises:
            ValueError: If none of the paths yields a document.
        """
        print("📚 开始构建知识库...")
        processor = self.document_processor
        documents = processor.load_documents(document_paths)
        if not documents:
            raise ValueError("没有成功加载任何文档")
        chunks = processor.split_documents(documents)
        self.vector_store.store_vectors(chunks, processor.generate_embeddings(chunks))
        print("✅ 知识库构建完成")

    def retrieve_relevant_info(self, query: str, top_k: int = 5) -> List[Dict]:
        """Embed ``query`` and return the store's ``top_k`` search results."""
        encoded = self.document_processor.embedding_model.encode([query])
        return self.vector_store.search(encoded, top_k)

    def generate_answer(self, query: str, context: List[Dict]) -> str:
        """Ask the chat model to answer ``query`` from the retrieved context.

        Args:
            query: The user's question.
            context: Search hits carrying ``'text'`` and
                ``'metadata']['source']``.

        Returns:
            The model's reply, or an error message string if the API call
            raises.
        """
        context_text = "\n\n".join(
            f"来源: {hit['metadata']['source']}\n内容: {hit['text']}"
            for hit in context
        )
        prompt = (
            "基于以下上下文信息,请回答问题。如果上下文没有提供足够信息,请明确说明。\n"
            "上下文:\n"
            f"{context_text}\n"
            f"问题: {query}\n"
            "请提供准确、专业的回答,并注明信息来源。"
        )
        chat_messages = [
            {"role": "system", "content": "你是一个专业的助手,基于提供的上下文信息回答问题。"},
            {"role": "user", "content": prompt},
        ]
        try:
            completion = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=chat_messages,
                temperature=0.3,
                max_tokens=1000,
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"生成回答时出错: {str(e)}"

    def query(self, question: str, top_k: int = 5) -> Dict[str, Any]:
        """Run retrieval followed by generation and bundle the outcome.

        Returns:
            Dict with the keys ``question``, ``answer``, ``sources`` and
            ``retrieved_count``.
        """
        print(f"🔍 处理查询: {question}")
        hits = self.retrieve_relevant_info(question, top_k)
        hit_count = len(hits)
        print(f"📄 检索到 {hit_count} 个相关文档片段")
        return {
            'question': question,
            'answer': self.generate_answer(question, hits),
            'sources': hits,
            'retrieved_count': hit_count,
        }
# 使用示例
def main():
    """Build a knowledge base from sample paths, save it and run sample queries."""
    rag_system = CompleteRAGSystem()

    # Placeholder paths - replace with real documents.
    document_paths = [
        "documents/technical_manual.pdf",
        "documents/faq.txt",
        "documents/procedures.docx",
    ]
    rag_system.build_knowledge_base(document_paths)
    rag_system.vector_store.save("my_knowledge_base")

    sample_questions = (
        "产品的技术规格是什么?",
        "如何解决常见问题?",
        "操作步骤有哪些?",
    )
    separator = "-" * 50
    for sample in sample_questions:
        outcome = rag_system.query(sample)
        print(f"\n❓ 问题: {outcome['question']}")
        print(f"🤖 回答: {outcome['answer']}")
        print(f"📚 来源数: {outcome['retrieved_count']}")
        print(separator)


if __name__ == "__main__":
    main()
2.3 RAG系统流程图
创建文件:rag_flowchart.mermaid
3. ReAct技术详解与实现
3.1 ReAct技术原理
ReAct(Reasoning + Acting)框架通过结合推理和行动,让LLM能够动态规划和执行动作来解决复杂问题。
3.1.1 ReAct核心组件
创建文件:react_framework.py
python
from typing import List, Dict, Any, Tuple
from enum import Enum
import re
import json
class ActionType(Enum):
    """Names of the actions the ReAct prompt format distinguishes."""

    SEARCH = "search"
    CALCULATE = "calculate"
    LOOKUP = "lookup"
    # The terminating action: its input is the final answer.
    FINISH = "finish"
class ReActAgent:
    """Minimal ReAct loop: ask the LLM, parse Thought/Action, run a tool, repeat.

    Args:
        tools: Mapping from action name to a callable. Each callable's
            ``__doc__`` is shown to the model as the tool description.
        max_steps: Upper bound on LLM round trips per :meth:`run`.
    """

    def __init__(self, tools: Dict, max_steps: int = 10):
        self.tools = tools
        self.max_steps = max_steps
        self.conversation_history = []

    def parse_thought_action(self, response: str) -> Tuple[str, str, str]:
        """Extract ``(thought, action_type, action_input)`` from one reply.

        The thought runs up to the next ``Action:`` line or the end of the
        text; the action input is the remainder of the ``Action:`` line.
        Neither requires a trailing newline, which model replies usually
        lack. Missing parts are returned as empty strings.
        """
        thought_match = re.search(
            r"Thought:\s*(.*?)(?=\nAction:|\Z)", response, re.DOTALL
        )
        action_match = re.search(r"Action:\s*(\w+)\s*:\s*([^\n]*)", response)
        thought = thought_match.group(1).strip() if thought_match else ""
        action_type = action_match.group(1) if action_match else ""
        action_input = action_match.group(2).strip() if action_match else ""
        return thought, action_type, action_input

    @staticmethod
    def _call_with_text(tool, text: str):
        """Invoke ``tool`` with a single free-text argument.

        Tools that declare a ``query`` parameter (or ``**kwargs``) keep
        receiving ``query=text``; every other tool gets the text
        positionally, so parameters named ``expression``, ``keyword``,
        ``city`` and so on work too.
        """
        import inspect

        try:
            accepted = inspect.signature(tool).parameters
        except (TypeError, ValueError):
            accepted = {}
        takes_query = "query" in accepted or any(
            p.kind is p.VAR_KEYWORD for p in accepted.values()
        )
        return tool(query=text) if takes_query else tool(text)

    def execute_action(self, action_type: str, action_input: str) -> str:
        """Run the named tool and return its result as a string.

        Input that parses as a JSON object is expanded into keyword
        arguments; anything else is passed as one text argument. Errors are
        returned as observation text rather than raised, so the loop can
        continue.
        """
        tool = self.tools.get(action_type)
        if tool is None:
            return f"错误: 未知动作类型 '{action_type}'"

        raw = action_input.strip()
        params = None
        if raw.startswith('{'):
            try:
                decoded = json.loads(raw)
            except json.JSONDecodeError:
                decoded = None
            if isinstance(decoded, dict):
                params = decoded

        try:
            if params is not None:
                result = tool(**params)
            else:
                result = self._call_with_text(tool, raw)
            return str(result)
        except Exception as e:
            return f"执行动作时出错: {str(e)}"

    def run(self, query: str) -> Dict[str, Any]:
        """Run the ReAct loop until ``finish`` or ``max_steps`` is reached.

        Returns:
            Dict with the keys ``query``, ``final_answer``, ``steps`` (one
            dict per round trip) and ``total_steps``.
        """
        steps = []
        prompt = self._build_initial_prompt(query)
        full_response = ""
        final_answer = None

        for step_number in range(1, self.max_steps + 1):
            print(f"🔄 步骤 {step_number}")
            llm_response = self._call_llm(prompt)
            full_response += llm_response + "\n"
            thought, action_type, action_input = self.parse_thought_action(llm_response)
            step_data = {
                "step": step_number,
                "thought": thought,
                "action_type": action_type,
                "action_input": action_input,
                "observation": "",
            }

            if action_type.lower() == "finish":
                step_data["observation"] = "任务完成"
                steps.append(step_data)
                # Use the answer of the step that actually finished, so a
                # differently-cased "Finish" still yields its answer.
                final_answer = action_input
                break

            if action_type and action_type in self.tools:
                step_data["observation"] = self.execute_action(action_type, action_input)
            else:
                step_data["observation"] = f"无效动作: {action_type}"
            # Feed the reply and its observation back for the next round.
            prompt += f"\n{llm_response}\nObservation: {step_data['observation']}\n"
            steps.append(step_data)

        if final_answer is None:
            final_answer = self._extract_final_answer(full_response)
        return {
            "query": query,
            "final_answer": final_answer,
            "steps": steps,
            "total_steps": len(steps),
        }

    def _build_initial_prompt(self, query: str) -> str:
        """Build the instruction prompt listing the format and the tools."""
        tools_description = "\n".join(
            f"- {tool_name}: {tool.__doc__}" for tool_name, tool in self.tools.items()
        )
        return (
            "你是一个智能助手,使用ReAct框架解决问题。按照以下格式响应:\n"
            "Thought: 你的思考过程\n"
            "Action: 动作类型: 动作输入\n"
            "Observation: 动作执行结果\n"
            "...(这个循环重复)\n"
            "当你有最终答案时:\n"
            "Thought: 我认为我有答案了\n"
            "Action: finish: 最终答案\n"
            "可用工具:\n"
            f"{tools_description}\n"
            "开始!\n"
            f"问题: {query}\n"
        )

    def _call_llm(self, prompt: str) -> str:
        """Return a canned reply; subclasses override this with a real LLM call."""
        return "Thought: 我需要分析这个问题并决定使用哪个工具。"

    def _extract_final_answer(self, response: str) -> str:
        """Return the input of the first ``finish`` action found in ``response``."""
        match = re.search(
            r"Action:\s*finish\s*:\s*([^\n]*)", response, re.IGNORECASE
        )
        return match.group(1).strip() if match else "未找到明确答案"
3.2 ReAct完整实现
步骤1:工具函数实现
创建文件:react_tools.py
python
import math
import requests
from typing import Dict, Any
import json
class ReActTools:
    """ReAct工具集合"""

    # NOTE: the docstrings of the tool methods below are runtime text - the
    # agent copies each tool's __doc__ into the LLM prompt - so they are
    # kept verbatim; explanations live in '#' comments instead.

    @staticmethod
    def search(query: str) -> str:
        """
搜索工具:模拟网络搜索
"""
        # Mock search: exact, lower-cased key lookup in a fixed table.
        mock_data = {
            "python编程": "Python是一种高级编程语言,以简洁易读著称。",
            "机器学习": "机器学习是人工智能的一个分支,专注于算法开发。",
            "深度学习": "深度学习使用神经网络进行模式识别和预测。",
            "自然语言处理": "NLP使计算机能够理解、解释和生成人类语言。",
        }
        return mock_data.get(query.lower(), f"未找到关于 '{query}' 的信息")

    @staticmethod
    def calculate(expression: str) -> str:
        """
计算工具:执行数学计算
"""
        # The expression comes from model output, so it is never handed to
        # eval(). It is parsed with `ast` and only numeric literals,
        # + - * / // ** and unary +/- are evaluated. Power operands are
        # bounded because the character whitelist alone still admits
        # inputs such as 9**9**9**9 that would exhaust CPU and memory.
        import ast
        import operator

        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return "错误: 表达式包含不安全字符"

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if isinstance(node.op, ast.Pow) and (abs(right) > 100 or abs(left) > 10 ** 6):
                    raise ValueError("幂运算的操作数过大")
                return binary_ops[type(node.op)](left, right)
            raise ValueError("不支持的表达式")

        try:
            result = evaluate(ast.parse(expression.strip(), mode="eval"))
            return f"计算结果: {expression} = {result}"
        except Exception as e:
            return f"计算错误: {str(e)}"

    @staticmethod
    def lookup(keyword: str) -> str:
        """
查找工具:在知识库中查找信息
"""
        # Mock knowledge base keyed by lower-cased keyword.
        knowledge_base = {
            "openai": "OpenAI是一家AI研究公司,开发了GPT系列模型。",
            "transformer": "Transformer是一种基于自注意力机制的神经网络架构。",
            "rag": "RAG(检索增强生成)结合检索系统和生成模型。",
            "react": "ReAct框架结合推理和行动来解决复杂问题。",
        }
        return knowledge_base.get(keyword.lower(), f"未找到关于 '{keyword}' 的信息")

    @staticmethod
    def get_weather(city: str) -> str:
        """
天气查询工具:获取城市天气信息
"""
        # Mock weather table; the city name must match a key exactly.
        weather_data = {
            "北京": "北京: 晴, 温度 25°C, 湿度 45%",
            "上海": "上海: 多云, 温度 28°C, 湿度 60%",
            "广州": "广州: 阵雨, 温度 30°C, 湿度 75%",
            "深圳": "深圳: 阴, 温度 29°C, 湿度 70%",
        }
        return weather_data.get(city, f"未找到城市 '{city}' 的天气信息")
def create_tool_set() -> Dict[str, callable]:
    """Return the name -> callable mapping of the four demo tools."""
    toolbox = ReActTools()
    tool_names = ("search", "calculate", "lookup", "get_weather")
    return {tool_name: getattr(toolbox, tool_name) for tool_name in tool_names}
步骤2:集成OpenAI的ReAct系统
创建文件:openai_react_system.py
python
import os
from openai import OpenAI
from react_framework import ReActAgent
from react_tools import create_tool_set
import json
from typing import Dict, Any
class OpenAIReActSystem(ReActAgent):
    """ReAct agent whose LLM calls go to the OpenAI chat completions API.

    Args:
        openai_api_key: API key; falls back to ``OPENAI_API_KEY``.
        model: Chat model name used for every call.
    """

    def __init__(self, openai_api_key: str = None, model: str = "gpt-3.5-turbo"):
        super().__init__(create_tool_set(), max_steps=8)
        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)
        self.model = model

    def _call_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return the reply text.

        On any exception a synthetic ``finish`` reply carrying the error
        message is returned instead of raising.
        """
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是一个使用ReAct框架的智能助手。严格按照指定格式响应。"},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,
                max_tokens=500,
            )
        except Exception as e:
            return f"Thought: 调用API时出错\nAction: finish: API错误 - {str(e)}"
        try:
            return completion.choices[0].message.content
        except Exception as e:
            return f"Thought: 调用API时出错\nAction: finish: API错误 - {str(e)}"
def demonstrate_react_system():
    """Run three sample questions through the OpenAI-backed ReAct agent and print the trace."""
    system = OpenAIReActSystem()
    test_questions = (
        "计算一下 (15 + 27) * 3 等于多少?然后搜索一下机器学习的基本概念。",
        "查找RAG的信息,然后获取北京的天气情况。",
        "什么是深度学习?它和机器学习有什么关系?",
    )
    for sample in test_questions:
        print(f"\n{'='*60}")
        print(f"❓ 问题: {sample}")
        print(f"{'='*60}")
        outcome = system.run(sample)
        # One block of output per executed step.
        for record in outcome['steps']:
            print(f"\n🔹 步骤 {record['step']}:")
            print(f" 💭 思考: {record['thought']}")
            print(f" ⚡ 动作: {record['action_type']}: {record['action_input']}")
            print(f" 👀 观察: {record['observation']}")
        print(f"\n✅ 最终答案: {outcome['final_answer']}")
        print(f"📊 总步骤数: {outcome['total_steps']}")


if __name__ == "__main__":
    demonstrate_react_system()
3.3 ReAct流程图
创建文件:react_flowchart.mermaid
4. CoT技术详解与实现
4.1 CoT技术原理
思维链(Chain-of-Thought,CoT)通过引导模型展示推理过程,显著提升复杂问题的解决能力。
4.1.1 CoT提示工程
创建文件:cot_prompt_engineering.py
python
from typing import List, Dict, Any
import re
class CoTPromptEngineer:
    """Build chain-of-thought prompts and parse the model's stepwise replies."""

    def __init__(self):
        # Category name -> bound template method.
        self.cot_templates = {
            "math": self._math_cot_template,
            "logic": self._logic_cot_template,
            "analysis": self._analysis_cot_template,
            "general": self._general_cot_template,
        }

    def create_cot_prompt(self, question: str, category: str = "general") -> str:
        """Render the template for ``category``; unknown categories use the general one."""
        template = self.cot_templates.get(category, self._general_cot_template)
        return template(question)

    def _math_cot_template(self, question: str) -> str:
        """Template asking for numbered steps followed by a final answer."""
        return (
            "请逐步解决以下数学问题。展示你的完整推理过程。\n"
            f"问题: {question}\n"
            "请按照以下格式回答:\n"
            "步骤1: [第一步推理]\n"
            "步骤2: [第二步推理]\n"
            "...\n"
            "最终答案: [答案]\n"
            "现在开始:"
        )

    def _logic_cot_template(self, question: str) -> str:
        """Template asking for analysis, assumptions, reasoning, verification, conclusion."""
        return (
            "请逻辑推理以下问题。展示你的思考链条。\n"
            f"问题: {question}\n"
            "请按照以下格式回答:\n"
            "分析: [问题分析]\n"
            "假设: [作出的假设]\n"
            "推理: [逻辑推理过程]\n"
            "验证: [验证推理]\n"
            "结论: [最终结论]\n"
            "现在开始:"
        )

    def _analysis_cot_template(self, question: str) -> str:
        """Template asking for three layers of thought, a key insight and a final view."""
        return (
            "请深入分析以下问题。展示你的多层次思考。\n"
            f"问题: {question}\n"
            "请按照以下格式回答:\n"
            "第一层思考: [表面分析]\n"
            "第二层思考: [深入分析]\n"
            "第三层思考: [综合考量]\n"
            "关键洞察: [核心发现]\n"
            "最终观点: [总结观点]\n"
            "现在开始:"
        )

    def _general_cot_template(self, question: str) -> str:
        """Generic "let's think step by step" template."""
        return (
            "请仔细思考并逐步回答以下问题。展示你的完整推理过程。\n"
            f"问题: {question}\n"
            "让我们一步一步思考:\n"
            "第一步:"
        )

    def parse_cot_response(self, response: str) -> Dict[str, Any]:
        """Extract numbered steps and the final answer from a model reply.

        Step markers ("步骤N" / "Step N") are recognised only at the start
        of a line, optionally preceded by markdown decoration, so the word
        "步骤" inside a step's text no longer cuts that step short. Both
        the ASCII colon and the full-width colon are accepted after a
        marker.

        Returns:
            Dict with the keys ``steps`` (list of ``{"step", "content"}``),
            ``final_answer`` (empty string when none is found),
            ``reasoning_length`` and ``step_count``.
        """
        line_start = r"^[ \t*#>\-]*"
        step_marker = line_start + r"(?:步骤|Step)\s*\d+\s*[::]"
        step_pattern = (
            line_start
            + r"(?:步骤|Step)\s*(\d+)\s*[::]\s*(.*?)"
            + r"(?=" + step_marker + r"|" + line_start + r"(?:最终答案|最终结论)|\Z)"
        )
        flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
        steps = [
            {"step": int(number), "content": content.strip()}
            for number, content in re.findall(step_pattern, response, flags)
        ]

        # Ordered from most to least specific: "结论" would otherwise also
        # match inside "最终结论".
        final_answer = ""
        for label in ("最终答案", "最终结论", "结论", "最终观点"):
            match = re.search(label + r"\s*[::]\s*(.*)", response, re.IGNORECASE | re.DOTALL)
            if match:
                final_answer = match.group(1).strip()
                break

        return {
            "steps": steps,
            "final_answer": final_answer,
            "reasoning_length": len(response),
            "step_count": len(steps),
        }
4.2 CoT完整实现
步骤1:CoT推理系统
创建文件:cot_reasoning_system.py
python
import os
from openai import OpenAI
from cot_prompt_engineering import CoTPromptEngineer
from typing import Dict, Any, List
import time
class CoTReasoningSystem:
    """Chain-of-thought reasoning on top of the OpenAI chat completions API.

    Args:
        openai_api_key: API key; falls back to ``OPENAI_API_KEY``.
        model: Chat model name used for every call.
    """

    def __init__(self, openai_api_key: str = None, model: str = "gpt-3.5-turbo"):
        self.client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))
        self.model = model
        self.prompt_engineer = CoTPromptEngineer()
        self.conversation_history = []

    def analyze_question_type(self, question: str) -> str:
        """Classify a question as ``math``, ``logic``, ``analysis`` or ``general``.

        Categories are checked in that order; the first one with a keyword
        contained in the question wins.
        """
        keyword_groups = (
            ("math", ["计算", "等于", "数学", "方程", "公式", "多少"]),
            ("logic", ["推理", "逻辑", "如果", "那么", "因为", "所以"]),
            ("analysis", ["分析", "评价", "看法", "观点", "讨论"]),
        )
        for category, keywords in keyword_groups:
            if any(keyword in question for keyword in keywords):
                return category
        return "general"

    def single_shot_cot(self, question: str, category: str = None) -> Dict[str, Any]:
        """Run one CoT prompt and return the parsed reasoning.

        Args:
            question: The question to reason about.
            category: Template category; detected from the question when
                ``None``.

        Returns:
            The parsed reply plus ``question``, ``category``,
            ``processing_time`` and ``full_reasoning``. If anything raises,
            a dict with the keys ``question``, ``error``, ``steps`` and
            ``final_answer`` is returned instead.
        """
        if category is None:
            category = self.analyze_question_type(question)
        prompt = self.prompt_engineer.create_cot_prompt(question, category)
        try:
            start_time = time.time()
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是一个优秀的推理助手,擅长逐步分析和解决问题。"},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=1500,
            )
            reasoning_text = response.choices[0].message.content
            processing_time = time.time() - start_time
            parsed_result = self.prompt_engineer.parse_cot_response(reasoning_text)
            parsed_result.update({
                "question": question,
                "category": category,
                "processing_time": processing_time,
                "full_reasoning": reasoning_text,
            })
            return parsed_result
        except Exception as e:
            return {
                "question": question,
                "error": str(e),
                "steps": [],
                "final_answer": f"处理问题时出错: {str(e)}",
            }

    def multi_step_cot(self, question: str, max_iterations: int = 3) -> Dict[str, Any]:
        """Repeat single-shot CoT until a final answer appears.

        Each round without a final answer re-asks the question, seeded
        with the last reasoning step when there is one.

        Args:
            question: The original question.
            max_iterations: Maximum number of rounds; with a value of zero
                or less no call is made.

        Returns:
            Dict with ``question``, ``steps`` (all rounds combined),
            ``final_answer``, ``iterations`` (rounds actually run),
            ``total_steps`` and ``processing_time`` (summed over rounds).
            If a round fails, that round's error dict is returned as is.
        """
        current_question = question
        all_steps = []
        final_answer = ""
        total_time = 0
        rounds_run = 0

        for round_index in range(max_iterations):
            print(f"🔄 CoT迭代 {round_index + 1}")
            result = self.single_shot_cot(current_question)
            rounds_run += 1
            if "error" in result:
                return result
            all_steps.extend(result["steps"])
            total_time += result.get("processing_time", 0)
            final_answer = result["final_answer"]
            if final_answer and final_answer != "未找到明确答案":
                break
            if result["steps"]:
                last_step = result["steps"][-1]["content"]
                current_question = f"基于之前的推理: '{last_step}',请继续深入分析原问题: {question}"
            else:
                current_question = f"请从不同角度重新分析: {question}"

        return {
            "question": question,
            "steps": all_steps,
            "final_answer": final_answer,
            "iterations": rounds_run,
            "total_steps": len(all_steps),
            "processing_time": total_time,
        }
def demonstrate_cot_system():
    """Run four sample questions through single-shot CoT and print the results."""
    system = CoTReasoningSystem()
    test_questions = (
        "如果3个人3天能完成3个项目,那么9个人9天能完成多少个项目?",
        "分析人工智能对未来就业市场的影响",
        "推理: 所有猫都是动物,有些动物会飞,那么猫会飞吗?为什么?",
        "计算: 一个游泳池长20米,宽10米,深2米,如果每小时注水100立方米,需要多少小时注满?",
    )
    for sample in test_questions:
        print(f"\n{'='*70}")
        print(f"❓ 问题: {sample}")
        print(f"{'='*70}")
        outcome = system.single_shot_cot(sample)
        # A failed call is reported and the next question is tried.
        if "error" in outcome:
            print(f"❌ 错误: {outcome['error']}")
            continue
        print(f"📝 推理过程 ({outcome['step_count']} 个步骤):")
        for record in outcome["steps"]:
            print(f" 🔸 步骤 {record['step']}: {record['content']}")
        print(f"\n✅ 最终答案: {outcome['final_answer']}")
        print(f"⏱️ 处理时间: {outcome['processing_time']:.2f}秒")
        print(f"📊 推理长度: {outcome['reasoning_length']} 字符")


if __name__ == "__main__":
    demonstrate_cot_system()
步骤2:高级CoT技术
创建文件:advanced_cot_techniques.py
python
import json
from typing import Dict, List, Any
from openai import OpenAI
import os
class AdvancedCoTTechniques:
    """Self-consistency and step-back variants of chain-of-thought prompting.

    Args:
        openai_api_key: API key; falls back to ``OPENAI_API_KEY``.
    """

    def __init__(self, openai_api_key: str = None):
        api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        self.client = OpenAI(api_key=api_key)

    def self_consistency_cot(self, question: str, num_paths: int = 3) -> Dict[str, Any]:
        """Sample several reasoning paths and pick the most frequent answer line.

        Returns:
            Dict with ``question``, ``reasoning_paths``, ``answers``,
            ``final_answer`` and ``num_paths``.
        """
        reasoning_paths = []
        for path_index in range(num_paths):
            print(f"🔄 生成推理路径 {path_index+1}/{num_paths}")
            completion = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "你是一个推理专家。请提供详细的逐步推理过程。"},
                    {"role": "user", "content": f"请逐步推理以下问题:{question}"},
                ],
                temperature=0.7,  # higher temperature for diverse paths
                max_tokens=800,
            )
            reasoning_paths.append(completion.choices[0].message.content)

        answers = self._extract_answers_from_reasoning(reasoning_paths)
        return {
            "question": question,
            "reasoning_paths": reasoning_paths,
            "answers": answers,
            "final_answer": self._select_most_consistent_answer(answers),
            "num_paths": num_paths,
        }

    def step_back_cot(self, question: str) -> Dict[str, Any]:
        """Ask for an abstract view first, then reason concretely from it.

        Returns:
            Dict with ``question``, ``abstraction``, ``concrete_reasoning``
            and ``full_response`` (both texts combined).
        """
        # Stage 1: abstraction.
        abstraction_prompt = (
            "请先对这个问题的核心概念和基本原理进行抽象思考:\n"
            f"问题: {question}\n"
            "抽象思考:"
        )
        abstraction = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "你擅长抽象思维和概念分析。"},
                {"role": "user", "content": abstraction_prompt},
            ],
            temperature=0.3,
            max_tokens=400,
        ).choices[0].message.content

        # Stage 2: concrete reasoning grounded in the abstraction.
        concrete_prompt = (
            "基于以下抽象思考,请具体推理问题:\n"
            f"抽象思考: {abstraction}\n"
            f"原问题: {question}\n"
            "具体推理:"
        )
        concrete_reasoning = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "你擅长具体推理和问题解决。"},
                {"role": "user", "content": concrete_prompt},
            ],
            temperature=0.3,
            max_tokens=800,
        ).choices[0].message.content

        return {
            "question": question,
            "abstraction": abstraction,
            "concrete_reasoning": concrete_reasoning,
            "full_response": f"抽象思考:\n{abstraction}\n\n具体推理:\n{concrete_reasoning}",
        }

    def _extract_answers_from_reasoning(self, reasoning_paths: List[str]) -> List[str]:
        """Return, per path, the last non-empty line containing an answer keyword.

        Paths without such a line contribute a fixed placeholder string.
        """
        markers = ['答案', '结果', '结论', '因此', '所以', 'answer', 'result']

        def looks_like_answer(candidate):
            lowered = candidate.lower()
            return bool(candidate) and any(marker in lowered for marker in markers)

        answers = []
        for reasoning in reasoning_paths:
            bottom_up = (raw.strip() for raw in reversed(reasoning.split('\n')))
            answers.append(
                next((line for line in bottom_up if looks_like_answer(line)), "未明确提取答案")
            )
        return answers

    def _select_most_consistent_answer(self, answers: List[str]) -> str:
        """Majority vote over exact answer strings; ties go to the earliest seen."""
        if not answers:
            return "无法确定答案"
        tally = {}
        for candidate in answers:
            tally[candidate] = tally.get(candidate, 0) + 1
        return max(tally, key=tally.get)
def demonstrate_advanced_cot():
    """Show self-consistency and step-back CoT on one arithmetic word problem."""
    advanced_cot = AdvancedCoTTechniques()
    test_question = "一个篮子里有5个苹果,拿走2个,又放进3个,然后拿走1个,最后剩下几个苹果?"
    divider = "=" * 50

    print("🧠 自洽性CoT演示:")
    print(divider)
    consistency = advanced_cot.self_consistency_cot(test_question, 3)
    print(f"问题: {consistency['question']}")
    print(f"\n生成的推理路径:")
    for position, path in enumerate(consistency['reasoning_paths'], start=1):
        print(f"\n路径 {position}:")
        # Long paths are truncated to 200 characters for display.
        if len(path) > 200:
            print(path[:200] + "...")
        else:
            print(path)
    print(f"\n提取的答案: {consistency['answers']}")
    print(f"最终选择: {consistency['final_answer']}")

    print("\n" + divider)
    print("🔄 Step-back CoT演示:")
    step_back = advanced_cot.step_back_cot(test_question)
    print(f"抽象思考:\n{step_back['abstraction']}")
    print(f"\n具体推理:\n{step_back['concrete_reasoning']}")


if __name__ == "__main__":
    demonstrate_advanced_cot()
4.3 CoT流程图
创建文件:cot_flowchart.mermaid
5. 三大技术集成应用
5.1 构建垂直领域智能Agent
创建文件:vertical_agent.py
python
import os
from openai import OpenAI
from typing import Dict, List, Any
import json
class VerticalDomainAgent:
    """Domain agent that routes each query to direct answering, RAG, CoT or ReAct.

    Args:
        domain: Domain name inserted into the system prompt for direct answers.
        openai_api_key: API key; falls back to ``OPENAI_API_KEY``. The same
            key is handed to the RAG, ReAct and CoT subsystems.
    """

    def __init__(self, domain: str, openai_api_key: str = None):
        self.domain = domain
        self.client = OpenAI(api_key=openai_api_key or os.getenv('OPENAI_API_KEY'))
        self.conversation_history = []
        # Imported here so this module can be imported without the three
        # subsystem modules being importable.
        from complete_rag_system import CompleteRAGSystem
        from openai_react_system import OpenAIReActSystem
        from cot_reasoning_system import CoTReasoningSystem
        # Forward the explicit key; otherwise the subsystems would silently
        # fall back to the environment variable.
        self.rag_system = CompleteRAGSystem(openai_api_key)
        self.react_system = OpenAIReActSystem(openai_api_key)
        self.cot_system = CoTReasoningSystem(openai_api_key)

    def analyze_query_complexity(self, query: str) -> Dict[str, Any]:
        """Score the query with keyword heuristics and pick a strategy.

        Scores up to 3 select ``direct``, up to 6 ``rag``, up to 9 ``cot``
        and anything higher ``react``.

        Returns:
            Dict with ``complexity_score``, ``strategy`` and ``features``.
        """
        features = {
            # NOTE: counts whitespace-separated tokens, so text written
            # without spaces (typical for Chinese) counts as one token.
            "length": len(query.split()),
            "has_technical_terms": any(
                term in query.lower() for term in ['如何', '步骤', '方法', '原理', '机制']
            ),
            "requires_calculation": any(
                op in query for op in ['+', '-', '*', '/', '等于', '计算']
            ),
            "requires_reasoning": any(
                word in query for word in ['为什么', '原因', '推理', '分析', '比较']
            ),
            "requires_external_knowledge": any(
                word in query for word in ['最新', '新闻', '更新', '当前']
            ),
        }

        complexity_score = 0
        complexity_score += min(features["length"] / 5, 3)  # length, capped at 3
        complexity_score += 2 if features["has_technical_terms"] else 0
        complexity_score += 3 if features["requires_calculation"] else 0
        complexity_score += 3 if features["requires_reasoning"] else 0
        complexity_score += 2 if features["requires_external_knowledge"] else 0

        if complexity_score <= 3:
            strategy = "direct"
        elif complexity_score <= 6:
            strategy = "rag"
        elif complexity_score <= 9:
            strategy = "cot"
        else:
            strategy = "react"

        return {
            "complexity_score": complexity_score,
            "strategy": strategy,
            "features": features,
        }

    def process_query(self, query: str) -> Dict[str, Any]:
        """Answer ``query`` with the selected strategy and record the exchange.

        Returns:
            Dict with ``query``, ``strategy``, ``complexity_analysis`` and
            ``answer`` plus strategy-specific keys (``sources`` /
            ``retrieved_count``, ``reasoning_steps`` / ``step_count``,
            ``execution_steps`` / ``total_steps`` or ``method``).
        """
        print(f"🎯 处理查询: {query}")
        analysis = self.analyze_query_complexity(query)
        strategy = analysis["strategy"]
        print(f"📊 复杂度分数: {analysis['complexity_score']}")
        print(f"🎯 选择策略: {strategy}")
        result = {
            "query": query,
            "strategy": strategy,
            "complexity_analysis": analysis,
        }

        if strategy == "direct":
            result.update(self._direct_response(query))
        elif strategy == "rag":
            rag_result = self.rag_system.query(query)
            result.update({
                "answer": rag_result["answer"],
                "sources": rag_result["sources"],
                "retrieved_count": rag_result["retrieved_count"],
            })
        elif strategy == "cot":
            cot_result = self.cot_system.single_shot_cot(query)
            reasoning_steps = cot_result.get("steps", [])
            result.update({
                "answer": cot_result["final_answer"],
                "reasoning_steps": reasoning_steps,
                # The CoT error result carries no "step_count" key.
                "step_count": cot_result.get("step_count", len(reasoning_steps)),
            })
        elif strategy == "react":
            react_result = self.react_system.run(query)
            result.update({
                "answer": react_result["final_answer"],
                "execution_steps": react_result["steps"],
                "total_steps": react_result["total_steps"],
            })

        self.conversation_history.append({
            "query": query,
            "strategy": strategy,
            "response": result.get("answer", ""),
        })
        return result

    def _direct_response(self, query: str) -> Dict[str, Any]:
        """Answer with one plain chat completion; API errors become an apology text."""
        try:
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": f"你是一个{self.domain}领域的专家助手。"},
                    {"role": "user", "content": query},
                ],
                temperature=0.3,
                max_tokens=500,
            )
            return {
                "answer": response.choices[0].message.content,
                "method": "direct_generation",
            }
        except Exception as e:
            return {
                "answer": f"抱歉,处理问题时出现错误: {str(e)}",
                "method": "error",
            }

    def get_conversation_summary(self) -> Dict[str, Any]:
        """Summarise the history: query count, strategy counts, last five entries."""
        if not self.conversation_history:
            return {"message": "暂无对话历史"}
        strategy_counts = {}
        for conv in self.conversation_history:
            strategy = conv["strategy"]
            strategy_counts[strategy] = strategy_counts.get(strategy, 0) + 1
        return {
            "total_queries": len(self.conversation_history),
            "strategy_distribution": strategy_counts,
            "recent_queries": self.conversation_history[-5:],
        }
def demonstrate_vertical_agent():
    """Run four medical sample queries through the agent and print a summary."""
    medical_agent = VerticalDomainAgent("医疗健康")
    # Samples meant to illustrate the four strategies; the strategy actually
    # used is whatever the agent's scoring selects at run time.
    test_queries = (
        "什么是糖尿病?",
        "最新的糖尿病治疗指南有哪些内容?",
        "为什么糖尿病患者需要控制饮食?请详细分析原因。",
        "计算一个BMI为28的糖尿病患者的每日热量需求,并制定饮食计划",
    )
    for sample in test_queries:
        print(f"\n{'='*80}")
        print(f"🗣️ 用户查询: {sample}")
        print(f"{'='*80}")
        outcome = medical_agent.process_query(sample)
        chosen = outcome['strategy']
        print(f"🎯 使用策略: {chosen}")
        print(f"📊 复杂度: {outcome['complexity_analysis']['complexity_score']}")
        print(f"🤖 回答: {outcome['answer']}")
        # Strategy-specific detail line.
        if chosen == 'rag' and 'sources' in outcome:
            print(f"📚 参考来源: {len(outcome['sources'])} 个文档")
        elif chosen == 'cot' and 'reasoning_steps' in outcome:
            print(f"🔍 推理步骤: {outcome['step_count']} 步")
        elif chosen == 'react' and 'execution_steps' in outcome:
            print(f"⚡ 执行步骤: {outcome['total_steps']} 步")

    print(f"\n{'='*80}")
    print("📈 对话摘要")
    print(f"{'='*80}")
    summary = medical_agent.get_conversation_summary()
    print(f"总查询数: {summary['total_queries']}")
    print(f"策略分布: {summary['strategy_distribution']}")


if __name__ == "__main__":
    demonstrate_vertical_agent()
6. 部署与优化
6.1 性能监控与优化
创建文件:performance_monitor.py
python
import time
import psutil
import GPUtil
from datetime import datetime
from typing import Dict, List, Any
import json
class PerformanceMonitor:
    """Collect host metrics and keep an in-memory log of API operations."""

    def __init__(self):
        self.metrics_history = []
        self.start_time = time.time()

    def collect_system_metrics(self) -> Dict[str, Any]:
        """Sample CPU, memory, root-disk and (when available) GPU usage.

        The CPU reading uses a one-second sampling interval, so this call
        takes at least that long.

        Returns:
            Dict with ``timestamp``, ``cpu_percent``, ``memory_percent``,
            ``memory_used_gb``, ``memory_total_gb``, ``disk_percent`` and
            ``gpu_metrics``.
        """
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        gpu_metrics = {}
        try:
            for i, gpu in enumerate(GPUtil.getGPUs()):
                gpu_metrics[f"gpu_{i}"] = {
                    "load": gpu.load * 100,
                    "memory_used": gpu.memoryUsed,
                    "memory_total": gpu.memoryTotal,
                }
        except Exception:
            # GPU probing is best effort, but a bare ``except`` would also
            # swallow KeyboardInterrupt and SystemExit.
            gpu_metrics = {"error": "GPU信息不可用"}

        bytes_per_gb = 1024 ** 3
        return {
            "timestamp": datetime.now().isoformat(),
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "memory_used_gb": memory.used / bytes_per_gb,
            "memory_total_gb": memory.total / bytes_per_gb,
            "disk_percent": disk.percent,
            "gpu_metrics": gpu_metrics,
        }

    def record_api_metrics(self, operation: str, duration: float,
                           success: bool, tokens_used: int = 0) -> Dict[str, Any]:
        """Append one operation record to the history and return it.

        Args:
            operation: Operation name.
            duration: Duration in seconds.
            success: Whether the operation succeeded.
            tokens_used: Token count attributed to the operation.
        """
        metric = {
            "timestamp": datetime.now().isoformat(),
            "operation": operation,
            "duration_seconds": duration,
            "success": success,
            "tokens_used": tokens_used,
        }
        self.metrics_history.append(metric)
        return metric

    def get_performance_summary(self) -> Dict[str, Any]:
        """Aggregate the recorded operations.

        Average duration and token total cover successful operations only.

        Returns:
            A placeholder message dict when nothing has been recorded,
            otherwise counts, success rate (percent), average duration,
            total tokens and uptime in hours.
        """
        if not self.metrics_history:
            return {"message": "暂无性能数据"}

        successful_ops = [m for m in self.metrics_history if m["success"]]
        failed_count = len(self.metrics_history) - len(successful_ops)
        if successful_ops:
            avg_duration = sum(m["duration_seconds"] for m in successful_ops) / len(successful_ops)
            total_tokens = sum(m.get("tokens_used", 0) for m in successful_ops)
        else:
            avg_duration = 0
            total_tokens = 0

        return {
            "total_operations": len(self.metrics_history),
            "successful_operations": len(successful_ops),
            "failed_operations": failed_count,
            "success_rate": len(successful_ops) / len(self.metrics_history) * 100,
            "average_duration_seconds": avg_duration,
            "total_tokens_used": total_tokens,
            "uptime_hours": (time.time() - self.start_time) / 3600,
        }

    def save_metrics_to_file(self, filename: str = "performance_metrics.json"):
        """Write system metrics, the summary and the last 50 operations as JSON."""
        data = {
            "system_metrics": self.collect_system_metrics(),
            "performance_summary": self.get_performance_summary(),
            "recent_operations": self.metrics_history[-50:],
        }
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"✅ 性能指标已保存到: {filename}")
def monitor_demo():
    """Record five simulated operations, print the summary and save it to disk."""
    monitor = PerformanceMonitor()
    # (operation, duration in seconds, success flag, tokens used)
    simulated_operations = (
        ("rag_query", 1.2, True, 450),
        ("cot_reasoning", 3.5, True, 1200),
        ("react_execution", 8.7, True, 2300),
        ("direct_response", 0.8, True, 300),
        ("api_call", 2.1, False, 0),  # the failing one
    )
    for record in simulated_operations:
        monitor.record_api_metrics(*record)
        time.sleep(0.5)  # simulated gap between operations

    print("📊 性能摘要:")
    for label, figure in monitor.get_performance_summary().items():
        print(f" {label}: {figure}")

    monitor.save_metrics_to_file()


if __name__ == "__main__":
    monitor_demo()
7. 结论
通过本文详细的教程,我们深入探讨了构建垂直Agent的三大核心技术:RAG、ReAct和CoT。每种技术都有其独特的优势和适用场景:
- RAG 解决了大模型的知识更新和事实准确性问题
- ReAct 提供了复杂问题的动态规划和执行能力
- CoT 显著提升了模型的推理和逻辑分析能力
本教程提供的完整代码可以根据具体业务需求进行调整和优化。