Report-Generating Agent
Agent = large language model + task-specific tools. On the model side, you supply a prompt that lays out the overall execution workflow, essentially a work outline that guides the model through the task step by step (tool selection). On the tool side, you tell the model which tool to call at each step so the step actually gets carried out (tool execution).
Tool definitions passed to the model
python
from typing import Literal, Optional
from openai import AsyncAzureOpenAI, AsyncOpenAI
import logging
logger_websocket = logging.getLogger("websocket_deep_thinking")
ds_3_2_tools = [
{
"type": "function",
"function": {
"name": "web_fetch",
"description": "使用高速爬虫批量获取网页原始文本,是深入阅读网页的关键步骤。每处理一个URL消耗1个web_fetch预算单位。",
"parameters": {
"type": "object",
"properties": {
"tasks": {
"type": "array",
"description": "必填。包含需要爬取的任务列表。",
"items": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "必填。需要爬取的网页URL地址。"
},
"title": {
"type": "string",
"description": "必填。网页标题,用于标识抓取任务。"
}
},
"required": [
"url",
"title"
]
}
}
},
"required": [
"tasks"
]
}
}
},
{
"type": "function",
"function": {
"name": "content_extract",
"description": "全场景信息加工工具。支持处理web_fetch爬虫数据、knowledge_search检索结果或read_file读取的本地/父目录文件。工具会自动将网页URL或本地文件名映射为来源,并生成唯一的引用ID。不消耗预算单位。",
"parameters": {
"type": "object",
"properties": {
"fetch_results": {
"type": "array",
"description": "必填。包含原始文本的数据列表,支持爬虫返回的Dict列表或自定义的内容字典。",
"items": {
"type": "object"
}
},
"query": {
"type": "string",
"description": "必填。指导模型提取事实的方向(例如:提取公司技术亮点、提取市场数据等)。"
},
"memory_bank_path": {
"type": "string",
"description": "必填。存储提取证据的记忆库文件路径,相对于工作空间,如'memory_bank.json'。"
}
},
"required": ["fetch_results", "query", "memory_bank_path"]
}
}
},
]
class GPTClient:
def __init__(
self,
api_key: str,
model_or_deployment: str,
mode: Literal["azure", "openai"] = "openai",
api_base: Optional[str] = None,
api_version: Optional[str] = None,
):
self.mode = mode
self.model_or_deployment = model_or_deployment
if mode == "azure":
if not api_base or not api_version:
raise ValueError("Azure mode requires `api_base` and `api_version`.")
self.client = AsyncAzureOpenAI(
api_key=api_key,
api_version=api_version,
azure_endpoint=api_base,
)
else:
self.client = AsyncOpenAI(
api_key=api_key,
base_url=api_base or "https://api.openai.com/v1",
# timeout=httpx.Timeout(
# connect=15.0, # 连接超时
# read=600.0, # 读取超时(包括流式响应)
# )
)
    async def chat(self, system_prompt, user_input):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ]
        # The client is async (AsyncOpenAI / AsyncAzureOpenAI), so the call must be awaited.
        response = await self.client.chat.completions.create(
            model=self.model_or_deployment,
            messages=messages,
            temperature=0.7,
            max_tokens=512,
        )
        response_content = response.choices[0].message.content
        output_tokens = response.usage.completion_tokens
        input_tokens = response.usage.prompt_tokens
        return response_content, input_tokens, output_tokens
async def chat_async_tool_select(self, authorization, messages):
# messages = [
# {"role": "system", "content": system_prompt},
# {"role": "user", "content": user_input},
# ]
# print(messages)
response = await self.client.chat.completions.create(
model=self.model_or_deployment,
messages=messages,
temperature=0,
top_p=0.1,
max_tokens=8192,
tools=ds_3_2_tools
)
        print(f"Raw model response: {response}")
tool_use = response.choices[0].message.tool_calls
response_content = response.choices[0].message.content
output_tokens = response.usage.completion_tokens
input_tokens = response.usage.prompt_tokens
return tool_use, response_content, input_tokens, output_tokens
def create_deepseek_client():
api_key = ""
deployment_id = ""
api_base = ""
api_version = ""
client = GPTClient(
api_key=api_key,
model_or_deployment=deployment_id,
mode="openai",
api_base=api_base,
api_version=api_version,
)
return client
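For a quick sanity check of the tool-selection call, the sketch below drives chat_async_tool_select directly with a hand-written message list. The prompt strings and the empty authorization value are placeholders, and create_deepseek_client() still needs real credentials filled in.
python
import asyncio

async def _demo_tool_select():
    client = create_deepseek_client()  # credentials above are intentionally left blank
    messages = [
        {"role": "system", "content": "你是生成报告的智能体,请按工作大纲逐步选择工具。"},  # placeholder outline prompt
        {"role": "user", "content": "请调研某公司的技术亮点并生成一份报告。"},  # placeholder question
    ]
    tool_use, content, input_tokens, output_tokens = await client.chat_async_tool_select(
        authorization="", messages=messages
    )
    # tool_use is the list of tool_calls (or None) picked from ds_3_2_tools.
    print(tool_use, content, input_tokens, output_tokens)

# asyncio.run(_demo_tool_select())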
Tool selection
python
class ToolSelect:
def __init__(self):
self.deepseek_official_client = create_deepseek_official_client()
self.gemini_client = create_gemini_client()
async def tool_select(self, authorization, query, assistant_message, budge_info):
try:
with open("create_report_agent_light_prompt.txt") as f:
system_prompt = f.read()
with open("user_input.txt") as f:
user_input_templete = f.read()
user_input = user_input_templete.replace("{user_question}", query)
budge_templete = """
<budget>
web_search 已使用budget:{web_search_used},剩余查询budget:{web_search_remain}
process_and_memorize 已使用budget:{process_and_memorize_used},剩余URL处理budget:{process_and_memorize_remain}
</budget>
"""
budge_info = budge_templete. \
replace("{web_search_used}", str(budge_info.get("web_search_used"))). \
replace("{web_search_remain}", str(budge_info.get("web_search_remain"))). \
replace("{process_and_memorize_used}", str(budge_info.get("process_and_memorize_used"))). \
replace("{process_and_memorize_remain}", str(budge_info.get("process_and_memorize_remain")))
budge_message = [{"role": "system", "content": budge_info}]
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_input}]
messages.extend(assistant_message)
messages.extend(budge_message)
# create_report_logger.info(f"message:{messages}")
tool_use, response_content, input_tokens, output_tokens = await self.deepseek_official_client.chat_async_tool_select(
authorization, messages)
# tool_use, response_content, input_tokens, output_tokens = await self.gemini_client.chat_async_tool_select(
# authorization, messages)
return response_content, tool_use
        except Exception as e:
            # Return an empty result so the caller can still unpack (content, tool_use).
            print(f"Model call failed: {e}")
            return None, None
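The tool_use value returned above is a list of OpenAI-style tool_calls. Before execute_tools can run, each call has to be unpacked into func_name plus JSON-decoded func_args; that glue lives in process_execute_request, which is not shown here, so the following is only a minimal sketch of what the unpacking looks like.
python
import json

def parse_tool_calls(tool_calls):
    """Unpack OpenAI-style tool_calls into (call_id, func_name, func_args) tuples."""
    parsed = []
    for tool_call in tool_calls or []:
        func_name = tool_call.function.name
        # `arguments` is a JSON string produced by the model and can occasionally be malformed.
        try:
            func_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError:
            func_args = {}
        parsed.append((tool_call.id, func_name, func_args))
    return parsed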
Tool execution
python
async def execute_tools(self, func_name, func_args, budge_info, need_finish):
    # Run the selected tool and return its result together with the updated budge_info.
    if func_name == "bash":
        command = func_args.get("command")
        timeout = func_args.get("timeout")
        result = self.bash_tool_client.run(command, timeout)
    elif func_name == "WebSearch":
        request_data = {"param_json": func_args.get("search_parameter")}
        # response_result = await call_websearch(request_data)
        tool_name = "Bocha"
        # tool_name = "google"
        response_result = await get_web_search_result(tool_name, request_data)
        if response_result.get("status") == 'success':
            budge_info["web_search_used"] += 1
            budge_info["web_search_remain"] -= 1
        result = response_result
    elif func_name == "web_fetch":
        # Crawling: each URL consumes one unit of the web_fetch budget.
        tasks = func_args.get("tasks", [])
        # Fetch all requested pages.
        fetch_results = await self.process_and_memory_client.web_fetch_all(tasks)
        # Deduct the budget by the number of tasks actually issued.
        task_count = len(tasks)
        budge_info["process_and_memorize_used"] += task_count
        budge_info["process_and_memorize_remain"] -= task_count
        result = fetch_results
    elif func_name == "content_extract":
        # Extraction: processes crawler or RAG results; consumes no budget units (cost is 0).
        fetch_results = func_args.get("fetch_results")
        query = func_args.get("query")
        memory_bank_path = func_args.get("memory_bank_path")
        # Call the batch-processing function refactored earlier.
        extract_result = await self.process_and_memory_client.process_and_memorize_all(
            fetch_results,
            query,
            memory_bank_path
        )
        result = extract_result
    else:
        # Branches for the remaining tools (e.g. answer_user) are omitted in this snippet.
        result = f"Unknown tool: {func_name}"
    return result, budge_info, need_finish
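After a tool finishes, its output has to be written back into assistant_message so the next tool_select round can see it. The sketch below follows the standard OpenAI tool-calling message format (an assistant message carrying the tool_calls, followed by one "tool" message per call); the bookkeeping actually done by process_execute_request may differ in detail.
python
import json

def append_tool_result(assistant_message, tool_call, result):
    """Record one tool call and its result in the running message history."""
    # Echo the assistant turn that requested the call (required before a 'tool' reply).
    assistant_message.append({
        "role": "assistant",
        "content": None,
        "tool_calls": [{
            "id": tool_call.id,
            "type": "function",
            "function": {
                "name": tool_call.function.name,
                "arguments": tool_call.function.arguments,
            },
        }],
    })
    # Attach the tool output, keyed by the same tool_call id.
    assistant_message.append({
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": json.dumps(result, ensure_ascii=False, default=str),
    })
    return assistant_message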
Tool implementation
python
import os
import subprocess
from typing import Optional

class BashTool:
    def __init__(self,
                 work_dir: str = "./workspace",
                 conda_home: Optional[str] = None,
                 conda_env: Optional[str] = None):
        """
        Initialize the Bash tool.
        :param work_dir: base directory in which commands are executed
        :param conda_home: Conda installation root (e.g. /data/aliang/anaconda3 or miniconda3);
                           if given, the activation command is constructed automatically.
        :param conda_env: name of the Conda environment to activate (e.g. aliang_py311)
        """
        self.work_dir = os.path.abspath(work_dir)
        if not os.path.exists(self.work_dir):
            os.makedirs(self.work_dir)
        self.conda_home = conda_home
        self.conda_env = conda_env
self.authorization = ""
    def _wrap_command_with_env(self, command: str) -> str:
        """
        Internal helper: if a Conda environment is configured, wrap the command
        in the environment-activation sequence.
        """
        if self.conda_home and self.conda_env:
            # Path of the Conda init script, normally <conda_home>/etc/profile.d/conda.sh
            conda_sh_path = os.path.join(self.conda_home, "etc", "profile.d", "conda.sh")
            # Compose: source conda.sh && conda activate env && original command.
            # Note: with bash -c the whole string would have to be passed in, but under
            # subprocess shell=True the parts can simply be chained with &&.
            wrapped_cmd = f"source {conda_sh_path} && conda activate {self.conda_env} && {command}"
            return wrapped_cmd
        return command
    def run(self, command: str, timeout: int = 4000) -> str:
        """
        Execute a Bash command.
        """
        target_script = "write_aync.py"
        if target_script in command:
            # Check whether these arguments are already present (guards against double
            # injection; argparse tolerates duplicates and usually keeps the last value).
            # Important: the path arguments are wrapped in double quotes.
            injection = f' --workspace_path "{self.work_dir}" --authorization "{self.authorization}"'
            # Append the injected arguments to the original command, which looks like
            # ".../python .../write_aync.py --filename_prefix 'xxx'".
            command = command + injection
        # Inject the Conda environment (the command may have been modified above).
        final_command = self._wrap_command_with_env(command)
        # Debug output (optional) showing what is actually executed.
        print(f"[*] Executing: {final_command}")
try:
result = subprocess.run(
final_command,
shell=True,
cwd=self.work_dir,
capture_output=True,
text=True,
timeout=timeout,
                executable='/bin/bash'  # force bash, since 'source' is a bash builtin
)
if result.returncode == 0:
output = result.stdout.strip()
return output if output else "Success (No output)"
else:
return f"Error (Return Code {result.returncode}):\n{result.stderr.strip()}"
except subprocess.TimeoutExpired:
return f"Error: Command timed out after {timeout}s."
except Exception as e:
return f"System Error: {str(e)}"
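A small usage sketch of BashTool; the conda path and environment name below are placeholders rather than the actual server layout.
python
# Placeholder conda installation and environment name.
bash_tool = BashTool(
    work_dir="./workspace",
    conda_home="/opt/anaconda3",
    conda_env="report_env",
)
print(bash_tool.run("python --version", timeout=60))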
Main loop: tool selection + tool execution
python
while True:
    # The assistant messages grow every round, so they are passed to tool_select again each time.
    response_content, function_call_result = await self.tool_select_client.tool_select(authorization, query, assistant_message, budge_info)
    create_report_logger.info(f"tool_select response_content: {response_content}")
    create_report_logger.info(f"tool_select function_call_result: {function_call_result}")
    if not function_call_result:
        return response_content
    else:
        assistant_message, budge_info, need_finish = await self.execute_client.process_execute_request(response_content, function_call_result, assistant_message, budge_info)
        if need_finish:
            # Currently only answer_user requires exiting the loop.
            ask_user_content = assistant_message[-1].get("content")
            return ask_user_content
Current report-generation logic:
(1) Research the material for the report. Research draws on three sources: the external knowledge base, web search, and previously retrieved results (files retrieved in earlier turns of the conversation).
(2) Web search returns a list of dicts made up of titles, links, and so on; the crawler tool is then used to fetch the full content of each URL.
(3) Atomic evidence extraction: the retrieved content is split into individual atomic facts, which reduces hallucination when the report is written; the facts are stored in an evidence bank made up of dicts.
(4) Generate the report.
Beyond these main steps, the flow also checks whether the evidence is sufficient and loops until the task is complete. Today's task is to split the crawler and the evidence-extraction function into two atomic capabilities, so that other modules can reuse them and the model can select them independently.
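To make that split concrete, here is a rough sketch of what the two atomic functions could look like once separated: web_fetch_all only crawls and returns raw text per URL, while process_and_memorize_all turns raw texts into atomic evidence entries and appends them to the JSON memory bank. This is a simplified illustration (it assumes aiohttp for fetching and a plain-JSON memory bank, and replaces the LLM-based fact extraction with a placeholder); the real web_fetch_all / process_and_memorize_all in process_and_memory_client may differ.
python
import asyncio
import json
import os
import uuid

import aiohttp

async def web_fetch_all(tasks):
    """Crawl every {'url', 'title'} task and return the raw page text; no extraction here."""
    async def fetch_one(session, task):
        try:
            async with session.get(task["url"], timeout=aiohttp.ClientTimeout(total=30)) as resp:
                text = await resp.text()
            return {"url": task["url"], "title": task["title"], "raw_text": text, "status": "success"}
        except Exception as e:
            return {"url": task["url"], "title": task["title"], "raw_text": "", "status": f"error: {e}"}

    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch_one(session, t) for t in tasks))

async def process_and_memorize_all(fetch_results, query, memory_bank_path):
    """Split raw texts into atomic evidence items (guided by `query`) and append them to the memory bank."""
    evidence = []
    for item in fetch_results:
        if not item.get("raw_text"):
            continue
        # In the real tool an LLM extracts atomic facts; a truncated excerpt stands in for that here.
        evidence.append({
            "citation_id": uuid.uuid4().hex[:8],             # unique reference id for the report
            "source": item.get("url") or item.get("title"),  # URL or file name mapped to the source
            "query": query,
            "fact": item["raw_text"][:500],
        })
    bank = []
    if os.path.exists(memory_bank_path):
        with open(memory_bank_path, encoding="utf-8") as f:
            bank = json.load(f)
    bank.extend(evidence)
    with open(memory_bank_path, "w", encoding="utf-8") as f:
        json.dump(bank, f, ensure_ascii=False, indent=2)
    return {"added": len(evidence), "memory_bank_path": memory_bank_path}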