03-深度学习基础：Agent与工具调用

Agent与工具调用：让大模型拥有行动能力

一、什么是Agent？

1.1 Agent vs 普通LLM

python 复制代码

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import warnings
warnings.filterwarnings('ignore')

# Section banner for the console output.
print("=" * 60)
print("Agent：让LLM拥有行动能力")
print("=" * 60)

# Text table contrasting a plain LLM with an agent.
comparison = """
╔══════════════════╦══════════════════════════════════════════════════════════════╗
║     普通LLM      ║                    Agent                                     ║
╠══════════════════╬══════════════════════════════════════════════════════════════╣
║ 只能生成文本     ║ 可以执行行动                                               ║
║ 被动回答问题     ║ 主动完成任务                                               ║
║ 无记忆（单轮）   ║ 有记忆（多轮交互）                                         ║
║ 无法使用工具     ║ 可以调用工具（搜索、计算、API）                            ║
║ 一次性输出       ║ 循环思考-行动-观察                                         ║
╚══════════════════╩══════════════════════════════════════════════════════════════╝
"""

print(comparison)

# Visual comparison: one panel per workflow.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Left panel - plain LLM: question -> LLM -> answer, drawn top to bottom.
# In annotate(), the arrow runs from `xytext` (tail) to `xy` (head).
ax1 = axes[0]
ax1.axis('off')
ax1.set_title('普通LLM：一次性输出', fontsize=12)
ax1.text(0.5, 0.7, '用户问题', ha='center', fontsize=10,
         bbox=dict(boxstyle='round', facecolor='lightblue'))
ax1.annotate('', xy=(0.5, 0.55), xytext=(0.5, 0.65),
            arrowprops=dict(arrowstyle='->', lw=2))
ax1.text(0.5, 0.45, 'LLM', ha='center', fontsize=10,
         bbox=dict(boxstyle='round', facecolor='lightgreen'))
ax1.annotate('', xy=(0.5, 0.3), xytext=(0.5, 0.4),
            arrowprops=dict(arrowstyle='->', lw=2))
ax1.text(0.5, 0.2, '最终答案', ha='center', fontsize=10,
         bbox=dict(boxstyle='round', facecolor='lightcoral'))

# Right panel - agent: think / act / observe loop.
ax2 = axes[1]
ax2.axis('off')
ax2.set_title('Agent：循环思考-行动-观察', fontsize=12)

# Loop nodes as (label, x, y); each is drawn as a circle of radius 0.08.
nodes = [
    ("用户问题", 0.5, 0.85),
    ("思考\n(Thought)", 0.5, 0.65),
    ("行动\n(Action)", 0.25, 0.45),
    ("观察\n(Observation)", 0.5, 0.45),
    ("工具执行", 0.75, 0.45),
    ("最终答案", 0.5, 0.2)
]

for label, x, y in nodes:
    circle = plt.Circle((x, y), 0.08, color='lightblue', ec='black')
    ax2.add_patch(circle)
    ax2.text(x, y, label, ha='center', va='center', fontsize=7)

# Connectors between nodes.
# Fixed: the head now sits on the Thought node (y=0.73) so the arrow points
# down from the user question instead of back up at it.
ax2.annotate('', xy=(0.5, 0.73), xytext=(0.5, 0.77), arrowprops=dict(arrowstyle='->', lw=1))
ax2.annotate('', xy=(0.33, 0.57), xytext=(0.42, 0.62), arrowprops=dict(arrowstyle='->', lw=1))
ax2.annotate('', xy=(0.5, 0.53), xytext=(0.58, 0.57), arrowprops=dict(arrowstyle='->', lw=1))
# Fixed: the head now sits on the final-answer node (y=0.28) so the arrow
# points down into the answer rather than up out of it.
ax2.annotate('', xy=(0.5, 0.28), xytext=(0.5, 0.37), arrowprops=dict(arrowstyle='->', lw=1))

# Curved red arrow marking the repeat of the loop.
ax2.annotate('', xy=(0.42, 0.62), xytext=(0.58, 0.62),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.3', lw=1, color='red'))

ax2.text(0.5, 0.1, '循环直到任务完成', ha='center', fontsize=9,
         bbox=dict(boxstyle='round', facecolor='lightyellow'))

plt.suptitle('普通LLM vs Agent', fontsize=14)
plt.tight_layout()
plt.show()

# Recap of the four agent capabilities.
print("\n💡 Agent的核心能力:")
print("   1. 推理 (Reasoning): 思考如何解决问题")
print("   2. 行动 (Acting): 调用工具执行操作")
print("   3. 观察 (Observing): 获取工具执行结果")
print("   4. 记忆 (Memory): 记住历史交互")

二、ReAct模式

2.1 ReAct原理

python 复制代码

def react_pattern():
    """Print a ReAct banner, draw the loop diagram, and print a sample trace.

    The figure holds a central hub circle, three stage circles, four short
    arrows (the last one red) and a caption box. Returns None.
    """
    import matplotlib.patches as patches

    rule = "=" * 60
    print("\n" + rule)
    print("ReAct：Reasoning + Acting")
    print(rule)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Hub in the middle of the canvas.
    hub = patches.Circle((0.5, 0.5), 0.15, color='lightblue', ec='black')
    ax.add_patch(hub)
    ax.text(0.5, 0.5, 'ReAct\n循环', ha='center', va='center', fontsize=10, fontweight='bold')

    # Stage circles as (caption, x, y).
    stages = (
        ("Thought\n(思考)", 0.2, 0.7),
        ("Action\n(行动)", 0.8, 0.7),
        ("Observation\n(观察)", 0.8, 0.3),
    )
    for caption, cx, cy in stages:
        ax.add_patch(patches.Circle((cx, cy), 0.12, color='lightgreen', ec='black'))
        ax.text(cx, cy, caption, ha='center', va='center', fontsize=9)

    # Arrows as (tail, head, extra arrow properties); the red one closes the loop.
    arrows = (
        ((0.32, 0.58), (0.38, 0.62), {}),
        ((0.62, 0.58), (0.68, 0.62), {}),
        ((0.62, 0.38), (0.68, 0.42), {}),
        ((0.38, 0.38), (0.32, 0.42), {'color': 'red'}),
    )
    for tail, head, extra in arrows:
        ax.annotate('', xy=head, xytext=tail,
                    arrowprops=dict(arrowstyle='->', lw=2, **extra))

    ax.text(0.5, 0.15, '循环直到获得最终答案', ha='center', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='lightyellow'))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('ReAct模式：思考 → 行动 → 观察', fontsize=14)

    plt.tight_layout()
    plt.show()

    # Worked example of one Thought / Action / Observation round.
    print("\n📝 ReAct示例（查询天气）:")
    print("""
    Question: 北京今天天气怎么样？
    
    Thought 1: 我需要查询北京的天气信息
    Action 1: search[北京天气]
    Observation 1: 北京今天晴，温度15-25°C
    
    Thought 2: 我已经获得了天气信息，可以回答用户了
    Action 2: finish[北京今天晴天，温度15-25°C]
    """)

react_pattern()

2.2 简单Agent实现

python 复制代码

class SimpleAgent:
    """Minimal ReAct-style agent driven by keyword heuristics.

    The ``llm`` argument is stored but not called; ``_think`` and
    ``_decide_action`` stand in for the model with simple string matching.

    Attributes:
        llm: Whatever the caller passed in (unused by the heuristics).
        tools: Mapping of tool name to tool object; each tool must expose
            ``name`` and ``run(input_str)``.
        max_iterations: Upper bound on think/act/observe rounds per ``run``.
        memory: List of dicts, one per executed tool call, with the keys
            ``question``, ``action``, ``action_input`` and ``observation``.
    """

    # Marker that ``run`` puts in the follow-up prompt once a tool has answered.
    _OBSERVATION_MARKER = "观察结果: "
    # Thought emitted when an observation is already available.
    _DONE_THOUGHT = "已获得工具结果，可以回答用户"

    def __init__(self, llm, tools):
        self.llm = llm
        self.tools = {tool.name: tool for tool in tools}
        self.max_iterations = 5
        self.memory = []

    def run(self, question):
        """Run the think/act/observe loop for ``question``.

        Returns:
            The final answer string, or None when an unknown tool is
            requested or ``max_iterations`` rounds pass without a finish.
        """
        print(f"\n📝 用户问题: {question}")
        print("-" * 50)

        current_question = question

        for iteration in range(1, self.max_iterations + 1):
            print(f"\n🔄 迭代 {iteration}")

            # 1. Think.
            thought = self._think(current_question)
            print(f"💭 思考: {thought}")

            # 2. Pick the next action.
            action, action_input = self._decide_action(thought)
            print(f"🎯 行动: {action}({action_input})")

            # 3. Stop when the agent decides it is done.
            if action == "finish":
                print(f"✅ 最终答案: {action_input}")
                return action_input

            # 4. Run the tool. An unknown tool ends the run immediately, so
            # the "max iterations" message below is not printed for it.
            if action not in self.tools:
                print(f"❌ 未知工具: {action}")
                return None

            observation = self.tools[action].run(action_input)
            print(f"👁️ 观察: {observation}")
            self.memory.append({
                "question": question,
                "action": action,
                "action_input": action_input,
                "observation": observation,
            })
            current_question = f"之前的问题: {question}\n观察结果: {observation}\n下一步?"

        print("❌ 达到最大迭代次数")
        return None

    def _think(self, question):
        """Return a canned thought for ``question`` (stand-in for an LLM call).

        The observation marker is checked first: the follow-up prompt still
        contains the original question, so matching its keywords again would
        repeat the same tool call until the iteration cap.
        """
        if self._OBSERVATION_MARKER in question:
            return self._DONE_THOUGHT
        if "天气" in question:
            return "需要查询天气信息"
        if "计算" in question or "多少" in question:
            return "需要进行计算"
        return "可以直接回答"

    def _decide_action(self, thought):
        """Map a thought to an ``(action, action_input)`` pair.

        When the thought says a tool result is available and ``memory`` has
        an entry, the most recent observation becomes the final answer.
        """
        if "查询" in thought:
            return ("search", "北京天气")
        if "计算" in thought:
            return ("calculate", "2 + 2")
        if thought == self._DONE_THOUGHT and self.memory:
            return ("finish", self.memory[-1]["observation"])
        return ("finish", "这是最终答案")


# 定义工具
class Tool:
    """Named wrapper around a callable that an agent can invoke.

    Attributes:
        name: Identifier used to look the tool up.
        func: Callable taking the input string.
        description: Human-readable summary of what the tool does.
    """

    def __init__(self, name, func, description):
        self.name, self.func, self.description = name, func, description

    def run(self, input_str):
        """Call the wrapped function with ``input_str`` and return its result."""
        handler = self.func
        return handler(input_str)


def search_tool(query):
    """Return a canned search result for ``query`` (mock search tool).

    Queries containing "天气" get a fixed Beijing weather line; anything
    else gets a generic result string that echoes the query.
    """
    if "天气" not in query:
        return f"搜索结果: 关于'{query}'的信息"
    return "北京今天晴天，温度15-25°C"

def calculate_tool(expression):
    """模拟计算工具"""
    try:
        result = eval(expression)
        return f"{expression} = {result}"
    except:
        return f"无法计算: {expression}"

# 测试简单Agent
# Demo run of the simple agent: banner, tool list, then one weather question.
print("\n" + "=" * 60, "简单Agent测试", "=" * 60, sep="\n")

tools = [
    Tool(tool_name, tool_func, tool_description)
    for tool_name, tool_func, tool_description in (
        ("search", search_tool, "搜索信息"),
        ("calculate", calculate_tool, "数学计算"),
    )
]

agent = SimpleAgent(llm=None, tools=tools)
agent.run("北京今天天气怎么样？")

三、工具定义与调用

3.1 常用工具类型

python 复制代码

def tools_overview():
    """Print the common agent tool categories and a tool-definition sample.

    Output goes to stdout only; returns None.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Agent常用工具")
    print(rule)

    # (category, examples) pairs, printed in this order.
    catalog = [
        ("🔍 搜索工具", "Google搜索、Bing搜索、Wikipedia"),
        ("🧮 计算工具", "数学计算、代码执行"),
        ("📁 文件工具", "读写文件、数据处理"),
        ("🌐 API工具", "调用外部API（天气、新闻、股票）"),
        ("💾 数据库工具", "查询SQL数据库"),
        ("📧 通信工具", "发送邮件、消息"),
        ("🤖 其他AI", "调用其他AI模型"),
    ]
    for category, samples in catalog:
        print(f"\n  {category}: {samples}")

    # Sample code is printed as text, not executed.
    print("\n📐 工具定义示例:")
    print("""
    from typing import Optional, Type
    from pydantic import BaseModel, Field
    
    class WeatherInput(BaseModel):
        city: str = Field(description="城市名称")
        unit: Optional[str] = Field("celsius", description="温度单位")
    
    def get_weather(city: str, unit: str = "celsius") -> str:
        \"\"\"获取城市天气\"\"\"
        # 调用天气API
        return f"{city}天气: 晴天, 25°C"
    
    # LangChain工具定义
    from langchain.tools import tool
    
    @tool
    def search(query: str) -> str:
        \"\"\"搜索网络信息\"\"\"
        return search_api(query)
    """)

tools_overview()

3.2 工具调用流程

python 复制代码

def tool_calling_flow():
    """Print a banner, draw the five-step tool-calling pipeline, and print a
    sample of the OpenAI function-calling payloads.

    Returns None.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("工具调用流程")
    print(rule)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Pipeline stages as (caption, x, y), laid out left to right.
    stages = [
        ("用户输入", 0.05, 0.5),
        ("LLM决策", 0.25, 0.5),
        ("解析工具调用", 0.45, 0.5),
        ("执行工具", 0.65, 0.5),
        ("返回结果", 0.85, 0.5),
    ]
    final_index = len(stages) - 1

    for index, (caption, cx, cy) in enumerate(stages):
        ax.add_patch(plt.Rectangle((cx-0.07, cy-0.08), 0.14, 0.16,
                                   facecolor='lightblue', ec='black'))
        ax.text(cx, cy, caption, ha='center', va='center', fontsize=8)

        # Every stage except the last gets an arrow on its right side.
        if index != final_index:
            ax.annotate('', xy=(cx+0.08, 0.5), xytext=(cx+0.05, 0.5),
                        arrowprops=dict(arrowstyle='->', lw=2))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('工具调用流程', fontsize=12)

    plt.tight_layout()
    plt.show()

    # Sample payloads are printed as text, not executed.
    print("\n📐 OpenAI Function Calling格式:")
    print("""
    # 定义工具
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "获取指定城市的天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {
                            "type": "string",
                            "description": "城市名称"
                        }
                    },
                    "required": ["city"]
                }
            }
        }
    ]
    
    # LLM响应
    response = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_123",
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "arguments": '{"city": "北京"}'
                }
            }
        ]
    }
    """)

tool_calling_flow()

四、多Agent协作

4.1 多Agent架构

python 复制代码

def multi_agent_architecture():
    """Print a banner, draw a coordinator-with-workers diagram, and list the
    four multi-agent collaboration modes.

    Returns None.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("多Agent协作")
    print(rule)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Coordinator node.
    ax.add_patch(plt.Circle((0.5, 0.7), 0.1, color='lightcoral', ec='black'))
    ax.text(0.5, 0.7, '主Agent\n(协调者)', ha='center', va='center', fontsize=8)

    # Worker nodes as (caption, x, y); each gets an arrow from the coordinator.
    workers = (
        ("搜索Agent", 0.2, 0.4),
        ("计算Agent", 0.5, 0.4),
        ("代码Agent", 0.8, 0.4),
    )
    for caption, cx, cy in workers:
        ax.add_patch(plt.Circle((cx, cy), 0.08, color='lightgreen', ec='black'))
        ax.text(cx, cy, caption, ha='center', va='center', fontsize=7)
        ax.annotate('', xy=(cx, cy+0.08), xytext=(0.5, 0.6),
                    arrowprops=dict(arrowstyle='->', lw=1))

    # User-input box at the top, with an arrow down to the coordinator.
    ax.add_patch(plt.Rectangle((0.35, 0.85), 0.3, 0.08,
                               facecolor='lightblue', ec='black'))
    ax.text(0.5, 0.89, '用户输入', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.5, 0.8), xytext=(0.5, 0.85),
                arrowprops=dict(arrowstyle='->', lw=1))

    # Final-output box at the bottom, with an arrow pointing down to it.
    ax.add_patch(plt.Rectangle((0.35, 0.1), 0.3, 0.08,
                               facecolor='lightyellow', ec='black'))
    ax.text(0.5, 0.14, '最终输出', ha='center', va='center', fontsize=8)
    ax.annotate('', xy=(0.5, 0.2), xytext=(0.5, 0.25),
                arrowprops=dict(arrowstyle='->', lw=1))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('多Agent协作架构', fontsize=14)

    plt.tight_layout()
    plt.show()

    for line in (
        "\n💡 多Agent模式:",
        "   1. 层级式: 主Agent分配任务给子Agent",
        "   2. 对等式: Agent之间平等协作",
        "   3. 流水线式: 依次处理",
        "   4. 辩论式: 多个Agent讨论达成共识",
    ):
        print(line)

multi_agent_architecture()

4.2 AutoGPT/BabyAGI原理

python 复制代码

def autogpt_principle():
    """Print the AutoGPT/BabyAGI core components and draw the task loop.

    The figure shows six nodes joined in the order: fetch task -> execute ->
    observe -> store memory -> create new tasks -> task list -> execute.
    Returns None.
    """
    print("\n" + "=" * 60)
    print("AutoGPT/BabyAGI原理")
    print("=" * 60)

    print("\n📐 AutoGPT核心组件:")
    components = {
        "任务列表": "存储待完成的任务",
        "执行器": "执行具体任务",
        "记忆系统": "存储历史信息",
        "优先级排序": "决定下一步做什么"
    }

    for name, desc in components.items():
        print(f"   • {name}: {desc}")

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.axis('off')

    # Loop nodes as (label, x, y); each is drawn as a circle of radius 0.08.
    nodes = [
        ("获取\n任务", 0.15, 0.5),
        ("执行\n任务", 0.35, 0.5),
        ("观察\n结果", 0.55, 0.5),
        ("记忆\n存储", 0.75, 0.5),
        ("创建\n新任务", 0.75, 0.2),
        ("任务\n列表", 0.35, 0.2),
    ]

    for label, x, y in nodes:
        circle = plt.Circle((x, y), 0.08, color='lightblue', ec='black')
        ax.add_patch(circle)
        ax.text(x, y, label, ha='center', va='center', fontsize=7)

    # Connectors as (start, end) points on the node edges.
    # Fixed: annotate() draws from `xytext` (tail) to `xy` (head); the two
    # were swapped before, so every arrow pointed against the flow.
    connectors = [
        ((0.23, 0.5), (0.27, 0.5)),    # fetch task -> execute
        ((0.43, 0.5), (0.47, 0.5)),    # execute -> observe
        ((0.63, 0.5), (0.67, 0.5)),    # observe -> store memory
        ((0.75, 0.42), (0.75, 0.28)),  # store memory -> create new tasks
        ((0.67, 0.2), (0.43, 0.2)),    # create new tasks -> task list
        ((0.35, 0.28), (0.35, 0.42)),  # task list -> execute
    ]
    for start, end in connectors:
        ax.annotate('', xy=end, xytext=start, arrowprops=dict(arrowstyle='->', lw=1))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('AutoGPT循环', fontsize=12)

    plt.tight_layout()
    plt.show()

autogpt_principle()

五、实战：构建智能助手

5.1 完整Agent实现

python 复制代码

import json
from dataclasses import dataclass
from enum import Enum
from typing import Any, Callable, Dict, List, Optional

import requests

class Role(Enum):
    """Author of a conversation message."""

    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"


@dataclass
class Message:
    """One entry in the agent's conversation history."""

    role: Role
    # None for an assistant turn that only requests tool calls.
    content: Optional[str]
    # Tool calls requested by an assistant turn, if any.
    tool_calls: Optional[List[Dict]] = None
    # For TOOL messages: id of the tool call this result answers.
    tool_call_id: Optional[str] = None


class ToolCallAgent:
    """Tool-calling agent loop backed by a mocked LLM.

    Attributes:
        model_name: Model identifier (stored only; the LLM is simulated).
        tools: Mapping of tool name to a dict with the keys ``func``,
            ``description`` and ``parameters``.
        messages: Conversation history as a list of ``Message`` objects;
            it persists across ``run`` calls.
        max_iterations: Upper bound on LLM rounds per ``run``.
    """

    def __init__(self, model_name="gpt-3.5-turbo"):
        self.model_name = model_name
        self.tools = {}
        self.messages = []
        self.max_iterations = 10

    def register_tool(self, name: str, func: Callable, description: str, parameters: Dict):
        """Register ``func`` under ``name``; re-registering a name overwrites it."""
        self.tools[name] = {
            "func": func,
            "description": description,
            "parameters": parameters
        }

    def _get_tools_schema(self):
        """Return the registered tools as a list of function-tool schema dicts."""
        return [
            {
                "type": "function",
                "function": {
                    "name": name,
                    "description": tool["description"],
                    "parameters": tool["parameters"]
                }
            }
            for name, tool in self.tools.items()
        ]

    def _call_llm(self, messages):
        """Simulate an LLM reply for the last message in ``messages``.

        A real implementation would call the model API here.

        Returns:
            A dict with the keys ``content`` and ``tool_calls``.
        """
        last = messages[-1]

        # A tool result is turned straight into the final answer. Without
        # this, the keyword checks below ran on the tool output: the result
        # was discarded in favour of the canned greeting, or the same tool
        # was called again whenever the output contained a keyword.
        if last.role is Role.TOOL:
            return {"content": last.content, "tool_calls": None}

        text = last.content

        # Keyword-based intent detection.
        if "天气" in text:
            return {
                "content": None,
                "tool_calls": [{
                    "function": {"name": "get_weather", "arguments": '{"city": "北京"}'}
                }]
            }
        if "计算" in text:
            return {
                "content": None,
                "tool_calls": [{
                    "function": {"name": "calculate", "arguments": '{"expression": "2+2"}'}
                }]
            }
        return {
            "content": "我是AI助手，有什么可以帮您的？",
            "tool_calls": None
        }

    def _execute_tool(self, tool_name, arguments):
        """Run a registered tool with JSON-encoded keyword ``arguments``.

        Returns:
            ``str(result)`` on success, or an error string when the tool is
            unknown, the JSON is invalid, or the tool raises.
        """
        if tool_name not in self.tools:
            return f"错误: 未找到工具 {tool_name}"

        tool = self.tools[tool_name]
        try:
            args = json.loads(arguments)
            result = tool["func"](**args)
            return str(result)
        except Exception as e:
            # Tool boundary: report the failure as text so the loop continues.
            return f"执行错误: {e}"

    def run(self, user_input: str):
        """Process ``user_input`` until the LLM answers without tool calls.

        Returns:
            The final answer string, or None if ``max_iterations`` rounds
            pass without one.
        """
        self.messages.append(Message(role=Role.USER, content=user_input))

        for iteration in range(1, self.max_iterations + 1):
            print(f"\n🔄 迭代 {iteration}")

            response = self._call_llm(self.messages)
            tool_calls = response.get("tool_calls")

            if not tool_calls:
                # Direct answer: record it and stop.
                answer = response["content"]
                print(f"✅ 最终答案: {answer}")
                self.messages.append(Message(role=Role.ASSISTANT, content=answer))
                return answer

            # Record the assistant turn that requested the tools, so each
            # TOOL message in the history follows the request it answers.
            self.messages.append(Message(
                role=Role.ASSISTANT,
                content=response.get("content"),
                tool_calls=tool_calls
            ))

            for tool_call in tool_calls:
                tool_name = tool_call["function"]["name"]
                arguments = tool_call["function"]["arguments"]

                print(f"🔧 调用工具: {tool_name}({arguments})")
                result = self._execute_tool(tool_name, arguments)
                print(f"📊 结果: {result}")

                self.messages.append(Message(
                    role=Role.TOOL,
                    content=result,
                    tool_call_id=tool_call.get("id", "unknown")
                ))

        print("❌ 达到最大迭代次数")
        return None


# 定义工具函数
def get_weather(city: str, unit: str = "celsius") -> str:
    """Return a canned weather string for ``city`` (mock data source).

    Beijing, Shanghai and Guangzhou have fixed entries; any other city gets
    a default sunny line that echoes its name. ``unit`` is accepted but not
    used by this mock.
    """
    known_forecasts = {
        "北京": "晴天，温度25°C",
        "上海": "多云，温度22°C",
        "广州": "雨天，温度28°C"
    }
    if city in known_forecasts:
        return known_forecasts[city]
    return f"{city}天气: 晴，温度20°C"

def calculate(expression: str) -> str:
    """数学计算"""
    try:
        result = eval(expression)
        return f"{expression} = {result}"
    except:
        return f"计算错误: {expression}"

def search(query: str) -> str:
    """Return a placeholder search-result string that echoes ``query``."""
    summary = f"关于'{query}'的搜索结果: 找到相关信息..."
    return summary

# 测试Agent
# Demo run of the tool-calling agent: banner, two tool registrations, one question.
print("\n" + "=" * 60, "智能助手Agent测试", "=" * 60, sep="\n")

agent = ToolCallAgent()

# Weather lookup: `city` is required, `unit` is optional.
agent.register_tool(
    name="get_weather",
    func=get_weather,
    description="获取城市天气信息",
    parameters={
        "type": "object",
        "properties": {
            "city": {"type": "string", "description": "城市名称"},
            "unit": {"type": "string", "description": "温度单位"},
        },
        "required": ["city"],
    },
)

# Calculator: takes a single required `expression` string.
agent.register_tool(
    name="calculate",
    func=calculate,
    description="数学计算",
    parameters={
        "type": "object",
        "properties": {
            "expression": {"type": "string", "description": "数学表达式"},
        },
        "required": ["expression"],
    },
)

# Ask one weather question.
agent.run("北京今天天气怎么样？")

六、最佳实践与总结

python 复制代码

def best_practices():
    """Print the agent design guidelines followed by a summary table.

    Output goes to stdout only; returns None.
    """
    rule = "=" * 60

    def banner(title):
        # Blank line, rule, title, rule.
        print("\n" + rule)
        print(title)
        print(rule)

    banner("Agent最佳实践")

    guidelines = """
    📌 设计原则:
    
    1. 工具设计
       - 每个工具职责单一
       - 提供清晰的描述
       - 定义明确的输入输出
    
    2. 提示工程
       - 清晰说明可用工具
       - 给出使用示例
       - 限制最大迭代次数
    
    3. 错误处理
       - 工具调用失败要有fallback
       - 添加超时机制
       - 记录执行日志
    
    4. 安全考虑
       - 限制工具权限
       - 沙箱执行代码
       - 敏感操作需确认
    
    5. 性能优化
       - 并行执行独立工具
       - 缓存常用结果
       - 异步处理长任务
    """
    print(guidelines)

    banner("Agent核心要点总结")

    recap_table = """
    ╔══════════════════╦══════════════════════════════════════════════════════════════╗
    ║     概念         ║                    说明                                      ║
    ╠══════════════════╬══════════════════════════════════════════════════════════════╣
    ║ ReAct            ║ 思考-行动-观察循环，让LLM自主决策                            ║
    ║ Tool             ║ 赋予LLM外部能力（搜索、计算、API）                          ║
    ║ Memory           ║ 记住历史交互，实现多轮对话                                  ║
    ║ Multi-Agent      ║ 多个Agent协作完成复杂任务                                   ║
    ║ AutoGPT          ║ 自动分解任务、执行、创建子任务                              ║
    ╚══════════════════╩══════════════════════════════════════════════════════════════╝
    """
    print(recap_table)

best_practices()

七、总结

| 概念 | 作用 | 关键点 |
| --- | --- | --- |
| ReAct | 决策循环 | Thought → Action → Observation |
| Tool | 扩展能力 | 搜索、计算、API调用 |
| Memory | 上下文记忆 | 短期记忆、长期记忆 |
| Multi-Agent | 协作分工 | 层级式（主从式）、对等式、流水线式、辩论式 |

Agent开发流程：

复制代码

定义目标 → 设计工具 → 编写提示 → 实现循环 → 测试优化

记住：

- Agent让LLM从"说话"变成"做事"
- 工具是Agent能力的边界
- ReAct是实现自主决策的经典模式
- 安全性和可控性很重要