一个简单的意图识别Agent

1. Agent 实现

复制代码

import json
import re
from typing import Dict, List, Any, Optional

class IntentRecognitionAgent:
    """Classify a user utterance into an intent by prompting an LLM.

    The agent fills a prompt template with the user input, the conversation
    history and the available tools, sends it through ``model_call`` and
    parses the JSON object found in the reply. Every result is a dict with
    at least a ``"type"`` key ("测试类", "工具类" or "其他类").
    """

    def __init__(self):
        # Literal JSON braces are doubled ({{ }}) because the template is
        # rendered with str.format().
        self.prompt_template = """你是一个智能意图识别助手。请根据用户输入内容和可用工具列表，准确识别用户意图。

用户当前输入：{content}
历史对话内容：{history_content}
可用工具列表：{tools_list}

请按照以下步骤进行分析：

1. 首先判断用户输入是否为发起测试的任务（包含"测试"、"test"、"检测"、"验证"等关键词）
2. 如果不是测试任务，则按照以下优先级匹配工具：
   - 优先匹配个人工具（scope为"personal"的工具）
   - 其次匹配群组工具（scope为"group"的工具）
   - 最后匹配全量工具（scope为"all"的工具）
3. 匹配时考虑：
   - 工具名称与用户意图的相关性
   - 工具描述与用户需求的匹配度
   - 结合历史对话上下文进行理解

请以JSON格式返回结果：
- 如果是测试任务：{{"type":"测试类","info":"{content}"}}
- 如果匹配到工具：{{"type":"工具类","info": <匹配的工具对象>}}
- 如果没有匹配：{{"type":"其他类","info": "没有找到合适的工具"}}

注意：
1. 只返回JSON格式的结果，不要有其他解释
2. 工具对象要包含完整信息（name, description, scope等）
3. 优先级匹配时，一旦在某个级别找到匹配就返回，不再继续查找
"""

    def model_call(self, model_name: str, messages: List[Dict[str, str]]) -> str:
        """Return a canned model reply; placeholder for a real LLM call.

        Args:
            model_name: Name of the model to query (ignored by this mock).
            messages: Messages to send (ignored by this mock).

        Returns:
            A fixed JSON string describing a matched "calculator" tool.
        """
        # Mock implementation: replace with the real model client when
        # integrating, e.g. ``return model_call(model_name, messages)``.
        return '{"type":"工具类","info": {"name":"calculator","description":"计算器工具","scope":"personal"}}'

    def parse_llm_response(self, response: str) -> Dict[str, Any]:
        """Extract the intent object from a raw LLM reply.

        The reply is scanned left to right for the first JSON object that
        carries a ``"type"`` key, so braces in surrounding text (reasoning
        preambles, trailing remarks) do not break parsing.

        Args:
            response: Raw text returned by the model.

        Returns:
            The decoded intent dict, or a fallback
            ``{"type": "其他类", "info": ...}`` whose ``info`` is
            "JSON解析错误" when a ``{`` was found but could not be decoded,
            and "解析失败" otherwise (non-string reply, no JSON object, or
            no object with a ``"type"`` key).
        """
        if not isinstance(response, str):
            return {"type": "其他类", "info": "解析失败"}

        decoder = json.JSONDecoder()
        decode_failed = False
        start = response.find("{")
        while start != -1:
            try:
                candidate, end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                decode_failed = True
                start = response.find("{", start + 1)
                continue
            if isinstance(candidate, dict) and "type" in candidate:
                return candidate
            # Resume after the decoded object so that its nested objects are
            # not mistaken for the top-level answer.
            start = response.find("{", end)

        if decode_failed:
            return {"type": "其他类", "info": "JSON解析错误"}
        return {"type": "其他类", "info": "解析失败"}

    def recognize_intent(self, content: str, history_content: str, tools_list: List[Dict],
                         model_name: str = "deepseek-r1") -> Dict[str, Any]:
        """Run the full recognition pipeline for one user utterance.

        Args:
            content: Current user input.
            history_content: Prior conversation text; an empty value is
                rendered as "无" in the prompt.
            tools_list: Tool descriptions, serialised into the prompt as JSON.
            model_name: Model identifier forwarded to ``model_call``.

        Returns:
            The intent dict produced by ``parse_llm_response``.
        """
        prompt = self.prompt_template.format(
            content=content,
            history_content=history_content if history_content else "无",
            tools_list=json.dumps(tools_list, ensure_ascii=False, indent=2)
        )

        # NOTE(review): this message shape is simply what the mock accepts;
        # confirm the schema the real model client expects.
        messages = [{"type": "text", "text": prompt}]
        response = self.model_call(model_name, messages)

        return self.parse_llm_response(response)

    def optimize_prompt(self, content: str, history_content: str, tools_list: List[Dict]) -> str:
        """Build a more detailed prompt with tools grouped by scope.

        Args:
            content: Current user input.
            history_content: Prior conversation text; an empty value is
                rendered as "无".
            tools_list: Tool descriptions; grouped via
                ``_format_tools_by_scope`` before insertion.

        Returns:
            The rendered prompt text. It is only returned, not sent to the
            model.
        """
        optimization_prompt = """你是一个专业的意图识别助手。请仔细分析用户需求并准确匹配工具。

## 输入信息
- 用户输入：{content}
- 历史对话：{history_content}
- 工具列表：
{formatted_tools}

## 分析步骤

### 步骤1：测试任务识别
判断标准：
- 包含关键词："测试"、"test"、"检测"、"检查"、"验证"、"调试"、"debug"
- 表达测试意图：如"试一下"、"看看能不能用"、"验证一下"

### 步骤2：工具匹配（如果不是测试任务）
匹配策略：
1. **个人工具优先**（scope="personal"）
   - 精确匹配：工具名称包含用户提到的关键词
   - 语义匹配：工具描述与用户需求相关
   
2. **群组工具次之**（scope="group"）
   - 同样进行精确和语义匹配
   
3. **全量工具最后**（scope="all"）
   - 扩大匹配范围

### 步骤3：返回结果
严格按照以下格式返回JSON（不要添加任何其他内容）：

测试任务：
{{"type":"测试类","info":"<用户原始输入>"}}

匹配到工具：
{{"type":"工具类","info": {{"name":"<工具名>","description":"<工具描述>","scope":"<工具范围>"}}}}

未匹配：
{{"type":"其他类","info": "没有找到合适的工具"}}
"""

        formatted_tools = self._format_tools_by_scope(tools_list)

        return optimization_prompt.format(
            content=content,
            history_content=history_content if history_content else "无",
            formatted_tools=formatted_tools
        )

    def _format_tools_by_scope(self, tools_list: List[Dict]) -> str:
        """Render the tools as a bullet list grouped by scope.

        Groups appear in priority order (personal, group, all), separated by
        one blank line; empty groups are omitted. Tools whose ``scope`` is
        none of these three values are left out. A missing ``name`` or
        ``description`` is rendered as an empty string.

        Args:
            tools_list: Tool dicts with ``name``, ``description``, ``scope``.

        Returns:
            The formatted listing, or an empty string when nothing matches.
        """
        scope_titles = (
            ("personal", "个人工具："),
            ("group", "群组工具："),
            ("all", "全量工具："),
        )
        sections = []
        for scope, title in scope_titles:
            bullets = [
                f"  - {tool.get('name', '')}: {tool.get('description', '')}\n"
                for tool in (tools_list or [])
                if tool.get('scope') == scope
            ]
            if bullets:
                sections.append(title + "\n" + "".join(bullets))
        return "\n".join(sections)

2. 评判优化过程

复制代码

class AgentEvaluator:
    """Measure an intent agent's accuracy on a set of labelled test cases.

    Attributes are plain lists: ``test_cases`` holds the registered cases and
    ``evaluation_results`` holds the per-case outcomes of the most recent
    ``evaluate()`` run.
    """

    def __init__(self, agent: "IntentRecognitionAgent"):
        # The agent only needs a ``recognize_intent(content, history, tools)``
        # method that returns a dict.
        self.agent = agent
        self.test_cases = []
        self.evaluation_results = []

    def add_test_case(self, content: str, history: str, tools: List[Dict],
                      expected_type: str, expected_info: Any):
        """Register one labelled test case.

        Args:
            content: User input passed to the agent.
            history: Conversation history passed to the agent.
            tools: Tool list passed to the agent.
            expected_type: Expected value of the result's ``"type"``.
            expected_info: Expected ``"info"``; for "工具类" a dict whose
                ``name`` is compared, for "测试类" the exact value.
        """
        self.test_cases.append({
            'content': content,
            'history': history,
            'tools': tools,
            'expected_type': expected_type,
            'expected_info': expected_info
        })

    def evaluate(self) -> Dict[str, Any]:
        """Run every registered test case through the agent and score it.

        Each call starts from an empty result list, so repeated runs do not
        accumulate outcomes from earlier runs.

        Returns:
            A dict with ``accuracy`` (0 when there are no cases),
            ``correct_count``, ``total_count`` and ``details`` (one entry
            per case with the case, the agent result and ``is_correct``).
        """
        results = []
        for test_case in self.test_cases:
            result = self.agent.recognize_intent(
                test_case['content'],
                test_case['history'],
                test_case['tools']
            )
            results.append({
                'test_case': test_case,
                'result': result,
                'is_correct': self._check_result(result, test_case)
            })

        # Replace rather than extend: the attribute reflects the latest run.
        self.evaluation_results = results

        total_count = len(results)
        correct_count = sum(1 for entry in results if entry['is_correct'])
        accuracy = correct_count / total_count if total_count > 0 else 0

        return {
            'accuracy': accuracy,
            'correct_count': correct_count,
            'total_count': total_count,
            'details': results
        }

    def _check_result(self, result: Dict, test_case: Dict) -> bool:
        """Decide whether an agent result matches the test case expectation.

        Rules: the ``type`` must equal ``expected_type``; "测试类" also
        requires ``info`` to equal ``expected_info``; "工具类" compares only
        the tool ``name`` (both sides must be dicts); "其他类" passes on type
        alone. A result that is not a dict, or has any other type, fails.
        """
        if not isinstance(result, dict):
            return False

        actual_type = result.get('type')
        if actual_type != test_case['expected_type']:
            return False

        actual_info = result.get('info')
        expected_info = test_case['expected_info']

        if actual_type == '测试类':
            return actual_info == expected_info
        if actual_type == '工具类':
            # Only the tool name is compared; other tool fields are ignored.
            if isinstance(actual_info, dict) and isinstance(expected_info, dict):
                return actual_info.get('name') == expected_info.get('name')
            return False
        if actual_type == '其他类':
            return True

        return False

    def optimize_agent(self, threshold: float = 0.8) -> bool:
        """Evaluate the agent and report whether it meets the threshold.

        When accuracy is below ``threshold`` the first five failing cases of
        this run are printed. The agent itself is not modified.

        Args:
            threshold: Minimum acceptable accuracy in the range 0 to 1.

        Returns:
            True if accuracy is at least ``threshold``, otherwise False.
        """
        evaluation = self.evaluate()

        if evaluation['accuracy'] < threshold:
            print(f"准确率 {evaluation['accuracy']:.2%} 低于阈值 {threshold:.2%}")
            print("分析错误案例...")

            # Use this run's details so stale results are never reported.
            error_cases = [r for r in evaluation['details'] if not r['is_correct']]

            for error in error_cases[:5]:  # show at most five failures
                actual = error['result']
                actual_type = actual.get('type') if isinstance(actual, dict) else None
                print("\n错误案例：")
                print(f"输入：{error['test_case']['content']}")
                print(f"期望：{error['test_case']['expected_type']}")
                print(f"实际：{actual_type}")

            return False

        print(f"准确率 {evaluation['accuracy']:.2%} 达到要求")
        return True

3. 使用示例

复制代码

# Build the agent that the rest of the example exercises.
agent = IntentRecognitionAgent()

# Sample tool catalogue: two personal tools, one group tool, one global tool.
tools_list = [
    dict(name="calculator", description="计算器工具，可以进行数学计算", scope="personal"),
    dict(name="translator", description="翻译工具，支持多语言翻译", scope="personal"),
    dict(name="weather", description="天气查询工具", scope="group"),
    dict(name="search", description="搜索引擎工具", scope="all"),
]

# Example 1: an utterance that asks to start a test.
result1 = agent.recognize_intent("我想测试一下系统功能", "", tools_list)
print("测试用例1结果：", result1)

# Example 2: an utterance that should map onto a tool.
result2 = agent.recognize_intent("帮我计算一下100+200", "", tools_list)
print("测试用例2结果：", result2)

# Wrap the agent in an evaluator and register labelled cases.
evaluator = AgentEvaluator(agent)

evaluator.add_test_case(
    content="测试一下功能",
    history="",
    tools=tools_list,
    expected_type="测试类",
    expected_info="测试一下功能",
)

evaluator.add_test_case(
    content="计算100+200",
    history="",
    tools=tools_list,
    expected_type="工具类",
    expected_info={"name": "calculator"},
)

evaluator.add_test_case(
    content="查询明天的天气",
    history="",
    tools=tools_list,
    expected_type="工具类",
    expected_info={"name": "weather"},
)

# Score the agent and print a report against an 80% accuracy threshold.
evaluator.optimize_agent(threshold=0.8)