对前篇《SKILLHARNESS:让AI Agent学会"安全地做事"》(掘金)的进一步剖析
前言
我们在开发AI Agent时经常遇到这个问题:Agent学会了一个技能,但这个技能在环境变化时要么失效,要么产生危险行为。
今天我们来复现浙江大学提出的SKILLHARNESS框架,它解决了一个核心问题:如何让Agent学会"什么时候可以用这个技能"。
读完这篇文章,你将知道:
- 如何用代码实现宏技能和微技能的解耦表示
- 如何实现"三源监督"的技能学习
- 如何实现"选择性激活"的安全利用机制
- 如何实现"模板重放+LLM回退"的双模式执行
一、核心概念速览
在开始写代码之前,先弄清楚SKILLHARNESS的两层设计:
arduino
┌─────────────────────────────────────────────────────────────┐
│ 宏技能层 (战略层) │
│ "我知道这个技能的目标是什么" │
│ "成功的时候是什么样子" │
│ "有哪些坑不能踩" │
│ "什么前提条件必须满足" │
└─────────────────────────────────────────────────────────────┘
↓ 关联微技能
┌─────────────────────────────────────────────────────────────┐
│ 微技能层 (战术层) │
│ "具体怎么操作" │
│ "用模板 + 运行时参数绑定" │
│ "模板失效了走LLM" │
└─────────────────────────────────────────────────────────────┘
二、数据结构:技能如何表示
2.1 宏技能:战略层
python
from dataclasses import dataclass, field
from typing import List, Optional, Set
from enum import Enum
class SkillStatus(Enum):
    """Lifecycle state of a skill; each member's value is its lowercase name."""

    ACTIVE = "active"
    # Paused because the safety boundary is not satisfied.
    SUSPENDED = "suspended"
    DEPRECATED = "deprecated"
@dataclass
class SuccessPattern:
    """Success pattern: what to do (``do``) and how to tell it is done (``done_when``)."""

    # Description of the reusable action path.
    do: str
    # Observable completion condition.
    done_when: str
    # Confidence score for this pattern.
    confidence: float = 1.0
@dataclass
class Lesson:
    """Lesson: experience distilled from a failure."""

    # Kind of failure that was observed.
    failure_type: str
    # Signal / action that recovered from the failure.
    recovery_signal: str
    # Generalized, human-readable description of the lesson.
    generalization: str
@dataclass
class RiskGuard:
    """Risk guard: a condition the environment must satisfy."""

    # Description of the condition.
    condition: str
    # Explanation of the risk being guarded against.
    description: str
    # Severity label: low / medium / high.
    severity: str = "medium"
@dataclass
class MacroSkill:
    """Macro skill: the strategic layer that captures a reusable strategy.

    Bundles the natural-language intent with the success patterns, lessons
    and risk guards collected for it, plus the IDs of the micro skills that
    implement it.
    """

    skill_id: str
    intent: str  # φ: macro intent, a natural-language goal
    success_patterns: List[SuccessPattern] = field(default_factory=list)  # P
    lessons: List[Lesson] = field(default_factory=list)  # L
    risk_guards: List[RiskGuard] = field(default_factory=list)  # R
    linked_micro_skills: Set[str] = field(default_factory=set)  # N_M
    status: SkillStatus = SkillStatus.ACTIVE

    def add_success_pattern(self, do: str, done_when: str, confidence: float = 1.0):
        """Append a success pattern.

        Args:
            do: Description of the reusable action path.
            done_when: Observable completion condition.
            confidence: Confidence stored on the pattern (defaults to 1.0,
                which is what the two-argument form has always produced).
        """
        self.success_patterns.append(
            SuccessPattern(do=do, done_when=done_when, confidence=confidence)
        )

    def add_lesson(self, failure_type: str, recovery: str):
        """Append a lesson; its generalization text is derived from both arguments."""
        self.lessons.append(
            Lesson(
                failure_type=failure_type,
                recovery_signal=recovery,
                generalization=f"遇到{failure_type}时,尝试{recovery}",
            )
        )

    def add_risk_guard(self, condition: str, description: str, severity: str = "medium"):
        """Append a risk guard.

        Args:
            condition: Condition the environment must satisfy.
            description: Explanation of the risk.
            severity: Severity label stored on the guard (defaults to
                ``"medium"``, matching the two-argument form).
        """
        self.risk_guards.append(
            RiskGuard(condition=condition, description=description, severity=severity)
        )

    def __repr__(self):
        # Compact form used in log lines; intentionally omits the collections.
        return f"MacroSkill(id={self.skill_id}, intent={self.intent})"
2.2 微技能:战术层
python
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import re
@dataclass
class MicroSkill:
    """Micro skill: a parameterized action sequence (tactical layer).

    The execution template contains ``{name}`` placeholders that ``bind``
    fills in from the current state. Consecutive binding failures are counted
    so callers can bypass a template that keeps failing.
    """

    skill_id: str
    semantic_label: str  # σ: semantic label, e.g. "click the submit button"
    execution_template: str  # E: template with placeholders, e.g. "click('{button_id}')"
    placeholders: Set[str] = field(default_factory=set)  # Θ: placeholder names
    bind_count: int = 0  # number of successful bindings
    consecutive_failures: int = 0  # binding failures since the last success
    max_failures_before_bypass: int = 3  # failure threshold that triggers bypass

    @staticmethod
    def from_action(action: str, label: str) -> 'MicroSkill':
        """Create a micro skill from an action string used verbatim as the template.

        Every ``{name}`` occurrence in ``action`` is registered as a
        placeholder. Concrete arguments are NOT generalized automatically:
        ``"click('submit_btn')"`` yields a template with no placeholders,
        whereas ``"click('{button_id}')"`` yields the placeholder
        ``button_id``.

        Args:
            action: Action string, optionally containing ``{name}`` placeholders.
            label: Semantic label; also embedded in the generated skill ID.

        Returns:
            A new ``MicroSkill`` whose ID is stable across interpreter runs.
        """
        import zlib

        placeholders = set(re.findall(r'\{(\w+)\}', action))
        # The built-in hash() of a str is randomized per process, which would
        # give the same action a different ID on every run and break links
        # stored on disk; CRC32 is deterministic.
        digest = zlib.crc32(action.encode("utf-8")) % 10000
        return MicroSkill(
            skill_id=f"micro_{label}_{digest}",
            semantic_label=label,
            execution_template=action,
            placeholders=placeholders,
        )

    def bind(self, state: Dict[str, Any]) -> Optional[str]:
        """Bind the template: replace each placeholder with its value from ``state``.

        Args:
            state: Current state used to resolve placeholder values.

        Returns:
            The bound action string, or ``None`` when any placeholder cannot
            be resolved or resolution raises.

        Side effects:
            On success increments ``bind_count`` and resets
            ``consecutive_failures``; on failure increments
            ``consecutive_failures``.
        """
        try:
            bound = self.execution_template
            for placeholder in self.placeholders:
                value = self._resolve_placeholder(placeholder, state)
                if value is None:
                    self.consecutive_failures += 1
                    return None
                bound = bound.replace(f'{{{placeholder}}}', str(value))
        except Exception:
            # Deliberate best effort: a binding problem must lead to the
            # fallback path (None), never crash the caller.
            self.consecutive_failures += 1
            return None
        # Only successful bindings are counted, as the field is documented.
        self.bind_count += 1
        self.consecutive_failures = 0
        return bound

    def _resolve_placeholder(self, placeholder: str, state: Dict) -> Optional[str]:
        """Resolve a placeholder value; simplified to a direct lookup in ``state``."""
        return state.get(placeholder)

    def should_bypass(self) -> bool:
        """Return True once consecutive failures reach the bypass threshold."""
        return self.consecutive_failures >= self.max_failures_before_bypass

    def __repr__(self):
        return f"MicroSkill(id={self.skill_id}, label={self.semantic_label})"
2.3 技能库
python
from typing import Dict, List, Optional, Set
import json
class SkillLibrary:
    """Skill library: stores every macro skill and micro skill, keyed by ID."""

    def __init__(self):
        self.macro_skills: Dict[str, 'MacroSkill'] = {}
        self.micro_skills: Dict[str, 'MicroSkill'] = {}

    def add_macro_skill(self, skill: 'MacroSkill'):
        """Register (or overwrite) a macro skill under its ``skill_id``."""
        self.macro_skills[skill.skill_id] = skill
        print(f"[SkillLibrary] 添加宏技能: {skill}")

    def add_micro_skill(self, skill: 'MicroSkill', macro_skill_id: str):
        """Register a micro skill and link it to a macro skill.

        The link is only recorded when ``macro_skill_id`` is already in the
        library; the micro skill itself is stored either way.
        """
        self.micro_skills[skill.skill_id] = skill
        if macro_skill_id in self.macro_skills:
            self.macro_skills[macro_skill_id].linked_micro_skills.add(skill.skill_id)
        print(f"[SkillLibrary] 添加微技能: {skill} -> {macro_skill_id}")

    def get_macro_skill(self, skill_id: str) -> Optional['MacroSkill']:
        """Return the macro skill with this ID, or ``None``."""
        return self.macro_skills.get(skill_id)

    def get_micro_skill(self, skill_id: str) -> Optional['MicroSkill']:
        """Return the micro skill with this ID, or ``None``."""
        return self.micro_skills.get(skill_id)

    def get_micro_skills_for_macro(self, macro_skill_id: str) -> List['MicroSkill']:
        """Return the micro skills linked to a macro skill.

        Links whose micro skill is missing from the library are skipped. The
        result is ordered by micro-skill ID so that callers taking the first
        element get a deterministic choice (the links are stored in a set).
        """
        macro = self.get_macro_skill(macro_skill_id)
        if not macro:
            return []
        linked = (self.micro_skills.get(mid) for mid in sorted(macro.linked_micro_skills))
        return [micro for micro in linked if micro is not None]

    @staticmethod
    def _keywords(text: str) -> Set[str]:
        """Split text into lowercase match keywords.

        Whitespace-separated tokens are always kept. A token containing CJK
        characters is additionally broken into its ASCII word runs (length
        >= 2) and CJK character bigrams, because such text has no spaces to
        split on.
        """
        import re

        keywords: Set[str] = set()
        for token in text.lower().split():
            keywords.add(token)
            if not any('\u4e00' <= ch <= '\u9fff' for ch in token):
                continue
            for run in re.findall(r'[a-z0-9_]+|[\u4e00-\u9fff]+', token):
                if '\u4e00' <= run[0] <= '\u9fff':
                    keywords.update(run[i:i + 2] for i in range(len(run) - 1))
                elif len(run) >= 2:
                    keywords.add(run)
        return keywords

    def retrieve_relevant_macros(
        self,
        current_state: Dict,
        task_goal: str,
        top_k: int = 3
    ) -> List['MacroSkill']:
        """Retrieve macro skills relevant to a goal (simplified keyword match).

        A skill matches when any keyword of ``task_goal`` occurs in its
        lowercased intent. Matches are ordered by number of success patterns,
        most first. ``current_state`` is accepted but not used here; a real
        implementation would use embedding similarity instead.

        Returns:
            At most ``top_k`` matching macro skills.
        """
        keywords = self._keywords(task_goal)
        results = [
            skill
            for skill in self.macro_skills.values()
            if any(word in skill.intent.lower() for word in keywords)
        ]
        # Skills with more recorded success patterns come first.
        results.sort(key=lambda s: len(s.success_patterns), reverse=True)
        return results[:top_k]

    def save(self, path: str):
        """Write the whole library (macro AND micro skills) to a JSON file."""
        data = {
            "macro_skills": {
                sid: {
                    "skill_id": s.skill_id,
                    "intent": s.intent,
                    "status": s.status.value,
                    "success_patterns": [
                        {"do": p.do, "done_when": p.done_when, "confidence": p.confidence}
                        for p in s.success_patterns
                    ],
                    "lessons": [
                        {
                            "failure_type": l.failure_type,
                            "recovery": l.recovery_signal,
                            "generalization": l.generalization,
                        }
                        for l in s.lessons
                    ],
                    "risk_guards": [
                        {
                            "condition": r.condition,
                            "description": r.description,
                            "severity": r.severity,
                        }
                        for r in s.risk_guards
                    ],
                    # Sorted so the file content is stable between saves.
                    "linked_micro_skills": sorted(s.linked_micro_skills),
                }
                for sid, s in self.macro_skills.items()
            },
            # Without the micro skills the links above would dangle after a
            # reload and every plan would fall back to the LLM.
            "micro_skills": {
                mid: {
                    "skill_id": m.skill_id,
                    "semantic_label": m.semantic_label,
                    "execution_template": m.execution_template,
                    "placeholders": sorted(m.placeholders),
                    "bind_count": m.bind_count,
                    "consecutive_failures": m.consecutive_failures,
                    "max_failures_before_bypass": m.max_failures_before_bypass,
                }
                for mid, m in self.micro_skills.items()
            },
        }
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"[SkillLibrary] 已保存到 {path}")

    def load(self, path: str):
        """Load skills from a JSON file written by ``save``.

        Loaded skills are added to (or overwrite by ID) what the library
        already holds. Files written by the older format, which lacked micro
        skills and the optional per-entry fields, still load: missing values
        take the dataclass defaults.
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for sdata in data["macro_skills"].values():
            skill = MacroSkill(skill_id=sdata["skill_id"], intent=sdata["intent"])
            skill.status = SkillStatus(sdata.get("status", SkillStatus.ACTIVE.value))
            for p in sdata.get("success_patterns", []):
                skill.success_patterns.append(
                    SuccessPattern(
                        do=p["do"],
                        done_when=p["done_when"],
                        confidence=p.get("confidence", 1.0),
                    )
                )
            for l in sdata.get("lessons", []):
                failure_type, recovery = l["failure_type"], l["recovery"]
                skill.lessons.append(
                    Lesson(
                        failure_type=failure_type,
                        recovery_signal=recovery,
                        generalization=l.get(
                            "generalization", f"遇到{failure_type}时,尝试{recovery}"
                        ),
                    )
                )
            for r in sdata.get("risk_guards", []):
                skill.risk_guards.append(
                    RiskGuard(
                        condition=r["condition"],
                        description=r["description"],
                        severity=r.get("severity", "medium"),
                    )
                )
            skill.linked_micro_skills = set(sdata.get("linked_micro_skills", []))
            self.macro_skills[skill.skill_id] = skill

        for mdata in data.get("micro_skills", {}).values():
            micro = MicroSkill(
                skill_id=mdata["skill_id"],
                semantic_label=mdata["semantic_label"],
                execution_template=mdata["execution_template"],
                placeholders=set(mdata.get("placeholders", [])),
                bind_count=mdata.get("bind_count", 0),
                consecutive_failures=mdata.get("consecutive_failures", 0),
                max_failures_before_bypass=mdata.get("max_failures_before_bypass", 3),
            )
            self.micro_skills[micro.skill_id] = micro
        print(f"[SkillLibrary] 已从 {path} 加载")
三、核心组件实现
3.1 规划器:安全检查的关键
python
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass
@dataclass
class PlanningDecision:
    """Decision emitted by the planner for a single step."""

    # u_t: the next atomic subtask.
    subtask: str
    # ê_t: the expected observable effect.
    expected_effect: str
    # y_t: whether the task is complete.
    is_complete: bool = False
    # id_t: optional reference to a micro skill.
    micro_skill_id: Optional[str] = None
    # Constraints handed on to the executor.
    constraints: List[str] = field(default_factory=list)
class Planner:
    """Planner: decides the next step and enforces the safety boundary.

    Holds the skill library and an optional LLM client. The client is stored
    but not called by this simplified implementation.
    """

    def __init__(self, skill_library: 'SkillLibrary', llm_client=None):
        self.library = skill_library
        self.llm_client = llm_client  # optional; intended for LLM semantic checks

    def plan(
        self,
        current_state: Dict[str, Any],
        history: List[Dict],
        task_goal: str
    ) -> 'PlanningDecision':
        """Plan the next action.

        Steps:
            1. Retrieve macro skills relevant to ``task_goal``.
            2. Check the risk guards of the first (top-ranked) one.
            3. If the check fails, return a decision with the micro skill
               suppressed; otherwise reference the first linked micro skill.

        Args:
            current_state: Current environment state.
            history: Previous steps (accepted but not used here).
            task_goal: Natural-language task goal.

        Returns:
            The decision for the next step. ``micro_skill_id`` is ``None``
            whenever the step must be handled without a template.
        """
        relevant_macros = self.library.retrieve_relevant_macros(
            current_state, task_goal
        )
        if not relevant_macros:
            # No relevant skill: let the LLM plan freely.
            return self._plan_with_llm(current_state, task_goal)

        # Simplification: take the top-ranked macro skill.
        selected_macro = relevant_macros[0]

        safety_result = self._check_safety_boundary(selected_macro, current_state)
        if not safety_result.is_safe:
            print(f"[Planner] 安全检查失败: {safety_result.reason}")
            return PlanningDecision(
                subtask=safety_result.recommended_action,
                expected_effect="安全执行",
                is_complete=False,
                micro_skill_id=None,  # suppress the micro skill
                constraints=safety_result.constraints,
            )

        micro_skills = self.library.get_micro_skills_for_macro(selected_macro.skill_id)
        if micro_skills:
            selected_micro = micro_skills[0]
            return PlanningDecision(
                subtask=selected_micro.semantic_label,
                expected_effect=self._infer_expected_effect(selected_macro),
                is_complete=False,
                micro_skill_id=selected_micro.skill_id,
                constraints=[str(rg.condition) for rg in selected_macro.risk_guards],
            )

        # The macro skill has no usable micro skill: fall back to the LLM.
        return self._plan_with_llm(current_state, task_goal)

    def _check_safety_boundary(
        self,
        macro_skill: 'MacroSkill',
        current_state: Dict[str, Any]
    ) -> 'SafetyCheckResult':
        """Check every risk guard of ``macro_skill`` against the state.

        Returns:
            A safe result when all guards hold. Otherwise an unsafe result
            whose ``reason`` lists the violated guard descriptions, whose
            ``constraints`` list what must be satisfied, and whose
            ``recommended_action`` joins those constraints into one string.
        """
        violations = []
        unmet = []
        for guard in macro_skill.risk_guards:
            if not self._evaluate_guard(guard, current_state):
                violations.append(guard.description)
                unmet.append(f"需要满足: {guard.condition}")

        if not violations:
            return SafetyCheckResult(is_safe=True)
        return SafetyCheckResult(
            is_safe=False,
            reason=f"风险守卫不满足: {', '.join(violations)}",
            constraints=unmet,
            # plan() uses this as the subtask of the blocked step.
            recommended_action="; ".join(unmet),
        )

    def _evaluate_guard(self, guard: 'RiskGuard', state: Dict) -> bool:
        """Evaluate one risk guard by keyword (simplified).

        The guard's condition text selects which state flag is consulted:
        user consent -> ``user_consent_verified`` (default False),
        boundary/scope -> ``within_boundary`` (default True),
        verify/confirm -> ``content_verified`` (default False).
        A condition matching none of these is treated as satisfied.
        """
        condition = guard.condition.lower()
        if "user consent" in condition or "用户同意" in condition:
            return state.get("user_consent_verified", False)
        if "boundary" in condition or "scope" in condition or "边界" in condition:
            return state.get("within_boundary", True)
        if "verify" in condition or "确认" in condition:
            return state.get("content_verified", False)
        # NOTE(review): unknown conditions pass by default (fail-open).
        return True

    def _infer_expected_effect(self, macro_skill: 'MacroSkill') -> str:
        """Use the first success pattern's ``done_when`` as the expected effect."""
        if macro_skill.success_patterns:
            return macro_skill.success_patterns[0].done_when
        return "操作完成"

    def _plan_with_llm(
        self,
        state: Dict[str, Any],
        goal: str
    ) -> 'PlanningDecision':
        """Produce a decision without a skill (placeholder; no LLM call is made)."""
        return PlanningDecision(
            subtask=f"执行任务: {goal}",
            expected_effect="任务完成",
            is_complete=False,
            micro_skill_id=None,
            constraints=[],
        )


@dataclass
class SafetyCheckResult:
    """Outcome of a safety-boundary check."""

    # True when every risk guard is satisfied.
    is_safe: bool
    # Human-readable explanation when the check fails.
    reason: Optional[str] = None
    # Conditions that still have to be satisfied.
    constraints: List[str] = field(default_factory=list)
    # Subtask the planner should issue instead of activating the skill.
    recommended_action: Optional[str] = None
3.2 执行器:双模式执行
python
import asyncio
from typing import Dict, Any, Optional
class Executor:
    """Executor: carries out planner decisions.

    Two modes are supported:
        1. Template replay (deterministic, efficient).
        2. LLM fallback (flexible), used when no template applies.
    """

    def __init__(self, skill_library: 'SkillLibrary', llm_client=None):
        self.library = skill_library
        self.llm_client = llm_client

    async def execute(
        self,
        decision: 'PlanningDecision',
        current_state: Dict[str, Any]
    ) -> 'ExecutionResult':
        """Execute a planner decision.

        Template replay is attempted when the decision references a micro
        skill that exists and has not reached its bypass threshold. In every
        other case, including a failed binding, the LLM fallback runs.
        """
        if decision.micro_skill_id:
            micro_skill = self.library.get_micro_skill(decision.micro_skill_id)
            if micro_skill and not micro_skill.should_bypass():
                bound_action = micro_skill.bind(current_state)
                if bound_action:
                    result = await self._execute_template(bound_action, current_state)
                    return ExecutionResult(
                        success=True,
                        action_type="template",
                        action=bound_action,
                        output=result,
                    )
                print(f"[Executor] 模板绑定失败,准备回退: {micro_skill.execution_template}")

        return await self._execute_with_llm(decision, current_state)

    def execute_sync(
        self,
        decision: 'PlanningDecision',
        current_state: Dict[str, Any]
    ) -> 'ExecutionResult':
        """Blocking wrapper around ``execute`` for synchronous callers.

        Runs the coroutine with ``asyncio.run``, so it must not be called
        from code that is already running inside an event loop.
        """
        return asyncio.run(self.execute(decision, current_state))

    async def _execute_template(
        self,
        bound_action: str,
        state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run a bound template (simulated).

        A real implementation would call a UI automation framework here. This
        one only sleeps briefly and reports success with the state unchanged.
        """
        print(f"[Executor] 执行模板: {bound_action}")
        await asyncio.sleep(0.1)  # simulated operation latency
        return {
            "action_executed": bound_action,
            "new_state": state,  # simplification: the state is assumed unchanged
            "effect": "模板执行成功",
        }

    async def _execute_with_llm(
        self,
        decision: 'PlanningDecision',
        state: Dict[str, Any]
    ) -> 'ExecutionResult':
        """Generate the action with the LLM client, or a placeholder without one.

        The return annotation is a string because ``ExecutionResult`` is
        defined after this class; an unquoted name would raise ``NameError``
        while the class body is being executed.
        """
        print(f"[Executor] LLM回退模式: {decision.subtask}")
        if self.llm_client:
            action = await self.llm_client.generate_action(
                subtask=decision.subtask,
                state=state,
                constraints=decision.constraints,
            )
        else:
            action = f"LLM执行: {decision.subtask}"
        return ExecutionResult(
            success=True,
            action_type="llm_fallback",
            action=action,
            output={"effect": "LLM生成执行"},
        )

    def check_completion(
        self,
        macro_skill: 'MacroSkill',
        state: Dict[str, Any]
    ) -> bool:
        """Return True when any success pattern's ``done_when`` holds in ``state``."""
        return any(
            self._check_done_when(pattern.done_when, state)
            for pattern in macro_skill.success_patterns
        )

    def _check_done_when(self, condition: str, state: Dict) -> bool:
        """Check a ``done_when`` condition (simplified).

        ``condition`` is not parsed yet; only the ``task_completed`` flag of
        the state is consulted.
        """
        return state.get("task_completed", False)
@dataclass
class ExecutionResult:
    """Result of executing one planner decision."""

    # Whether the execution is reported as successful.
    success: bool
    # Execution mode: "template" or "llm_fallback".
    action_type: str
    # The action that was executed.
    action: str
    # Mode-specific output dictionary.
    output: Dict[str, Any]
3.3 技能演化:从经验中学习
python
from typing import List, Tuple, Optional
from dataclasses import dataclass
@dataclass
class TrajectoryAnalysis:
    """Result of analyzing one trajectory."""

    # True when at least one of the lists below received an entry.
    has_new_knowledge: bool
    # (do, done_when) pairs extracted from successful segments.
    new_success_patterns: List[Tuple[str, str]] = field(default_factory=list)
    # (failure_type, recovery) pairs extracted from failed steps.
    new_lessons: List[Tuple[str, str]] = field(default_factory=list)
    # Risk-guard conditions derived from policy violations.
    new_risk_guards: List[str] = field(default_factory=list)
class SkillEvolution:
    """Skill evolution: learns or updates skills from exploration trajectories."""

    def __init__(self, skill_library: 'SkillLibrary'):
        self.library = skill_library

    def analyze_trajectory(
        self,
        trajectory: List[Dict],
        task_goal: str,
        policy_violations: Optional[List[str]] = None
    ) -> 'TrajectoryAnalysis':
        """Analyze a trajectory and extract the three supervision signals.

        Args:
            trajectory: Executed steps; each step is a dict that may carry
                ``action``, ``observation``, ``success``, ``error_type`` and
                ``recovery``.
            task_goal: Task goal (accepted but not used by the analysis).
            policy_violations: Policy violations observed, if any.

        Returns:
            The analysis. ``has_new_knowledge`` is True when any success
            pattern, lesson or risk guard was extracted.
        """
        analysis = TrajectoryAnalysis(has_new_knowledge=False)

        # 1. Successful segments become success patterns.
        analysis.new_success_patterns.extend(self._extract_success_segments(trajectory))
        # 2. Failed steps with a recovery become lessons.
        analysis.new_lessons.extend(self._extract_failure_segments(trajectory))
        # 3. Policy violations become risk guards.
        analysis.new_risk_guards.extend(policy_violations or [])

        analysis.has_new_knowledge = bool(
            analysis.new_success_patterns
            or analysis.new_lessons
            or analysis.new_risk_guards
        )
        return analysis

    def _extract_success_segments(
        self,
        trajectory: List[Dict]
    ) -> List[Tuple[str, str]]:
        """Extract runs of consecutive successful steps.

        Each run yields ``(do, done_when)``: ``do`` joins the run's actions
        with ``" -> "`` and ``done_when`` is the observation of the run's
        last successful step (``"步骤完成"`` when that step has none). A run
        that reaches the end of the trajectory is emitted too; otherwise an
        all-success trajectory would produce nothing.
        """
        segments: List[Tuple[str, str]] = []
        actions: List[str] = []
        last_observation = "步骤完成"

        def flush():
            if actions:
                segments.append((" -> ".join(actions), last_observation))
                actions.clear()

        for step in trajectory:
            if not step.get("success"):
                flush()
                continue
            action = step.get("action")
            if action is None:
                # A step without an action contributes nothing to the path.
                continue
            actions.append(str(action))
            last_observation = step.get("observation", "步骤完成")
        flush()
        return segments

    def _extract_failure_segments(
        self,
        trajectory: List[Dict]
    ) -> List[Tuple[str, str]]:
        """Extract ``(failure_type, recovery)`` for failed steps that record a recovery.

        ``failure_type`` falls back to ``"未知错误"`` when the step has no
        ``error_type``.
        """
        return [
            (step.get("error_type", "未知错误"), step["recovery"])
            for step in trajectory
            if not step.get("success") and step.get("recovery")
        ]

    def create_or_update_skill(
        self,
        analysis: 'TrajectoryAnalysis',
        task_goal: str
    ) -> Optional[str]:
        """Create a new skill or update a similar one from the analysis.

        Returns:
            The affected skill ID, or ``None`` when the analysis holds no
            new knowledge.
        """
        if not analysis.has_new_knowledge:
            print("[SkillEvolution] 没有新知识,不创建技能")
            return None

        existing = self._find_similar_skill(task_goal)
        if existing:
            return self._update_skill(existing, analysis)
        return self._create_new_skill(task_goal, analysis)

    def _find_similar_skill(self, goal: str) -> Optional['MacroSkill']:
        """Return the first skill whose intent shares a whitespace-separated word with ``goal``."""
        goal_keywords = set(goal.lower().split())
        for skill in self.library.macro_skills.values():
            if goal_keywords & set(skill.intent.lower().split()):
                return skill
        return None

    def _create_new_skill(
        self,
        goal: str,
        analysis: 'TrajectoryAnalysis'
    ) -> str:
        """Create a macro skill from the analysis, add it to the library, return its ID."""
        skill_id = f"macro_{goal[:20].replace(' ', '_')}_{len(self.library.macro_skills)}"
        skill = MacroSkill(skill_id=skill_id, intent=goal)

        for do, done_when in analysis.new_success_patterns:
            skill.add_success_pattern(do, done_when)
        for failure_type, recovery in analysis.new_lessons:
            skill.add_lesson(failure_type, recovery)
        for guard in analysis.new_risk_guards:
            skill.add_risk_guard(guard, f"策略约束: {guard}")

        self.library.add_macro_skill(skill)
        return skill_id

    def _update_skill(
        self,
        skill: 'MacroSkill',
        analysis: 'TrajectoryAnalysis'
    ) -> str:
        """Merge new knowledge into an existing skill, skipping duplicates.

        Success patterns are keyed by ``(do, done_when)``, lessons by
        ``failure_type`` and risk guards by ``condition``. Each key set is
        updated as entries are added, so duplicates inside the same analysis
        are skipped as well.
        """
        print(f"[SkillEvolution] 更新技能: {skill.skill_id}")

        known_patterns = {(p.do, p.done_when) for p in skill.success_patterns}
        for do, done_when in analysis.new_success_patterns:
            if (do, done_when) not in known_patterns:
                skill.add_success_pattern(do, done_when)
                known_patterns.add((do, done_when))

        known_failures = {l.failure_type for l in skill.lessons}
        for failure_type, recovery in analysis.new_lessons:
            if failure_type not in known_failures:
                skill.add_lesson(failure_type, recovery)
                known_failures.add(failure_type)

        known_conditions = {r.condition for r in skill.risk_guards}
        for guard in analysis.new_risk_guards:
            if guard not in known_conditions:
                skill.add_risk_guard(guard, f"策略约束: {guard}")
                known_conditions.add(guard)

        return skill.skill_id
四、端到端示例:实现一个安全的学习循环
4.1 完整的Agent实现
python
import asyncio
from typing import Dict, Any, List
class SkillHarnessAgent:
    """End-to-end SKILLHARNESS agent wiring library, planner, executor and evolution."""

    def __init__(self):
        # Core components, all sharing one skill library.
        self.skill_library = SkillLibrary()
        self.planner = Planner(self.skill_library)
        self.executor = Executor(self.skill_library)
        self.evolution = SkillEvolution(self.skill_library)
        # Per-task state.
        self.current_task = None
        self.execution_history = []

    async def learn_from_exploration(
        self,
        exploration_goals: List[str],
        max_rounds: int = 30
    ):
        """Learn skills by exploring.

        Each round selects a goal, runs an exploration, analyzes the
        trajectory and creates or updates a skill when new knowledge was
        found. Nothing is done when ``exploration_goals`` is empty.
        """
        print(f"[Agent] 开始探索学习,最多 {max_rounds} 轮")
        if not exploration_goals:
            # Choosing from an empty list would raise; there is nothing to explore.
            print("[Agent] 没有探索目标,跳过学习")
            return

        for round_num in range(max_rounds):
            print(f"\n--- 探索轮次 {round_num + 1}/{max_rounds} ---")
            goal = await self._select_exploration_goal(exploration_goals)
            trajectory = await self._execute_exploration(goal)
            violations = self._detect_policy_violations(trajectory)
            analysis = self.evolution.analyze_trajectory(
                trajectory, goal, violations
            )
            if analysis.has_new_knowledge:
                skill_id = self.evolution.create_or_update_skill(analysis, goal)
                print(f"[Agent] 技能更新: {skill_id}")

    async def _select_exploration_goal(self, goals: List[str]) -> str:
        """Select an exploration goal (simplified: uniformly at random)."""
        import random
        return random.choice(goals)

    async def _execute_exploration(self, goal: str) -> List[Dict]:
        """Run an exploration and return its trajectory (simulated, fixed steps)."""
        return [
            {"action": "click('btn1')", "success": True, "observation": "按钮点击成功"},
            {"action": "fill('input', 'value')", "success": True, "observation": "输入完成"},
            {"action": "click('submit')", "success": True, "observation": "提交成功"},
        ]

    def _detect_policy_violations(self, trajectory: List[Dict]) -> List[str]:
        """Detect policy violations (simplified: never reports any)."""
        return []

    async def execute_task(self, task_goal: str, initial_state: Dict, max_steps: int = 50):
        """Execute a task using the learned skills.

        Args:
            task_goal: Natural-language goal.
            initial_state: Starting state; it is copied, not mutated.
            max_steps: Upper bound on plan/execute iterations.

        Returns:
            ``{"success": bool, "history": [...]}``. The task counts as
            complete when the planner marks the decision complete or the
            state carries a truthy ``task_completed`` flag.
        """
        print(f"\n[Agent] 执行任务: {task_goal}")
        self.current_task = task_goal
        current_state = initial_state.copy()
        self.execution_history = []

        for step in range(max_steps):
            decision = self.planner.plan(
                current_state=current_state,
                history=self.execution_history,
                task_goal=task_goal
            )
            print(f"[Step {step+1}] 决策: {decision.subtask} "
                  f"(微技能: {decision.micro_skill_id})")

            result = await self.executor.execute(decision, current_state)
            print(f"[Step {step+1}] 执行结果: {result.action_type} - {result.action}")

            # Simplified state update from the executor output.
            current_state.update(result.output.get("new_state", {}))
            self.execution_history.append({
                "step": step,
                "decision": decision,
                "result": result
            })

            # The planner never sets is_complete in this implementation, so
            # the state flag is honoured as well; without it the loop would
            # always run to max_steps and report failure.
            if decision.is_complete or current_state.get("task_completed", False):
                print("[Agent] 任务完成!")
                return {"success": True, "history": self.execution_history}

        print(f"[Agent] 达到最大步数 {max_steps},任务未完成")
        return {"success": False, "history": self.execution_history}
4.2 使用示例
python
async def main():
    """Demo: learn skills by exploration, persist them, reload and run a task."""
    agent = SkillHarnessAgent()

    # Simulation: learn some skills first.
    await agent.learn_from_exploration([
        "创建GitLab项目",
        "提交代码到仓库",
        "创建合并请求",
        "添加项目成员"
    ])

    # Persist the skill library.
    agent.skill_library.save("skill_library.json")

    # Later: load the library into a fresh agent.
    agent2 = SkillHarnessAgent()
    agent2.skill_library.load("skill_library.json")

    result = await agent2.execute_task(
        task_goal="在GitLab上创建一个名为'planner'的私有项目",
        initial_state={
            "current_page": "gitlab.com/projects",
            "user_consent_verified": True,
            "within_boundary": True
        }
    )
    print(f"\n执行结果: {result['success']}")


# Guarded so importing this module does not start the demo (and write
# skill_library.json) as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
4.3 输出示例(示意,实际的技能ID与日志以运行结果为准)
ini
[Agent] 开始探索学习,最多 30 轮
--- 探索轮次 1/30 ---
[SkillLibrary] 添加宏技能: MacroSkill(id=macro_GitLab项目, intent=创建GitLab项目)
[Agent] 技能更新: macro_GitLab项目
--- 探索轮次 2/30 ---
[SkillLibrary] 添加微技能: MicroSkill(id=micro_点击创建按钮, label=点击创建按钮) -> macro_GitLab项目
[Agent] 执行任务: 在GitLab上创建一个名为'planner'的私有项目
[Step 1] 决策: 点击创建按钮 (微技能: micro_点击创建按钮)
[Executor] 执行模板: click('create_project_btn')
[Step 1] 执行结果: template - click('create_project_btn')
[Step 2] 决策: 输入项目名称 (微技能: None)
[Executor] LLM回退模式: 输入项目名称
[Step 2] 执行结果: llm_fallback - LLM执行: 输入项目名称
[Agent] 任务完成!
五、关键设计决策
5.1 为什么分离宏技能和微技能?
核心原因:意图和落地需要不同的抽象层次。
arduino
宏技能回答: "我要完成什么目标?"
"怎么算成功了?"
"有什么危险?"
微技能回答: "具体按哪个按钮?"
"填什么值?"
"界面变了怎么办?"
好处:
- 宏技能存储的是稳定知识(目标、策略、安全边界),不会因为UI变化而失效
- 微技能存储的是具体操作,UI变了只更新微技能即可
- 安全边界在宏技能层,同一个目标的不同实现都共享同一套安全约束
5.2 为什么需要风险守卫?
传统方法认为"成功过的操作就是安全的",但SKILLHARNESS发现:
arduino
问题:偶然的成功 ≠ 安全的操作
例子:用户没注意,Agent跳过了"确认"步骤,成功提交了。
下次遇到类似情况,如果用户真的需要确认,就会出大问题。
风险守卫的作用:把"观察到的不安全情况"积累成约束条件,下次激活技能前必须检查。
5.3 双模式执行的权衡
markdown
确定性(模板重放) 灵活性(LLM回退)
↑ ↑
│ │
高效、可靠 适应新环境、处理异常
│ │
└────────────┬────────────────┘
环境变化程度
自适应绕过:同一微技能连续失败多次(本文实现中默认为连续3次模板绑定失败)后,禁用模板重放,改走LLM回退。这防止了脆弱的模板在环境变化后累积错误。
六、实际集成建议
6.1 与现有Agent框架集成
SKILLHARNESS的组件可以很方便地集成到LangChain、AutoGen等框架:
python
# 以LangChain Agent为例
from langchain.agents import AgentExecutor
class SkillHarnessTool:
    """Wraps the SKILLHARNESS planner and executor behind a synchronous ``run``."""

    def __init__(self, skill_library: SkillLibrary):
        self.library = skill_library

    def run(self, tool_input: str) -> str:
        """Plan and execute one step for ``tool_input``.

        The input is used both as the task goal and as the ``input`` entry of
        the state. ``Executor.execute`` is a coroutine, so it is driven with
        ``asyncio.run``; ``run`` must therefore not be called from inside a
        running event loop.

        Returns:
            The ``effect`` entry of the execution output, or ``"执行完成"``
            when there is none.
        """
        planner = Planner(self.library)
        decision = planner.plan(
            current_state={"input": tool_input},
            history=[],
            task_goal=tool_input
        )
        executor = Executor(self.library)
        result = asyncio.run(executor.execute(decision, {"input": tool_input}))
        return result.output.get("effect", "执行完成")
6.2 持久化与增量学习
python
import json
from datetime import datetime
class PersistentSkillLibrary(SkillLibrary):
    """Skill library with timestamped snapshots and merging."""

    def __init__(self, storage_path: str = "skills/"):
        super().__init__()
        self.storage_path = storage_path

    def auto_save(self):
        """Save a snapshot named ``skill_library_<YYYYmmdd_HHMMSS>.json``.

        The storage directory is created when it does not exist yet.
        """
        import os

        directory = self.storage_path or "."
        os.makedirs(directory, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.save(os.path.join(directory, f"skill_library_{timestamp}.json"))

    def load_latest(self):
        """Load the newest snapshot; do nothing when none exists.

        Snapshot names embed a sortable timestamp, so the lexicographically
        last file name is the most recent one.
        """
        import os

        directory = self.storage_path or "."
        if not os.path.isdir(directory):
            return
        files = sorted(
            f for f in os.listdir(directory)
            if f.startswith("skill_library_")
        )
        if files:
            self.load(os.path.join(directory, files[-1]))

    def merge(self, other: 'PersistentSkillLibrary'):
        """Merge another library into this one (for multi-agent collaboration).

        Macro skills unknown here are added as they are. For a skill present
        in both, only entries not already known are appended: success
        patterns are keyed by ``(do, done_when)``, lessons by
        ``(failure_type, recovery_signal)`` and risk guards by ``condition``.
        Micro-skill links are unioned, and micro skills missing here are
        copied over so the links resolve.
        """
        for skill_id, skill in other.macro_skills.items():
            existing = self.macro_skills.get(skill_id)
            if existing is None:
                self.add_macro_skill(skill)
                continue

            known_patterns = {(p.do, p.done_when) for p in existing.success_patterns}
            for pattern in skill.success_patterns:
                key = (pattern.do, pattern.done_when)
                if key not in known_patterns:
                    existing.success_patterns.append(pattern)
                    known_patterns.add(key)

            known_lessons = {(l.failure_type, l.recovery_signal) for l in existing.lessons}
            for lesson in skill.lessons:
                key = (lesson.failure_type, lesson.recovery_signal)
                if key not in known_lessons:
                    existing.lessons.append(lesson)
                    known_lessons.add(key)

            known_conditions = {r.condition for r in existing.risk_guards}
            for guard in skill.risk_guards:
                if guard.condition not in known_conditions:
                    existing.risk_guards.append(guard)
                    known_conditions.add(guard.condition)

            existing.linked_micro_skills |= skill.linked_micro_skills

        for micro_id, micro in other.micro_skills.items():
            # Keep this library's own version when both define the same ID.
            self.micro_skills.setdefault(micro_id, micro)
6.3 监控与调试
python
class SkillHarnessMonitor:
    """Counts SKILLHARNESS runtime events and summarizes them."""

    def __init__(self):
        # One counter per known event name, all starting at zero.
        self.metrics = dict.fromkeys(
            (
                "template_success",
                "template_failure",
                "llm_fallback",
                "safety_blocked",
                "skills_created",
                "skills_updated",
            ),
            0,
        )

    def record(self, event: str, details: dict = None):
        """Count ``event`` when it is a known metric and print it either way."""
        counters = self.metrics
        if event in counters:
            counters[event] = counters[event] + 1
        shown = details or {}
        print(f"[Monitor] {event}: {shown}")

    def report(self) -> dict:
        """Return the three rates followed by every raw counter.

        Each rate is the counter divided by the number of executions
        (template successes + template failures + LLM fallbacks), or 0 when
        there were no executions.
        """
        counters = self.metrics
        executions = sum(
            counters[name]
            for name in ("template_success", "template_failure", "llm_fallback")
        )

        def rate(name):
            if executions > 0:
                return counters[name] / executions
            return 0

        summary = {
            "template_success_rate": rate("template_success"),
            "llm_fallback_rate": rate("llm_fallback"),
            "safety_block_rate": rate("safety_blocked"),
        }
        summary.update(counters)
        return summary
七、总结
SKILLHARNESS的核心贡献是把**"怎么做"和"什么时候做"**分开:
| 层次 | 回答的问题 | 存储的内容 |
|---|---|---|
| 宏技能 | 目标是什么?成功什么样?有什么坑? | 意图、成功模式、教训、风险守卫 |
| 微技能 | 具体怎么按?填什么值? | 语义标签、执行模板、占位符 |
安全的关键在于:不是学会了就能用,而是每次用之前都要检查安全边界。
灵活的关键在于:模板失效了还有LLM兜底,不会彻底歇菜。
希望这篇文章能帮助你把SKILLHARNESS用到自己的项目中!
参考论文:浙江大学 SKILLHARNESS,2026年6月