【Java手搓OpenManus】-5- 工具系统设计
- [1 引用](#1 引用)
- [2 系统架构设计](#2 系统架构设计)
- [3 核心代码实现](#3 核心代码实现)
  - [3.1 统一结果封装:ToolResult](#3.1 统一结果封装:ToolResult)
  - [2.2 工具统一功能接口:Tool 接口](#2.2 工具统一功能接口:Tool 接口)
  - [2.3 模板实现BaseTool 抽象类](#2.3 模板实现BaseTool 抽象类)
  - [2.4 ToolCollection 实现](#2.4 ToolCollection 实现)
  - [2.5 具体工具类实现TerminateTool](#2.5 具体工具类实现TerminateTool)
  - [2.6 浏览器工具类](#2.6 浏览器工具类)
- [3 测试](#3 测试)
  - [3.1 工具测试](#3.1 工具测试)
  - [3.2 工具集合测试](#3.2 工具集合测试)
  - [3.3 工具结果测试](#3.3 工具结果测试)
  - [3.4 浏览器工具测试](#3.4 浏览器工具测试)
- [4 思考](#4 思考)
  - [4.1 转换格式](#4.1 转换格式)
1 引用
在上一章中,我们初步搭建了 Agent 的核心交互逻辑。但在处理 askWithTools 时,我们通过手动构建巨大的 JSON Map 来定义工具,这种硬编码的方式不仅代码不够优雅,而且极难维护和扩展。
Agent 的核心能力在于使用工具。为了让 Agent 能够对接 MCP、网页浏览器、文件系统等多种复杂的工具,我们需要设计一个统一、规范且易于扩展的工具系统。
本章我们将采用经典的面向对象设计,构建 OpenManus 的工具基石。
2 系统架构设计
我们采用「接口 → 抽象类 → 具体实现」的分层设计模式。这种设计既保证了工具调用的统一性,又保留了具体工具实现的灵活性。
Tool (接口)
↓
BaseTool (抽象类)
↓
具体工具实现(如 BrowserTool)
3 核心代码实现
3.1 统一结果封装:ToolResult
为了让 LLM 能够理解工具执行的结果,我们需要封装一个标准的回包对象。
创建com.openmanus.domain.tool.ToolResult
java
package com.openmanus.domain.tool;
import lombok.Builder;
import lombok.Data;
/**
* 工具执行结果 - 对应 Python 版本的 app/tool/base.py ToolResult
*
* 封装工具执行的输出、错误信息、图片等
*/
@Data
@Builder
public class ToolResult {
private Object output; // 工具输出结果
private String error; // 错误信息
private String base64Image; // Base64 编码的图片(用于多模态)
private String system; // 系统消息
/**
* 判断结果是否有效(成功)- 对应 Python: ToolResult.__bool__()
*
* 注意:这里"有效"表示"成功",即没有错误
* 失败的结果(有 error)被认为是无效的
*/
public boolean isValid() {
// 有错误就是无效的
if (error != null) {
return false;
}
// 没有错误,且有输出(output、base64Image 或 system),就是有效的
return output != null || base64Image != null || system != null;
}
/**
* 创建成功结果 - 对应 Python: BaseTool.success_response()
*/
public static ToolResult success(Object output) {
return ToolResult.builder()
.output(output)
.build();
}
/**
* 创建失败结果 - 对应 Python: BaseTool.fail_response()
*/
public static ToolResult failure(String error) {
return ToolResult.builder()
.error(error)
.build();
}
@Override
public String toString() {
if (error != null) {
return "Error: " + error;
}
return output != null ? output.toString() : "";
}
}
2.2 工具统一功能接口:Tool 接口
这是所有工具必须实现的接口。特别注意 toFunctionCallFormat 方法:它负责把工具的名称、描述和参数 Schema(JSON Schema 格式)组装成 LLM(如 OpenAI)能识别的函数调用(Function Calling)定义。
创建com.openmanus.domain.tool.Tool
java
package com.openmanus.domain.tool;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
/**
 * Contract every tool must fulfil; the Java counterpart of {@code BaseTool}
 * in the Python version's {@code app/tool/base.py}.
 *
 * <p>A tool exposes a name, a description, a parameter schema and an
 * asynchronous execution entry point.
 */
public interface Tool {

    /** Returns the tool name (Python: {@code BaseTool.name}). */
    String getName();

    /** Returns the tool description (Python: {@code BaseTool.description}). */
    String getDescription();

    /** Returns the parameter schema in JSON Schema form (Python: {@code BaseTool.parameters}). */
    Map<String, Object> getParameters();

    /**
     * Runs the tool (Python: {@code BaseTool.execute()}).
     *
     * @param params tool arguments, parsed from the LLM's function-call arguments
     * @return a future that yields the execution result
     */
    CompletableFuture<ToolResult> execute(Map<String, Object> params);

    /**
     * Describes this tool in the function-call format consumed by the LLM
     * (Python: {@code BaseTool.to_param()}).
     *
     * <p>The result is built with {@link Map#of}, so it is immutable and none of
     * name, description or parameters may be {@code null}.
     *
     * @return a map with {@code "type" = "function"} and a nested {@code "function"} map
     */
    default Map<String, Object> toFunctionCallFormat() {
        Map<String, Object> functionPart = Map.of(
                "name", getName(),
                "description", getDescription(),
                "parameters", getParameters());
        return Map.of("type", "function", "function", functionPart);
    }

    /**
     * Releases resources held by the tool, if any (Python: {@code BaseTool.cleanup()}).
     * The default implementation has nothing to release.
     *
     * @return an already-completed future
     */
    default CompletableFuture<Void> cleanup() {
        return CompletableFuture.completedFuture(null);
    }
}
2.3 模板实现BaseTool 抽象类
子类只需关注 execute 方法的实现。
创建com.openmanus.domain.tool.BaseTool
java
package com.openmanus.domain.tool;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
/**
 * Skeleton implementation of {@link Tool}; the Java counterpart of {@code BaseTool}
 * in the Python version's {@code app/tool/base.py}.
 *
 * <p>Stores the name, description and parameter schema so that subclasses only
 * have to implement {@link #execute(Map)}.
 */
public abstract class BaseTool implements Tool {

    protected final String name;
    protected final String description;
    protected final Map<String, Object> parameters;

    /**
     * @param name        tool name
     * @param description tool description
     * @param parameters  parameter schema; {@code null} is replaced by an empty map
     */
    protected BaseTool(String name, String description, Map<String, Object> parameters) {
        this.name = name;
        this.description = description;
        if (parameters == null) {
            this.parameters = Map.of();
        } else {
            this.parameters = parameters;
        }
    }

    @Override
    public String getName() {
        return this.name;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public Map<String, Object> getParameters() {
        return this.parameters;
    }

    /**
     * Tool-specific behaviour, to be supplied by each subclass
     * (Python: {@code BaseTool.execute()}).
     */
    @Override
    public abstract CompletableFuture<ToolResult> execute(Map<String, Object> params);

    /**
     * Wraps {@code data} in a successful result (Python: {@code BaseTool.success_response()}).
     */
    protected ToolResult successResponse(Object data) {
        ToolResult ok = ToolResult.success(data);
        return ok;
    }

    /**
     * Wraps {@code message} in a failed result (Python: {@code BaseTool.fail_response()}).
     */
    protected ToolResult failResponse(String message) {
        ToolResult failed = ToolResult.failure(message);
        return failed;
    }
}
2.4 ToolCollection 实现
Agent 可能拥有几十个工具,需要一个容器来管理它们的注册、查找和批量格式转换。
创建com.openmanus.domain.tool.ToolCollection
java
package com.openmanus.domain.tool;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
/**
 * Registry of tools; the Java counterpart of {@code ToolCollection} in the Python
 * version's {@code app/tool/tool_collection.py}.
 *
 * <p>Manages several tools and offers lookup, bulk conversion to the function-call
 * format, and execution by name. Tools are kept in registration order so that the
 * tool list handed to the LLM is the same on every run.
 */
public class ToolCollection {

    /** Registered tools keyed by name, iterated in registration order. */
    private final Map<String, Tool> toolMap;

    /**
     * Creates a collection pre-populated with the given tools.
     *
     * @param tools tools to register; may be {@code null} or empty, and {@code null}
     *              elements are skipped
     */
    public ToolCollection(Tool... tools) {
        // LinkedHashMap instead of HashMap: iteration order must not depend on hash codes.
        this.toolMap = new LinkedHashMap<>();
        if (tools != null) {
            for (Tool tool : tools) {
                addTool(tool);
            }
        }
    }

    /**
     * Registers a tool (Python: {@code ToolCollection.add_tool()}). A tool registered
     * under an existing name replaces the previous one; {@code null} is ignored.
     *
     * @param tool the tool to register
     */
    public void addTool(Tool tool) {
        if (tool != null) {
            toolMap.put(tool.getName(), tool);
        }
    }

    /**
     * Registers several tools in the given order.
     *
     * @param tools tools to register; may be {@code null}
     */
    public void addTools(Tool... tools) {
        if (tools != null) {
            for (Tool tool : tools) {
                addTool(tool);
            }
        }
    }

    /**
     * Looks a tool up by name (Python: {@code ToolCollection.get_tool()}).
     *
     * @param name the tool name
     * @return the tool, or {@code null} when no tool has that name
     */
    public Tool getTool(String name) {
        return toolMap.get(name);
    }

    /**
     * Checks whether a tool with the given name is registered.
     */
    public boolean hasTool(String name) {
        return toolMap.containsKey(name);
    }

    /**
     * Converts every registered tool to its function-call format
     * (Python: {@code ToolCollection.to_params()}).
     *
     * @return one entry per tool, in registration order
     */
    public List<Map<String, Object>> toFunctionCallFormats() {
        return toolMap.values().stream()
                .map(Tool::toFunctionCallFormat)
                .collect(Collectors.toList());
    }

    /**
     * Executes a tool by name (Python: {@code ToolCollection.execute()}).
     *
     * <p>Problems are reported through the returned future as a failed
     * {@link ToolResult} rather than thrown: an unknown name, a tool that throws
     * synchronously, and a tool that returns no future all yield a failure result.
     *
     * @param name      name of the tool to run
     * @param toolInput arguments for the tool; {@code null} is treated as no arguments
     * @return a future yielding the tool's result
     */
    public CompletableFuture<ToolResult> execute(String name, Map<String, Object> toolInput) {
        Tool tool = toolMap.get(name);
        if (tool == null) {
            return CompletableFuture.completedFuture(
                    ToolResult.failure("Unknown tool: " + name));
        }
        // Tools read their arguments without null checks, so never hand them a null map.
        Map<String, Object> input = toolInput != null ? toolInput : Map.of();
        try {
            CompletableFuture<ToolResult> pending = tool.execute(input);
            if (pending == null) {
                return CompletableFuture.completedFuture(
                        ToolResult.failure("Tool " + name + " returned no result"));
            }
            return pending;
        } catch (RuntimeException e) {
            // Keep the contract uniform: callers observe failures via the future only.
            return CompletableFuture.completedFuture(
                    ToolResult.failure("Tool " + name + " failed: " + e.getMessage()));
        }
    }

    /**
     * Returns the names of all registered tools.
     *
     * @return an independent copy of the names, in registration order
     */
    public Set<String> getToolNames() {
        return new LinkedHashSet<>(toolMap.keySet());
    }

    /**
     * Returns a read-only view of the name-to-tool mapping.
     */
    public Map<String, Tool> getToolMap() {
        return Collections.unmodifiableMap(toolMap);
    }
}
2.5 具体工具类实现TerminateTool
这是一个特殊的工具,允许 Agent 主动结束任务。
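创建 com.openmanus.domain.tool.TerminateTool(注意:3.2 节的 ToolCollectionTest 是从 com.openmanus.domain.tool.impl 包导入 TerminateTool 的,类的实际包名需要与测试中的 import 保持一致)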
java
package com.openmanus.domain.tool;
import com.openmanus.domain.tool.BaseTool;
import com.openmanus.domain.tool.ToolResult;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
/**
 * Tool that lets the agent end the current task on its own initiative.
 *
 * <p>Corresponds to the special-tool handling in the Python version.
 */
public class TerminateTool extends BaseTool {

    /** Reason reported when the caller supplies none. */
    private static final String DEFAULT_REASON = "Task completed";

    public TerminateTool() {
        super(
                "terminate",
                "Terminate the current task execution. Use this when the task is complete or cannot be completed.",
                createParametersSchema()
        );
    }

    /**
     * Builds the JSON Schema for this tool: a single optional string property
     * named {@code reason}.
     */
    private static Map<String, Object> createParametersSchema() {
        Map<String, Object> schema = new HashMap<>();
        schema.put("type", "object");
        schema.put("properties", Map.of(
                "reason", Map.of(
                        "type", "string",
                        "description", "Reason for termination"
                )
        ));
        schema.put("required", List.of());
        return schema;
    }

    /**
     * Produces the termination result.
     *
     * <p>Arguments come from an LLM and are not trusted to be well-formed: a
     * {@code null} map, a missing or {@code null} reason, and a non-string reason are
     * all accepted instead of raising {@code NullPointerException} or
     * {@code ClassCastException}.
     *
     * @param params tool arguments; only the optional {@code "reason"} entry is read
     * @return a completed future holding a success result with output
     *         {@code "Terminated: " + reason}
     */
    @Override
    public CompletableFuture<ToolResult> execute(Map<String, Object> params) {
        Object rawReason = params != null ? params.get("reason") : null;
        String reason = rawReason != null ? String.valueOf(rawReason) : DEFAULT_REASON;
        return CompletableFuture.completedFuture(
                ToolResult.success("Terminated: " + reason)
        );
    }
}
2.6 浏览器工具类
创建com.openmanus.domain.tool.impl.BrowserTool
java
package com.openmanus.domain.tool.impl;
import com.openmanus.domain.tool.BaseTool;
import com.openmanus.domain.tool.ToolResult;
import com.microsoft.playwright.Browser;
import com.microsoft.playwright.BrowserContext;
import com.microsoft.playwright.BrowserType;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Playwright;
import lombok.extern.slf4j.Slf4j;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
/**
* 浏览器工具 - 对应 Python 版本的 app/tool/browser_use_tool.py BrowserUseTool
*
* 使用 Playwright 实现浏览器自动化功能
*/
@Slf4j
public class BrowserTool extends BaseTool {
private static final String BROWSER_DESCRIPTION = """
A powerful browser automation tool that allows interaction with web pages through various actions.
* This tool provides commands for controlling a browser session, navigating web pages, and extracting information
* It maintains state across calls, keeping the browser session alive until explicitly closed
* Use this when you need to browse websites, fill forms, click buttons, extract content, or perform web searches
Key capabilities include:
* Navigation: Go to specific URLs, go back, search the web, or refresh pages
* Interaction: Click elements, input text, select from dropdowns, send keyboard commands
* Scrolling: Scroll up/down by pixel amount or scroll to specific text
* Content extraction: Extract and analyze content from web pages based on specific goals
""";
private Playwright playwright;
private Browser browser;
private BrowserContext context;
private Page page;
public BrowserTool() {
super(
"browser_use",
BROWSER_DESCRIPTION,
createParametersSchema()
);
}
/**
* 创建参数 Schema - 对应 Python: BrowserUseTool.parameters
*/
private static Map<String, Object> createParametersSchema() {
Map<String, Object> schema = new HashMap<>();
schema.put("type", "object");
Map<String, Object> properties = new HashMap<>();
// action 参数
Map<String, Object> action = new HashMap<>();
action.put("type", "string");
action.put("enum", List.of(
"go_to_url", "click_element", "input_text", "scroll_down",
"scroll_up", "scroll_to_text", "send_keys", "go_back",
"extract_content", "wait"
));
action.put("description", "The browser action to perform");
properties.put("action", action);
// url 参数
Map<String, Object> url = new HashMap<>();
url.put("type", "string");
url.put("description", "URL for 'go_to_url' action");
properties.put("url", url);
// text 参数
Map<String, Object> text = new HashMap<>();
text.put("type", "string");
text.put("description", "Text for 'input_text' or 'scroll_to_text' actions");
properties.put("text", text);
// index 参数
Map<String, Object> index = new HashMap<>();
index.put("type", "integer");
index.put("description", "Element index for 'click_element' or 'input_text' actions");
properties.put("index", index);
// goal 参数
Map<String, Object> goal = new HashMap<>();
goal.put("type", "string");
goal.put("description", "Extraction goal for 'extract_content' action");
properties.put("goal", goal);
schema.put("properties", properties);
schema.put("required", List.of("action"));
return schema;
}
/**
* 初始化浏览器 - 对应 Python: BrowserUseTool 的浏览器初始化
*/
private void initializeBrowser() {
if (playwright == null) {
playwright = Playwright.create();
browser = playwright.chromium().launch(new BrowserType.LaunchOptions()
.setHeadless(false)); // 可以配置为 headless
context = browser.newContext();
page = context.newPage();
// 设置默认超时为 10 秒,避免操作超时
page.setDefaultTimeout(10000);
log.info("Browser initialized with 10s default timeout");
}
}
/**
* 执行浏览器操作 - 对应 Python: BrowserUseTool.execute()
*/
@Override
public CompletableFuture<ToolResult> execute(Map<String, Object> params) {
return CompletableFuture.supplyAsync(() -> {
try {
initializeBrowser();
String action = (String) params.get("action");
if (action == null) {
return failResponse("Action parameter is required");
}
// 使用 switch 表达式处理不同操作
// 对应 Python: if action == "go_to_url": ...
return switch (action) {
case "go_to_url" -> handleGoToUrl(params);
case "click_element" -> handleClickElement(params);
case "input_text" -> handleInputText(params);
case "scroll_down" -> handleScrollDown(params);
case "scroll_up" -> handleScrollUp(params);
case "extract_content" -> handleExtractContent(params);
case "go_back" -> handleGoBack();
case "wait" -> handleWait(params);
default -> failResponse("Unknown action: " + action);
};
} catch (Exception e) {
log.error("Error executing browser action", e);
return failResponse("Error: " + e.getMessage());
}
});
}
/**
* 导航到 URL - 对应 Python: BrowserUseTool 的 go_to_url
*/
private ToolResult handleGoToUrl(Map<String, Object> params) {
String url = (String) params.get("url");
if (url == null || url.isEmpty()) {
return failResponse("URL parameter is required for go_to_url action");
}
try {
page.navigate(url);
page.waitForLoadState();
String title = page.title();
return successResponse("Navigated to " + url + ". Page title: " + title);
} catch (Exception e) {
return failResponse("Failed to navigate to " + url + ": " + e.getMessage());
}
}
/**
* 点击元素 - 对应 Python: BrowserUseTool 的 click_element
*/
private ToolResult handleClickElement(Map<String, Object> params) {
// 简化实现:根据索引点击元素
// 实际应该根据页面结构定位元素
Integer index = (Integer) params.get("index");
if (index == null) {
return failResponse("Index parameter is required for click_element action");
}
if (index < 0) {
return failResponse("Element index must be non-negative, got: " + index);
}
try {
// 先检查有多少个可点击元素(使用 count() 快速检查,不等待所有元素)
int totalCount = page.locator("button, a, [onclick]").count();
log.debug("Found {} clickable elements on the page", totalCount);
if (totalCount == 0) {
return failResponse("No clickable elements found on the page");
}
if (index >= totalCount) {
return failResponse("Element index out of range: " + index + " (found " + totalCount + " elements)");
}
// 使用 nth() 直接定位第 index 个元素,避免等待所有元素
var locator = page.locator("button, a, [onclick]").nth(index);
// 策略1:先尝试等待元素可见(短超时 2 秒)
try {
// 临时设置短超时
page.setDefaultTimeout(2000);
locator.waitFor();
// 恢复默认超时
page.setDefaultTimeout(10000);
// 元素可见,正常点击
locator.click();
log.info("Successfully clicked visible element at index {}", index);
return successResponse("Clicked element at index " + index);
} catch (Exception visibilityException) {
// 元素不可见,尝试强制点击
log.debug("Element at index {} is not visible, attempting force click", index);
page.setDefaultTimeout(10000); // 恢复默认超时
// 策略2:强制点击隐藏的元素
try {
locator.click(new com.microsoft.playwright.Locator.ClickOptions().setForce(true));
log.info("Successfully force-clicked hidden element at index {}", index);
return successResponse("Force-clicked element at index " + index + " (element was hidden)");
} catch (Exception forceClickException) {
// 策略3:使用 JavaScript 直接触发点击事件(绕过可见性检查)
log.debug("Force click failed, attempting JavaScript click for element at index {}", index);
try {
// 使用 evaluate 直接触发元素的 click 事件
locator.evaluate("element => element.click()");
log.info("Successfully clicked element at index {} using JavaScript", index);
return successResponse("Clicked element at index " + index + " using JavaScript (element was hidden)");
} catch (Exception jsClickException) {
// 所有策略都失败
log.error("All click strategies failed for element at index {}", index, jsClickException);
return failResponse("Failed to click element at index " + index +
": element is not visible and JavaScript click also failed - " + jsClickException.getMessage());
}
}
}
} catch (Exception e) {
// 检查是否是超时异常
String errorMsg = e.getMessage();
if (errorMsg != null && (errorMsg.contains("Timeout") || errorMsg.contains("timeout"))) {
log.warn("Timeout while clicking element at index {}: {}", index, errorMsg);
return failResponse("Element at index " + index + " is not clickable within timeout");
}
log.error("Error clicking element at index {}", index, e);
return failResponse("Failed to click element: " + e.getMessage());
}
}
/**
* 输入文本 - 对应 Python: BrowserUseTool 的 input_text
*/
private ToolResult handleInputText(Map<String, Object> params) {
String text = (String) params.get("text");
Integer index = (Integer) params.get("index");
if (text == null || text.isEmpty()) {
return failResponse("Text parameter is required for input_text action");
}
try {
// 找到所有输入框
var inputs = page.locator("input[type='text'], textarea").all();
if (index != null && index >= 0 && index < inputs.size()) {
inputs.get(index).fill(text);
return successResponse("Input text: " + text + " at index " + index);
} else if (inputs.size() > 0) {
// 如果没有指定索引,使用第一个输入框
inputs.get(0).fill(text);
return successResponse("Input text: " + text);
} else {
return failResponse("No input field found");
}
} catch (Exception e) {
return failResponse("Failed to input text: " + e.getMessage());
}
}
/**
* 向下滚动 - 对应 Python: BrowserUseTool 的 scroll_down
*/
private ToolResult handleScrollDown(Map<String, Object> params) {
try {
page.evaluate("window.scrollBy(0, 500)");
return successResponse("Scrolled down");
} catch (Exception e) {
return failResponse("Failed to scroll: " + e.getMessage());
}
}
/**
* 向上滚动 - 对应 Python: BrowserUseTool 的 scroll_up
*/
private ToolResult handleScrollUp(Map<String, Object> params) {
try {
page.evaluate("window.scrollBy(0, -500)");
return successResponse("Scrolled up");
} catch (Exception e) {
return failResponse("Failed to scroll: " + e.getMessage());
}
}
/**
* 提取内容 - 对应 Python: BrowserUseTool 的 extract_content
*/
private ToolResult handleExtractContent(Map<String, Object> params) {
@SuppressWarnings("unused")
String goal = (String) params.get("goal"); // 保留参数,未来可用于更精确的内容提取
try {
// 提取页面文本内容
String content = page.textContent("body");
if (content == null) {
content = page.innerText("body");
}
// 限制内容长度
if (content != null && content.length() > 2000) {
content = content.substring(0, 2000) + "...";
}
return successResponse("Extracted content: " + (content != null ? content : "No content found"));
} catch (Exception e) {
return failResponse("Failed to extract content: " + e.getMessage());
}
}
/**
* 返回上一页 - 对应 Python: BrowserUseTool 的 go_back
*/
private ToolResult handleGoBack() {
try {
page.goBack();
page.waitForLoadState();
return successResponse("Navigated back");
} catch (Exception e) {
return failResponse("Failed to go back: " + e.getMessage());
}
}
/**
* 等待 - 对应 Python: BrowserUseTool 的 wait
*/
private ToolResult handleWait(Map<String, Object> params) {
try {
Integer seconds = (Integer) params.getOrDefault("seconds", 1);
Thread.sleep(seconds * 1000L);
return successResponse("Waited for " + seconds + " seconds");
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return failResponse("Wait interrupted: " + e.getMessage());
}
}
/**
* 清理资源 - 对应 Python: BrowserUseTool 的 cleanup
*/
@Override
public CompletableFuture<Void> cleanup() {
return CompletableFuture.runAsync(() -> {
try {
if (page != null) {
page.close();
}
if (context != null) {
context.close();
}
if (browser != null) {
browser.close();
}
if (playwright != null) {
playwright.close();
}
log.info("Browser resources cleaned up");
} catch (Exception e) {
log.error("Error cleaning up browser resources", e);
}
});
}
}
3 测试
3.1 工具测试
java
package com.openmanus.domain.tool;
import org.junit.jupiter.api.Test;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for the {@link Tool} contract, exercised through a minimal
 * {@link BaseTool} subclass.
 */
class ToolTest {

    /**
     * Minimal tool used as a test fixture: echoes its {@code input} argument
     * prefixed with {@code "Processed: "}.
     */
    static class TestTool extends BaseTool {

        public TestTool() {
            super("test_tool", "A test tool", schema());
        }

        /** Schema with a single string property named {@code input}. */
        private static Map<String, Object> schema() {
            Map<String, Object> inputProperty = Map.of(
                    "type", "string",
                    "description", "Input parameter");
            return Map.of(
                    "type", "object",
                    "properties", Map.of("input", inputProperty));
        }

        @Override
        public CompletableFuture<ToolResult> execute(Map<String, Object> params) {
            Object received = params.getOrDefault("input", "");
            String input = (String) received;
            ToolResult outcome = ToolResult.success("Processed: " + input);
            return CompletableFuture.completedFuture(outcome);
        }
    }

    /** Executing the tool yields a valid result containing the processed input. */
    @Test
    void testToolExecution() {
        Tool tool = new TestTool();

        ToolResult result = tool.execute(Map.of("input", "test")).join();

        assertTrue(result.isValid());
        assertEquals("Processed: test", result.getOutput().toString());
    }

    /** The function-call format exposes the type marker plus the tool's name and description. */
    @Test
    void testToolToFunctionCallFormat() {
        Map<String, Object> format = new TestTool().toFunctionCallFormat();

        assertEquals("function", format.get("type"));
        assertTrue(format.containsKey("function"));

        @SuppressWarnings("unchecked")
        Map<String, Object> function = (Map<String, Object>) format.get("function");
        assertEquals("test_tool", function.get("name"));
        assertEquals("A test tool", function.get("description"));
    }
}
3.2 工具集合测试
java
package com.openmanus.domain.tool;
import com.openmanus.domain.tool.impl.TerminateTool;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link ToolCollection}, using {@code TerminateTool} as the sample tool.
 */
class ToolCollectionTest {

    private ToolCollection collection;

    /** Every test starts from an empty collection. */
    @BeforeEach
    void setUp() {
        this.collection = new ToolCollection();
    }

    /** A registered tool can be found and is the very instance that was added. */
    @Test
    void testAddTool() {
        Tool terminate = new TerminateTool();

        collection.addTool(terminate);

        assertTrue(collection.hasTool("terminate"));
        assertEquals(terminate, collection.getTool("terminate"));
    }

    /** Lookup by name returns a tool carrying that name. */
    @Test
    void testToolCollection() {
        collection.addTool(new TerminateTool());

        assertTrue(collection.hasTool("terminate"));
        Tool found = collection.getTool("terminate");
        assertNotNull(found);
        assertEquals("terminate", found.getName());
    }

    /** One registered tool converts to exactly one function-call definition. */
    @Test
    void testToolCollectionToFunctionFormats() {
        collection.addTool(new TerminateTool());

        List<Map<String, Object>> definitions = collection.toFunctionCallFormats();

        assertEquals(1, definitions.size());
        Map<String, Object> first = definitions.get(0);
        assertEquals("function", first.get("type"));
        assertTrue(first.containsKey("function"));
    }

    /** Executing a registered tool by name yields a valid result with output. */
    @Test
    void testToolCollectionExecute() {
        collection.addTool(new TerminateTool());

        ToolResult outcome = collection
                .execute("terminate", Map.of("reason", "Test complete"))
                .join();

        assertTrue(outcome.isValid());
        assertNotNull(outcome.getOutput());
    }

    /** Executing an unregistered name yields a failure that mentions the unknown tool. */
    @Test
    void testToolCollectionExecuteUnknownTool() {
        ToolResult outcome = collection.execute("unknown_tool", Map.of()).join();

        assertFalse(outcome.isValid());
        assertNotNull(outcome.getError());
        assertTrue(outcome.getError().contains("Unknown tool"));
    }
}
3.3 工具结果测试
java
package com.openmanus.domain.tool;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link ToolResult}.
 */
class ToolResultTest {

    /** A success result is valid, exposes its output and has no error. */
    @Test
    void testSuccessResult() {
        ToolResult result = ToolResult.success("Test output");
        assertTrue(result.isValid());
        assertEquals("Test output", result.getOutput());
        assertNull(result.getError());
    }

    /**
     * A failure result is NOT valid: {@code isValid()} returns {@code false} whenever
     * an error is set. It has no output and exposes the error message.
     */
    @Test
    void testFailureResult() {
        ToolResult result = ToolResult.failure("Test error");
        assertFalse(result.isValid());
        assertNull(result.getOutput());
        assertEquals("Test error", result.getError());
    }

    /** {@code toString()} prints the output for successes and "Error: ..." for failures. */
    @Test
    void testToString() {
        ToolResult success = ToolResult.success("Output");
        assertEquals("Output", success.toString());
        ToolResult failure = ToolResult.failure("Error");
        assertEquals("Error: Error", failure.toString());
    }
}
3.4 浏览器工具测试
java
package com.openmanus.domain.tool.impl;
import com.openmanus.domain.tool.ToolResult;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.DisplayName;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Integration tests for {@link BrowserTool}. They drive a real browser against
 * live pages, so they need network access.
 */
class BrowserToolTest {

    private BrowserTool tool;

    /** Creates a fresh tool for every test. */
    @BeforeEach
    void setUp() {
        tool = new BrowserTool();
    }

    /** Releases the browser, bounded by a timeout so a hanging close cannot stall the suite. */
    @AfterEach
    void tearDown() {
        if (tool == null) {
            return;
        }
        try {
            tool.cleanup().get(5, TimeUnit.SECONDS);
        } catch (Exception e) {
            System.err.println("清理浏览器资源失败: " + e.getMessage());
        }
    }

    /**
     * Helper: runs one action and waits at most 30 seconds for its result.
     */
    private ToolResult execute(Map<String, Object> params) throws Exception {
        return tool.execute(params).get(30, TimeUnit.SECONDS);
    }

    @Test
    @DisplayName("测试:访问百度首页")
    void testBrowserToolGoToUrl() throws Exception {
        // The URL must include the scheme (https://).
        ToolResult result = execute(Map.of(
                "action", "go_to_url",
                "url", "https://www.baidu.com"
        ));

        assertTrue(result.isValid(), "执行结果应为有效");
        assertNotNull(result.getOutput(), "输出不应为空");

        // The output is expected to mention "百度".
        String content = result.getOutput().toString();
        String snippet = content.substring(0, Math.min(content.length(), 100));
        assertTrue(content.contains("百度"), "页面内容应包含'百度',实际内容片段: " + snippet);
    }

    @Test
    @DisplayName("测试:点击页面元素")
    void testBrowserToolClickElement() throws Exception {
        // Step 1: open the Baidu home page.
        execute(Map.of("action", "go_to_url", "url", "https://www.baidu.com"));

        // Step 2: click the first interactive element (index 0).
        ToolResult result = execute(Map.of(
                "action", "click_element",
                "index", 0
        ));

        assertTrue(result.isValid());
    }

    @Test
    @DisplayName("测试:提取页面内容")
    void testBrowserToolExtractContent() throws Exception {
        execute(Map.of("action", "go_to_url", "url", "https://www.baidu.com"));

        ToolResult result = execute(Map.of(
                "action", "extract_content",
                "goal", "提取页面上的主要标题或按钮文字"
        ));

        assertTrue(result.isValid());
        assertNotNull(result.getOutput());
        System.out.println("提取到的内容: " + result.getOutput());
    }

    @Test
    @DisplayName("测试:页面滚动")
    void testBrowserToolScroll() throws Exception {
        // Step 1: open a search-results page.
        String searchUrl = "https://www.baidu.com/s?wd=OpenManus+AI";
        execute(Map.of("action", "go_to_url", "url", searchUrl));

        // Step 2: scroll down.
        ToolResult scrolledDown = execute(Map.of("action", "scroll_down"));
        assertTrue(scrolledDown.isValid(), "向下滚动失败");

        // Step 3: scroll back up.
        ToolResult scrolledUp = execute(Map.of("action", "scroll_up"));
        assertTrue(scrolledUp.isValid(), "向上滚动失败");
    }

    @Test
    @DisplayName("测试:错误参数处理")
    void testBrowserToolErrorHandling() throws Exception {
        // Case 1: the action argument is missing.
        ToolResult missingAction = execute(Map.of());
        assertFalse(missingAction.isValid(), "缺少参数应返回无效结果");
        assertNotNull(missingAction.getError());

        // Case 2: the action is not one the tool knows.
        ToolResult unknownAction = execute(Map.of("action", "fly_to_moon"));
        assertFalse(unknownAction.isValid(), "未知动作应返回无效结果");
        assertNotNull(unknownAction.getError());
    }
}
结果:
java
11:09:52.562 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser initialized with 10s default timeout
11:10:00.186 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser resources cleaned up
11:10:01.336 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser initialized with 10s default timeout
11:10:10.318 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Successfully clicked element at index 0 using JavaScript
11:10:10.540 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser resources cleaned up
11:10:11.509 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser initialized with 10s default timeout
11:10:19.226 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser resources cleaned up
11:10:20.205 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser initialized with 10s default timeout
11:10:20.398 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser resources cleaned up
11:10:21.308 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser initialized with 10s default timeout
提取到的内容: Extracted content:
if (navigator.userAgent.indexOf('Edge') > -1) {
var body = document.querySelector('body');
body.className += ' browser-edge';
}
<style data-for="result" type="text/css" >html{font-size:100px}html body{font-size:.14rem;font-size:14px}body{color:#333;background:#fff;padding:6px 0 0;margin:0;position:relative}body,th,td,.p1,.p2{font-family:arial}p,form,ol,ul,li,dl,dt,dd,h3{margin:0;padding:0;list-style:none}input{padding-top:0;padding-bottom:0;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}table,img{border:0}td{font-size:9pt;line-height:18px}em{font-style:normal}em{font-style:normal;color:#c00}a em{text-decoration:underline}cite{font-style:normal;color:green}.m,a.m{color:#666}a.m:visited{color:#606}.g,a.g{color:green}.c{color:#77c}.f14{font-size:14px}.f10{font-size:10.5pt}.f16{font-size:16px}.f13{font-size:13px}.bg{background-image:url(https://pss.bdstatic.com/r/www/cache/static/protocol/https/global/img/icons_441e82f.png);_background-image:url(https://pss.bdstatic.com/r/www/cache/static/protocol/https/global/img/icons_d5b04cc.gif);background-repeat:no-repeat}#u,#head,#tool,#search,#foot{font-size:12px}.logo{width:117px;height:38px;cursor:pointer}.p1{line-height:120%;margin-left:-12pt}.p2{width:100%;line-height:120%;margin-left:-12pt}#wrapper{_zoom:1}#container{word-break:break-all;word-wrap:break-word;position:relative}.container_s{width:1002px}.container_l{width:1222px}#content_left{width:636px;float:left;padding-left:35px}#content_right{border-left:1px solid #e1e1e1;float:right}.container_s #content_right{width:271px}.container_l #content_right{width:434px}.content_none{padding-left:35px}#u{color:#999;white-space:nowrap;position:absolute;right:10px;top:4px;z-index:299}#u a{color:#00c;margin:0 5px}#u .reg{margin:0}#u .last{margin-right:0}#u .un{font-weight:700;margin-right:5px}#u ul{width:100%;background:#fff;border:1px solid #9b9b9b}#u li{height:25px}#u li a{width:100%;height:25px;line-height:25px;display:bloc...
11:10:29.374 [ForkJoinPool.commonPool-worker-1] INFO com.openmanus.domain.tool.impl.BrowserTool -- Browser resources cleaned up
进程已结束,退出代码为 0
4 思考
4.1 转换格式
我们之前在 com.openmanus.infrastructure.llm.LangChain4jLLMService#convertMapToToolSpecification 中也写过一次格式转换:
java
/**
 * Converts a Map-based tool definition into a LangChain4j {@code ToolSpecification}.
 *
 * <p>Accepts both the wrapped form ({@code {"type": "function", "function": {...}}})
 * and the bare function map. Simple implementation: for every property only
 * {@code type}, {@code description} and {@code enum} are carried over.
 *
 * <p>The input is treated defensively. A missing, {@code null} or wrongly-typed
 * {@code parameters}, {@code properties} or {@code required} entry is read as
 * "none", and a property whose definition is not a map is skipped, so one malformed
 * detail does not discard the whole tool.
 *
 * @param toolMap the tool definition
 * @return the converted specification, or a placeholder named {@code "unknown_tool"}
 *         if the definition cannot be converted at all
 */
@SuppressWarnings("unchecked")
private ToolSpecification convertMapToToolSpecification(Map<String, Object> toolMap) {
    try {
        // Accept both the wrapped and the bare form.
        Object wrapped = toolMap.get("function");
        Map<String, Object> function = wrapped instanceof Map
                ? (Map<String, Object>) wrapped
                : toolMap;

        String name = (String) function.get("name");
        String description = (String) function.get("description");

        // key = parameter name, value = that parameter's definition (type, description, ...)
        Map<String, Map<String, Object>> propertiesMap = new HashMap<>();
        List<String> requiredList = new ArrayList<>();

        Object rawParameters = function.get("parameters");
        if (rawParameters instanceof Map) {
            Map<String, Object> parameters = (Map<String, Object>) rawParameters;

            // 1. Required names: copied so the caller's list is never shared.
            Object rawRequired = parameters.get("required");
            if (rawRequired instanceof List) {
                for (Object item : (List<?>) rawRequired) {
                    if (item != null) {
                        requiredList.add(item.toString());
                    }
                }
            }

            // 2. Property definitions: rebuilt as clean maps with the supported keys only.
            Object rawProps = parameters.get("properties");
            if (rawProps instanceof Map) {
                for (Map.Entry<String, Object> entry : ((Map<String, Object>) rawProps).entrySet()) {
                    if (!(entry.getValue() instanceof Map)) {
                        log.warn("Skipping property '{}' of tool '{}': definition is not a map",
                                entry.getKey(), name);
                        continue;
                    }
                    Map<String, Object> propDef = (Map<String, Object>) entry.getValue();
                    Map<String, Object> schema = new HashMap<>();
                    for (String key : List.of("type", "description", "enum")) {
                        Object value = propDef.get(key);
                        if (value != null) {
                            schema.put(key, value);
                        }
                    }
                    propertiesMap.put(entry.getKey(), schema);
                }
            }
        }

        // 3. Assemble the specification.
        return ToolSpecification.builder()
                .name(name)
                .description(description)
                .parameters(ToolParameters.builder()
                        .properties(propertiesMap)
                        .required(requiredList)
                        .build())
                .build();
    } catch (Exception e) {
        log.warn("Failed to convert tool map to specification", e);
        return ToolSpecification.builder()
                .name("unknown_tool")
                .description("Error parsing tool")
                .parameters(ToolParameters.builder().build())
                .build();
    }
}
这里的格式转换和我们这次写的tool的格式转换有什么不同呢?
Tool.toFunctionCallFormat()
- 生成的是标准的 OpenAI 函数调用格式(OpenAI Function Calling 规范)
- 这个格式可以被 OpenAI 以及兼容 OpenAI 接口的服务(如 DashScope 的兼容模式)直接使用;Anthropic 等采用自有工具定义格式的 LLM 则需要再做一次字段映射后才能使用
LangChain4jLLMService.convertMapToToolSpecification()
- 将标准的 OpenAI 格式转换为 LangChain4j 框架需要的格式(ToolSpecification)
- LangChain4j 有自己的内部表示,需要适配