一、image 工具
1.1 工具概述
功能 :分析图像(视觉模型)
核心特性:
- 支持单图/多图分析(默认最多 20 张,可通过 maxImages 参数调整)
- 支持路径/URL/DataURL
- 支持沙盒隔离
- 自动 MIME 类型检测
- 支持视觉模型的自动识别
1.2 Schema 定义
位置:第 29155 行附近
javascript
/**
 * Parameter schema for the `image` tool.
 *
 * Every field is optional at the schema level; the tool's execute handler is
 * what rejects a call that ends up with no usable image reference.
 * The same shape is repeated inline as `parameters` inside createImageTool.
 */
const ImageToolSchema = Type.Object({
// Free-form instruction for the analysis.
prompt: Type.Optional(Type.String()),
// One image reference.
image: Type.Optional(Type.String({ description: "Single image path or URL." })),
// Several image references; merged with `image` by the execute handler.
images: Type.Optional(Type.Array(Type.String(), {
description: "Multiple image paths or URLs (up to maxImages, default 20)."
})),
// Per-call model selection (presumably overrides the configured image model — confirm against resolvePromptAndModelOverride).
model: Type.Optional(Type.String()),
// Per-call size cap; unit is presumably megabytes, judging by the name — confirm against pickMaxBytes.
maxBytesMb: Type.Optional(Type.Number()),
// Per-call cap on how many images may be submitted.
maxImages: Type.Optional(Type.Number())
});
1.3 完整执行代码
位置:第 29155 行
javascript
/**
 * Builds the `image` tool definition, or returns null when the tool is unavailable.
 *
 * @param {object} [options] Tool wiring: `agentDir`, `config`, `modelHasVision`,
 *   `sandbox` ({ root, bridge }), `fsPolicy` ({ workspaceOnly }) and `workspaceDir`.
 * @returns {object|null} A tool descriptor ({ label, name, description, parameters,
 *   execute }) or null when no agentDir / no image model configuration is available.
 * @throws {Error} When an image model is explicitly configured but `agentDir` is missing.
 */
function createImageTool(options) {
  const agentDir = options?.agentDir?.trim();
  if (!agentDir) {
    // An explicitly configured image model without an agentDir is a wiring error,
    // not merely a "tool disabled" situation.
    if (hasToolModelConfig(coerceImageModelConfig(options?.config))) {
      throw new Error("createImageTool requires agentDir when enabled");
    }
    return null;
  }

  const imageModelConfig = resolveImageModelConfigForTool({
    cfg: options?.config,
    agentDir
  });
  if (!imageModelConfig) return null;

  // Drops surrounding whitespace and a single leading "@" from an image reference.
  const stripAtPrefix = (value) => {
    const text = value.trim();
    return text.startsWith("@") ? text.slice(1).trim() : text;
  };

  // Shapes one loaded image for the `details` part of the tool result.
  const toDetail = (img) => {
    const detail = { image: img.resolvedImage };
    if (img.rewrittenFrom) detail.rewrittenFrom = img.rewrittenFrom;
    return detail;
  };

  let description;
  if (options?.modelHasVision) {
    description = "Analyze one or more images with a vision model. Use image for a single path/URL, or images for multiple (up to 20). Only use this tool when images were NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you.";
  } else {
    description = "Analyze one or more images with the configured image model (agents.defaults.imageModel). Use image for a single path/URL, or images for multiple (up to 20). Provide a prompt describing what to analyze.";
  }

  return {
    label: "Image",
    name: "image",
    description,
    parameters: Type.Object({
      prompt: Type.Optional(Type.String()),
      image: Type.Optional(Type.String({ description: "Single image path or URL." })),
      images: Type.Optional(Type.Array(Type.String(), {
        description: "Multiple image paths or URLs (up to maxImages, default 20)."
      })),
      model: Type.Optional(Type.String()),
      maxBytesMb: Type.Optional(Type.Number()),
      maxImages: Type.Optional(Type.Number())
    }),
    execute: async (_toolCallId, args) => {
      const record = args && typeof args === "object" ? args : {};

      // Gather every string reference from `image` first, then `images`.
      const imageCandidates = [];
      if (typeof record.image === "string") imageCandidates.push(record.image);
      if (Array.isArray(record.images)) {
        for (const entry of record.images) {
          if (typeof entry === "string") imageCandidates.push(entry);
        }
      }

      // De-duplicate on the "@"-stripped form, but keep the trimmed original text.
      const seenKeys = new Set();
      const imageInputs = [];
      for (const candidate of imageCandidates) {
        const rawText = candidate.trim();
        const dedupeKey = stripAtPrefix(rawText);
        if (dedupeKey && !seenKeys.has(dedupeKey)) {
          seenKeys.add(dedupeKey);
          imageInputs.push(rawText);
        }
      }

      if (imageInputs.length === 0) {
        throw new Error("image required");
      }

      // A positive finite `maxImages` argument replaces the default limit.
      const requestedMax = record.maxImages;
      const hasUsableMax = typeof requestedMax === "number" && Number.isFinite(requestedMax) && requestedMax > 0;
      const maxImages = hasUsableMax ? Math.floor(requestedMax) : DEFAULT_MAX_IMAGES;
      if (imageInputs.length > maxImages) {
        return {
          content: [{
            type: "text",
            text: `Too many images: ${imageInputs.length} provided, maximum is ${maxImages}. Please reduce the number of images.`
          }],
          details: {
            error: "too_many_images",
            count: imageInputs.length,
            max: maxImages
          }
        };
      }

      const { prompt, modelOverride } = resolvePromptAndModelOverride(record, DEFAULT_PROMPT);
      const maxBytes = pickMaxBytes(
        options?.config,
        typeof record.maxBytesMb === "number" ? record.maxBytesMb : void 0
      );

      // Sandbox mode is active only when a non-blank sandbox root is supplied.
      const sandboxRoot = options?.sandbox ? options.sandbox.root.trim() : "";
      let sandboxConfig = null;
      if (sandboxRoot) {
        sandboxConfig = {
          root: sandboxRoot,
          bridge: options.sandbox.bridge,
          workspaceOnly: options.fsPolicy?.workspaceOnly === true
        };
      }

      const loadedImages = [];
      for (const imageRawInput of imageInputs) {
        const imageRaw = stripAtPrefix(imageRawInput);
        if (!imageRaw) {
          throw new Error("image required (empty string in array)");
        }

        // Classify the reference; a drive letter such as "C:\" is a path, not a URL scheme.
        const isWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(imageRaw);
        const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(imageRaw);
        const isFileUrl = /^file:/i.test(imageRaw);
        const isHttpUrl = /^https?:\/\//i.test(imageRaw);
        const isDataUrl = /^data:/i.test(imageRaw);
        const isSupportedReference = !hasScheme || isWindowsDrivePath || isFileUrl || isHttpUrl || isDataUrl;
        if (!isSupportedReference) {
          return {
            content: [{
              type: "text",
              text: `Unsupported image reference: ${imageRawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`
            }],
            details: {
              error: "unsupported_image_reference",
              image: imageRawInput
            }
          };
        }

        if (sandboxConfig && isHttpUrl) {
          throw new Error("Sandboxed image tool does not allow remote URLs.");
        }

        // "~" expansion is only applied outside the sandbox.
        const resolvedImage = !sandboxConfig && imageRaw.startsWith("~") ?
          resolveUserPath(imageRaw) : imageRaw;

        let resolvedPathInfo;
        if (isDataUrl) {
          resolvedPathInfo = { resolved: "" };
        } else if (sandboxConfig) {
          resolvedPathInfo = await resolveSandboxedBridgeMediaPath({
            sandbox: sandboxConfig,
            mediaPath: resolvedImage,
            inboundFallbackDir: "media/inbound"
          });
        } else {
          resolvedPathInfo = {
            resolved: resolvedImage.startsWith("file://") ? resolvedImage.slice(7) : resolvedImage
          };
        }
        const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;

        const mediaLocalRoots = resolveMediaToolLocalRoots(
          options?.workspaceDir,
          { workspaceOnly: options?.fsPolicy?.workspaceOnly === true },
          resolvedPath ? [resolvedPath] : void 0
        );

        let media;
        if (isDataUrl) {
          media = decodeDataUrl(resolvedImage);
        } else if (sandboxConfig) {
          media = await loadWebMedia(resolvedPath ?? resolvedImage, {
            maxBytes,
            sandboxValidated: true,
            readFile: createSandboxBridgeReadFile({ sandbox: sandboxConfig })
          });
        } else {
          media = await loadWebMedia(resolvedPath ?? resolvedImage, {
            maxBytes,
            localRoots: mediaLocalRoots
          });
        }

        if (media.kind !== "image") {
          throw new Error(`Unsupported media type: ${media.kind}`);
        }

        // Prefer contentType, then mimeType, then fall back to PNG.
        let mimeType = "image/png";
        if ("contentType" in media && media.contentType) {
          mimeType = media.contentType;
        } else if ("mimeType" in media && media.mimeType) {
          mimeType = media.mimeType;
        }

        const loaded = { buffer: media.buffer, mimeType, resolvedImage };
        if (resolvedPathInfo.rewrittenFrom) {
          loaded.rewrittenFrom = resolvedPathInfo.rewrittenFrom;
        }
        loadedImages.push(loaded);
      }

      const analysis = await runImagePrompt({
        cfg: options?.config,
        agentDir,
        imageModelConfig,
        modelOverride,
        prompt,
        images: loadedImages.map((img) => ({
          buffer: img.buffer,
          mimeType: img.mimeType
        }))
      });

      // A single image is reported flat; several are reported as an `images` list.
      const details = loadedImages.length === 1 ?
        toDetail(loadedImages[0]) :
        { images: loadedImages.map((img) => toDetail(img)) };
      return buildTextToolResult(analysis, details);
    }
  };
}
1.4 支持的图片格式
| 格式 | 说明 | 支持 |
|---|---|---|
| 文件路径 | 本地文件路径 | ✅ |
| file:// URL | 文件 URL | ✅ |
| http(s):// URL | 网络 URL | ✅(非沙盒) |
| data: URL | Data URL | ✅ |
| ~ 开头 | 用户目录 | ✅ |
1.5 执行流程图
image 工具调用
↓
1. 检查 agentDir(必需)
↓
2. 解析图像模型配置
↓
3. 收集图片候选
↓
4. 去重
↓
5. 检查数量限制(≤ maxImages,默认 20)
↓
6. 解析提示词和模型
↓
7. 解析沙盒配置
↓
8. 加载每张图片
├─ 检查 URL 格式
├─ 解析路径
├─ 加载媒体
└─ 验证类型
↓
9. 调用视觉模型
↓
10. 返回结果
二、pdf 工具
2.1 工具概述
功能 :分析 PDF 文档
核心特性:
- 支持单 PDF/多 PDF(最多 10 个)
- 支持原生 PDF 分析(Anthropic/Google)
- 支持文本/图像提取回退
- 支持页码范围选择
- 支持沙盒隔离
2.2 Schema 定义
位置:第 104199 行
javascript
/**
 * Parameter schema for the `pdf` tool.
 *
 * Every field is optional at the schema level; the tool's execute handler is
 * what rejects a call that ends up with no usable PDF reference.
 * The same shape is repeated inline as `parameters` inside createPdfTool.
 */
const PdfToolSchema = Type.Object({
// Free-form instruction for the analysis.
prompt: Type.Optional(Type.String()),
// One PDF reference.
pdf: Type.Optional(Type.String({ description: "Single PDF path or URL." })),
// Several PDF references; merged with `pdf` by the execute handler.
pdfs: Type.Optional(Type.Array(Type.String(), {
description: "Multiple PDF paths or URLs (up to 10)."
})),
// Page selection expression; omitted or blank means no page filter is applied.
pages: Type.Optional(Type.String({ description: 'Page range to process, e.g. "1-5", "1,3,5-7". Defaults to all pages.' })),
// Per-call model selection (presumably overrides the configured PDF model — confirm against resolvePromptAndModelOverride).
model: Type.Optional(Type.String()),
// Per-call size cap in megabytes (the execute handler multiplies it by 1024 * 1024).
maxBytesMb: Type.Optional(Type.Number())
});
2.3 完整执行代码(部分)
位置:第 104199 行
javascript
/**
 * Builds the `pdf` tool definition, or returns null when the tool is unavailable.
 *
 * @param {object} [options] Tool wiring: `agentDir`, `config`, `sandbox`
 *   ({ root, bridge }), `fsPolicy` ({ workspaceOnly }) and `workspaceDir`.
 * @returns {object|null} A tool descriptor ({ label, name, description, parameters,
 *   execute }) or null when no agentDir / no PDF model configuration is available.
 * @throws {Error} When a PDF model is explicitly configured but `agentDir` is missing.
 */
function createPdfTool(options) {
  // 1. agentDir is mandatory for an enabled tool.
  const agentDir = options?.agentDir?.trim();
  if (!agentDir) {
    const explicit = coercePdfModelConfig(options?.config);
    if (explicit.primary?.trim() || (explicit.fallbacks?.length ?? 0) > 0) {
      throw new Error("createPdfTool requires agentDir when enabled");
    }
    return null; // tool unavailable
  }

  // 2. Resolve the PDF model configuration.
  const pdfModelConfig = resolvePdfModelConfigForTool({
    cfg: options?.config,
    agentDir
  });
  if (!pdfModelConfig) return null;

  // 3. Resolve configured limits, falling back to the built-in defaults.
  const maxBytesMbDefault = (options?.config?.agents?.defaults)?.pdfMaxBytesMb;
  const maxPagesDefault = (options?.config?.agents?.defaults)?.pdfMaxPages;
  const configuredMaxBytesMb = typeof maxBytesMbDefault === "number" && Number.isFinite(maxBytesMbDefault) ?
    maxBytesMbDefault : DEFAULT_MAX_BYTES_MB;
  const configuredMaxPages = typeof maxPagesDefault === "number" && Number.isFinite(maxPagesDefault) ?
    Math.floor(maxPagesDefault) : DEFAULT_MAX_PAGES;

  return {
    label: "PDF",
    name: "pdf",
    description: "Analyze one or more PDF documents with a model. Supports native PDF analysis for Anthropic and Google models, with text/image extraction fallback for other providers. Use pdf for a single path/URL, or pdfs for multiple (up to 10). Provide a prompt describing what to analyze.",
    parameters: Type.Object({
      prompt: Type.Optional(Type.String()),
      pdf: Type.Optional(Type.String({ description: "Single PDF path or URL." })),
      pdfs: Type.Optional(Type.Array(Type.String(), {
        description: "Multiple PDF paths or URLs (up to 10)."
      })),
      pages: Type.Optional(Type.String({ description: 'Page range to process, e.g. "1-5", "1,3,5-7". Defaults to all pages.' })),
      model: Type.Optional(Type.String()),
      maxBytesMb: Type.Optional(Type.Number())
    }),
    execute: async (_toolCallId, args) => {
      const record = args && typeof args === "object" ? args : {};

      // 4. Gather every string reference from `pdf` first, then `pdfs`.
      const pdfCandidates = [];
      if (typeof record.pdf === "string") pdfCandidates.push(record.pdf);
      if (Array.isArray(record.pdfs)) {
        pdfCandidates.push(...record.pdfs.filter((v) => typeof v === "string"));
      }

      // 5. De-duplicate on the trimmed text, dropping blanks.
      const seenPdfs = new Set();
      const pdfInputs = [];
      for (const candidate of pdfCandidates) {
        const trimmed = candidate.trim();
        if (!trimmed || seenPdfs.has(trimmed)) continue;
        seenPdfs.add(trimmed);
        pdfInputs.push(trimmed);
      }

      // 6. At least one reference is required.
      if (pdfInputs.length === 0) {
        throw new Error("pdf required: provide a path or URL to a PDF document");
      }

      // 7. Enforce the document-count limit (reported as a result, not thrown).
      if (pdfInputs.length > DEFAULT_MAX_PDFS) {
        return {
          content: [{
            type: "text",
            text: `Too many PDFs: ${pdfInputs.length} provided, maximum is ${DEFAULT_MAX_PDFS}. Please reduce the number.`
          }],
          details: {
            error: "too_many_pdfs",
            count: pdfInputs.length,
            max: DEFAULT_MAX_PDFS
          }
        };
      }

      // 8. Resolve prompt, model override and the byte limit.
      const { prompt: promptRaw, modelOverride } = resolvePromptAndModelOverride(record, DEFAULT_PROMPT);
      const maxBytesMbRaw = typeof record.maxBytesMb === "number" ? record.maxBytesMb : void 0;
      const maxBytesMb = typeof maxBytesMbRaw === "number" && Number.isFinite(maxBytesMbRaw) && maxBytesMbRaw > 0 ?
        maxBytesMbRaw : configuredMaxBytesMb;
      const maxBytes = Math.floor(maxBytesMb * 1024 * 1024);

      // 9. A blank `pages` argument is treated the same as an omitted one.
      const pagesRaw = typeof record.pages === "string" && record.pages.trim() ? record.pages.trim() : void 0;

      // 10. Sandbox mode is active only when a non-blank sandbox root is supplied.
      const sandboxConfig = options?.sandbox && options.sandbox.root.trim() ? {
        root: options.sandbox.root.trim(),
        bridge: options.sandbox.bridge,
        workspaceOnly: options.fsPolicy?.workspaceOnly === true
      } : null;

      // 11. Load every PDF.
      const loadedPdfs = [];
      for (const pdfRaw of pdfInputs) {
        const trimmed = pdfRaw.trim();
        const isHttpUrl = /^https?:\/\//i.test(trimmed);
        const isFileUrl = /^file:/i.test(trimmed);
        const isDataUrl = /^data:/i.test(trimmed);
        // A drive letter such as "C:\" is a path, not a URL scheme.
        const looksLikeWindowsDrive = /^[a-zA-Z]:[\\/]/.test(trimmed);

        // Reject any other URL scheme.
        if (/^[a-z][a-z0-9+.-]*:/i.test(trimmed) && !looksLikeWindowsDrive && !isFileUrl && !isHttpUrl && !isDataUrl) {
          return {
            content: [{
              type: "text",
              text: `Unsupported PDF reference: ${pdfRaw}. Use a file path, file:// URL, or http(s) URL.`
            }],
            details: {
              error: "unsupported_pdf_reference",
              pdf: pdfRaw
            }
          };
        }

        // Remote URLs are not allowed in sandbox mode.
        if (sandboxConfig && isHttpUrl) {
          throw new Error("Sandboxed PDF tool does not allow remote URLs.");
        }

        // "~" expansion is only applied outside the sandbox.
        const resolvedPdf = (() => {
          if (sandboxConfig) return trimmed;
          if (trimmed.startsWith("~")) return resolveUserPath(trimmed);
          return trimmed;
        })();

        const resolvedPathInfo = sandboxConfig ?
          await resolveSandboxedBridgeMediaPath({
            sandbox: sandboxConfig,
            mediaPath: resolvedPdf,
            inboundFallbackDir: "media/inbound"
          }) : { resolved: resolvedPdf.startsWith("file://") ? resolvedPdf.slice(7) : resolvedPdf };

        const localRoots = resolveMediaToolLocalRoots(
          options?.workspaceDir,
          { workspaceOnly: options?.fsPolicy?.workspaceOnly === true },
          [resolvedPathInfo.resolved]
        );
        const media = sandboxConfig ?
          await loadWebMediaRaw(resolvedPathInfo.resolved, {
            maxBytes,
            sandboxValidated: true,
            readFile: createSandboxBridgeReadFile({ sandbox: sandboxConfig })
          }) : await loadWebMediaRaw(resolvedPathInfo.resolved, {
            maxBytes,
            localRoots
          });

        // Accept non-"document" media only when its content type mentions "pdf"
        // (which also covers "application/pdf").
        if (media.kind !== "document") {
          const ct = (media.contentType ?? "").toLowerCase();
          if (!ct.includes("pdf")) {
            throw new Error(`Expected PDF but got ${media.contentType ?? media.kind}: ${pdfRaw}`);
          }
        }

        const base64 = media.buffer.toString("base64");
        // `||` rather than `??`: the last path segment of a URL that ends in "/" is the
        // empty string, which `??` would keep, leaving the document without a filename.
        const urlBasename = isHttpUrl ? new URL(trimmed).pathname.split("/").pop() : void 0;
        const filename = media.fileName || urlBasename || "document.pdf";

        loadedPdfs.push({
          base64,
          buffer: media.buffer,
          filename,
          resolvedPath: resolvedPathInfo.resolved,
          ...resolvedPathInfo.rewrittenFrom ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom } : {}
        });
      }

      // 12. Expand the page expression, bounded by the configured page limit.
      const pageNumbers = pagesRaw ? parsePageRange(pagesRaw, configuredMaxPages) : void 0;

      // 13. Extraction callback handed to runPdfPrompt; processes the PDFs one by one.
      const getExtractions = async () => {
        const extractedAll = [];
        for (const pdf of loadedPdfs) {
          const extracted = await extractPdfContent({
            buffer: pdf.buffer,
            maxPages: configuredMaxPages,
            maxPixels: PDF_MAX_PIXELS,
            minTextChars: PDF_MIN_TEXT_CHARS,
            pageNumbers
          });
          extractedAll.push(extracted);
        }
        return extractedAll;
      };

      // 14. Run the analysis.
      const result = await runPdfPrompt({
        cfg: options?.config,
        agentDir,
        pdfModelConfig,
        modelOverride,
        prompt: promptRaw,
        pdfBuffers: loadedPdfs.map((p) => ({
          base64: p.base64,
          filename: p.filename
        })),
        pageNumbers,
        getExtractions
      });

      // 15. A single PDF is reported flat; several are reported as a `pdfs` list.
      const pdfDetails = loadedPdfs.length === 1 ? {
        pdf: loadedPdfs[0].resolvedPath,
        ...loadedPdfs[0].rewrittenFrom ? { rewrittenFrom: loadedPdfs[0].rewrittenFrom } : {}
      } : {
        pdfs: loadedPdfs.map((p) => ({
          pdf: p.resolvedPath,
          ...p.rewrittenFrom ? { rewrittenFrom: p.rewrittenFrom } : {}
        }))
      };
      return buildTextToolResult(result, pdfDetails);
    }
  };
}
2.4 页码范围解析
javascript
/**
 * Expands a page-range expression into a sorted list of unique 1-based page numbers.
 *
 * Supported forms (comma separated, whitespace tolerated):
 *   "1-5"     -> [1, 2, 3, 4, 5]
 *   "1,3,5-7" -> [1, 3, 5, 6, 7]
 *   "10-"     -> [10, 11, ..., maxPages]
 *   "-3"      -> [1, 2, 3]
 *
 * Pages outside 1..maxPages and tokens with non-numeric bounds are ignored.
 *
 * @param {string} rangeStr Page-range expression.
 * @param {number} maxPages Highest page number that may be returned.
 * @returns {number[]} Ascending, de-duplicated page numbers.
 */
function parsePageRange(rangeStr, maxPages) {
  const pages = new Set();
  for (const part of rangeStr.split(",")) {
    const token = part.trim();
    if (!token) continue;

    if (token.includes("-")) {
      const [startText, endText] = token.split("-").map((s) => s.trim());
      // An omitted bound means "from the first page" / "to the last allowed page".
      const startNum = startText ? Number.parseInt(startText, 10) : 1;
      const endNum = endText ? Number.parseInt(endText, 10) : maxPages;
      if (Number.isNaN(startNum) || Number.isNaN(endNum)) continue;

      // Page numbers are 1-based: clamp the start so "0-3" cannot yield page 0.
      const first = Math.max(startNum, 1);
      const last = Math.min(endNum, maxPages);
      // A non-finite upper bound (e.g. missing or infinite maxPages) would never terminate.
      if (!Number.isFinite(last)) continue;

      for (let page = first; page <= last; page++) {
        pages.add(page);
      }
    } else {
      const num = Number.parseInt(token, 10);
      if (!Number.isNaN(num) && num >= 1 && num <= maxPages) {
        pages.add(num);
      }
    }
  }
  return [...pages].sort((a, b) => a - b);
}
2.5 执行流程图
pdf 工具调用
↓
1. 检查 agentDir(必需)
↓
2. 解析 PDF 模型配置
↓
3. 收集 PDF 候选
↓
4. 去重
↓
5. 检查必需参数
↓
6. 检查数量限制(≤10)
↓
7. 解析提示词和模型
↓
8. 解析页码范围
↓
9. 解析沙盒配置
↓
10. 加载每个 PDF
├─ 检查 URL 格式
├─ 解析路径
├─ 加载媒体
└─ 验证类型
↓
11. 解析页码
↓
12. 提取内容(文本/图像)
↓
13. 调用 PDF 分析
↓
14. 返回结果
三、关键机制对比
3.1 功能定位
| 特性 | image | pdf |
|---|---|---|
| 用途 | 图像分析 | PDF 文档分析 |
| 输入 | 图片文件 | PDF 文件 |
| 最大数量 | 20 个 | 10 个 |
3.2 模型支持
| 特性 | image | pdf |
|---|---|---|
| 视觉模型 | 需要 | 需要 |
| 原生支持 | 通用 | Anthropic/Google |
| 提取回退 | 不支持 | 支持 |
3.3 安全限制
| 限制类型 | image | pdf |
|---|---|---|
| 沙盒隔离 | 支持 | 支持 |
| HTTP URL | ✅(非沙盒) | ✅(非沙盒) |
| 工作目录 | workspaceOnly | workspaceOnly |
四、使用示例
4.1 image 工具调用
用户 :分析这张图片里有什么
大模型返回:
json
{
"tool_call": {
"name": "image",
"arguments": {
"image": "/path/to/photo.jpg",
"prompt": "描述这张图片的内容"
}
}
}
执行结果:
json
{
"content": [{
"type": "text",
"text": "这张图片展示了一只可爱的猫咪..."
}],
"details": {
"image": "/path/to/photo.jpg"
}
}
4.2 pdf 工具调用
用户 :分析这份 PDF 文档
大模型返回:
json
{
"tool_call": {
"name": "pdf",
"arguments": {
"pdf": "/path/to/document.pdf",
"prompt": "总结这份文档的主要内容",
"pages": "1-5"
}
}
}
执行结果:
json
{
"content": [{
"type": "text",
"text": "这份文档主要讲述了..."
}],
"details": {
"pdf": "/path/to/document.pdf",
"pages": "1-5"
}
}