存档111111111 - 技术栈

bash 复制代码

 多种大模型API入参兼容实现机制

  Kode项目通过多层级适配器架构实现多种大模型API入参兼容，具体实现如下：

  1. 模型适配器工厂模式 (src/services/modelAdapterFactory.ts)

  核心逻辑：
  /**
   * Creates the API adapter that matches the given model profile.
   *
   * The model's capabilities are looked up by model name, the API type is
   * resolved from the profile plus those capabilities, and the corresponding
   * adapter is instantiated.
   *
   * @param modelProfile - Profile of the model the adapter is built for.
   * @returns A `ResponsesAPIAdapter` when the resolved API type is
   *   `'responses_api'`; a `ChatCompletionsAdapter` for every other value.
   */
  static createAdapter(modelProfile: ModelProfile): ModelAPIAdapter {
    const capabilities = getModelCapabilities(modelProfile.modelName)
    const apiType = this.determineAPIType(modelProfile, capabilities)

    if (apiType === 'responses_api') {
      return new ResponsesAPIAdapter(capabilities, modelProfile)
    }

    // 'chat_completions' and any unrecognised API type share the same adapter.
    return new ChatCompletionsAdapter(capabilities, modelProfile)
  }

  2. 模型能力注册表系统 (src/constants/modelCapabilities.ts)

  模型能力定义：
  // Standard capability profile for GPT-5 models.
  // Prefers the Responses API and lists Chat Completions as the fallback.
  const GPT5_CAPABILITIES: ModelCapabilities = {
    apiArchitecture: {
      primary: 'responses_api',
      fallback: 'chat_completions'
    },
    parameters: {
      maxTokensField: 'max_completion_tokens', // GPT-5 uses a different token-limit field
      supportsReasoningEffort: true,
      supportsVerbosity: true,
      temperatureMode: 'fixed_one' // GPT-5 temperature is pinned to 1
    },
    toolCalling: {
      mode: 'custom_tools',
      supportsFreeform: true,
      supportsAllowedTools: true,
      supportsParallelCalls: true
    }
  }

  // Standard capability profile for plain Chat Completions models.
  // No fallback architecture and no toolCalling section are declared here.
  const CHAT_COMPLETIONS_CAPABILITIES: ModelCapabilities = {
    apiArchitecture: {
      primary: 'chat_completions'
    },
    parameters: {
      maxTokensField: 'max_tokens', // legacy token-limit field
      supportsReasoningEffort: false,
      supportsVerbosity: false,
      temperatureMode: 'flexible' // temperature is freely adjustable
    }
  }

  智能推断机制：
  /**
   * Infers a capability profile from the model name alone.
   *
   * Matching is a case-insensitive substring test:
   * - names containing `gpt-5` or `gpt5` get `GPT5_CAPABILITIES` itself;
   * - names containing `glm-5` or `glm5` get a copy of `GPT5_CAPABILITIES`
   *   whose `toolCalling.supportsAllowedTools` is switched off;
   * - anything else (including an empty name) yields `null`, leaving the
   *   caller to apply its Chat Completions default.
   *
   * @param modelName - Model identifier to inspect.
   * @returns The inferred capabilities, or `null` when nothing matches.
   */
  export function inferModelCapabilities(modelName: string): ModelCapabilities | null {
    if (!modelName) {
      return null
    }

    const normalized = modelName.toLowerCase()
    const mentions = (...needles: string[]): boolean =>
      needles.some(needle => normalized.includes(needle))

    // GPT-5 family: the shared profile object is returned as-is.
    if (mentions('gpt-5', 'gpt5')) {
      return GPT5_CAPABILITIES
    }

    // GLM-5 family: same as GPT-5 except allowed-tools may be unsupported.
    if (mentions('glm-5', 'glm5')) {
      const glmToolCalling = {
        ...GPT5_CAPABILITIES.toolCalling,
        supportsAllowedTools: false,
      }
      return { ...GPT5_CAPABILITIES, toolCalling: glmToolCalling }
    }

    // No match: the caller falls back to Chat Completions.
    return null
  }

  3. OpenAI兼容性处理核心 (src/services/openai.ts)

  多提供商支持列表：
  const isOpenAICompatible = [
    'minimax', 'kimi', 'deepseek', 'siliconflow', 'qwen',
    'glm', 'baidu-qianfan', 'openai', 'mistral', 'xai',
    'groq', 'custom-openai'
  ].includes(provider)

  参数转换机制：
  /**
   * Rewrites a Chat Completions request in place so that it is accepted by
   * models with non-standard parameter rules.
   *
   * Nothing is changed unless the model is a GPT-5 model (name contains
   * `gpt-5` or `gpt5`, case-insensitive) or its feature record sets
   * `usesMaxCompletionTokens`. When it applies:
   * - `max_tokens` is always removed; its value is moved to
   *   `max_completion_tokens` unless that field already carries a value;
   * - `temperature` is forced to 1 when the features require it and a
   *   temperature was supplied;
   * - for GPT-5 only, `frequency_penalty`, `presence_penalty`, `logit_bias`
   *   and `user` are dropped.
   *
   * @param opts - Request parameters; mutated in place.
   */
  function applyModelSpecificTransformations(
    opts: OpenAI.ChatCompletionCreateParams,
  ): void {
    const features = getModelFeatures(opts.model)
    const modelName = opts.model.toLowerCase()
    // Accept both spellings, matching the name-based capability inference.
    const isGPT5 = modelName.includes('gpt-5') || modelName.includes('gpt5')

    if (!isGPT5 && !features.usesMaxCompletionTokens) {
      return
    }

    if ('max_tokens' in opts) {
      // An explicit max_completion_tokens wins; otherwise carry the value over.
      if (opts.max_completion_tokens == null) {
        opts.max_completion_tokens = opts.max_tokens
      }
      // Always drop the legacy field: sending both still trips the
      // "unsupported parameter" rejection this function exists to avoid.
      delete opts.max_tokens
    }

    if (features.requiresTemperatureOne && opts.temperature !== undefined) {
      opts.temperature = 1
    }

    if (isGPT5) {
      delete opts.frequency_penalty
      delete opts.presence_penalty
      delete opts.logit_bias
      delete opts.user
    }
  }

  4. 端点回退机制

  多端点尝试：
  /**
   * POSTs the request to each candidate endpoint in turn and returns the
   * first response whose status is OK.
   *
   * For provider `'minimax'` the candidates are `/text/chatcompletion_v2`
   * followed by `/chat/completions`; every other provider only tries
   * `/chat/completions`.
   *
   * @param baseURL - Prefix that each endpoint path is appended to.
   * @param opts - Request parameters, serialised as the JSON body.
   * @param headers - HTTP headers sent with every attempt.
   * @param provider - Provider id used to choose the candidate endpoints.
   * @param proxy - Passed through as the `dispatcher` fetch option.
   * @param signal - Optional abort signal forwarded to `fetch`.
   * @returns The successful response and the endpoint path that produced it.
   * @throws The original error if the request was aborted; otherwise an
   *   `Error` listing every endpoint failure once all candidates fail.
   */
  async function tryWithEndpointFallback(
    baseURL: string,
    opts: OpenAI.ChatCompletionCreateParams,
    headers: Record<string, string>,
    provider: string,
    proxy: any,
    signal?: AbortSignal,
  ): Promise<{ response: Response; endpoint: string }> {
    const endpointsToTry: string[] =
      provider === 'minimax'
        ? ['/text/chatcompletion_v2', '/chat/completions']
        : ['/chat/completions']

    // The payload is identical for every attempt, so serialise it once.
    const body = JSON.stringify(opts.stream ? { ...opts, stream: true } : opts)
    const failures: string[] = []

    for (const endpoint of endpointsToTry) {
      try {
        const init = { method: 'POST', headers, body, dispatcher: proxy, signal }
        const response = await fetch(`${baseURL}${endpoint}`, init)

        if (response.ok) {
          return { response, endpoint }
        }
        failures.push(`${endpoint}: HTTP ${response.status}`)
      } catch (error) {
        // A cancelled request must not be retried against another endpoint.
        if (signal?.aborted) {
          throw error
        }
        const reason = error instanceof Error ? error.message : String(error)
        failures.push(`${endpoint}: ${reason}`)
      }
    }

    // Previously the function fell through and resolved to undefined here,
    // which broke callers that destructure the result.
    throw new Error(
      `All endpoints failed for provider '${provider}' (${failures.join('; ')})`,
    )
  }

  5. 错误处理和自动修复

  错误检测器：
  /** Pairs an error category with its detector and its in-place repair. */
  interface ErrorHandler {
    type: ModelErrorType
    detect: ErrorDetector
    fix: ErrorFixer
  }

  /** Handlers for GPT-5 request errors that can be repaired automatically. */
  const GPT5_ERROR_HANDLERS: ErrorHandler[] = [
    {
      type: ModelErrorType.MaxCompletionTokens,
      // Matches either the explicit "unsupported parameter" wording or any
      // message that mentions both token-limit field names.
      detect: errMsg => {
        const message = errMsg.toLowerCase()
        if (message.includes("unsupported parameter: 'max_tokens'")) {
          return true
        }
        return message.includes("max_tokens") && message.includes("max_completion_tokens")
      },
      // Moves the legacy max_tokens value onto max_completion_tokens.
      fix: async opts => {
        if (!('max_tokens' in opts)) {
          return
        }
        opts.max_completion_tokens = opts.max_tokens
        delete opts.max_tokens
      },
    }
  ]

  6. 具体提供商适配示例

  DeepSeek适配：
  - 使用 provider: 'deepseek'
  - 自动检测为OpenAI兼容
  - 使用 /chat/completions 端点
  - 支持标准参数映射

  Qwen适配：
  - 使用 provider: 'qwen'
  - 自动检测为OpenAI兼容
  - 使用 /chat/completions 端点
  - 支持标准参数映射

  自定义API适配：
  - 使用 provider: 'custom-openai'
  - 支持自定义baseURL
  - 自动推断模型能力
  - 支持端点回退机制

  7. GPT-5 Responses API特殊处理

  GPT-5 API调用：
  /**
   * Builds the request parameters for a GPT-5 Responses API call.
   *
   * NOTE(review): as excerpted here the function only assembles
   * `responsesParams`; it never sends a request or returns a value, and
   * `modelProfile` and `signal` are unused. The remainder of the
   * implementation is presumably elided from this excerpt.
   */
  export async function callGPT5ResponsesAPI(
    modelProfile: any,
    opts: any,
    signal?: AbortSignal,
  ): Promise<any> {
    // GPT-5 uses a different parameter structure
    const responsesParams: any = {
      model: opts.model,
      input: opts.messages, // the Responses API takes 'input' instead of 'messages'
      // NOTE(review): the public Responses API reference names this field
      // `max_output_tokens` — confirm that `max_completion_tokens` is accepted.
      max_completion_tokens: opts.max_completion_tokens,
      reasoning: {
        // `||` also replaces an empty string with 'medium'
        effort: opts.reasoning_effort || 'medium',
        // NOTE(review): `generate_summary` appears to be superseded by
        // `summary` in the current API reference — confirm.
        generate_summary: true,
      }
    }
  }

  8. 模型配置系统 (src/utils/config.ts)

  提供商类型定义：
  export type ProviderType =
    | 'anthropic' | 'openai' | 'mistral' | 'deepseek' | 'kimi'
    | 'qwen' | 'glm' | 'minimax' | 'baidu-qianfan' | 'siliconflow'
    | 'bigdream' | 'opendev' | 'xai' | 'groq' | 'gemini'
    | 'ollama' | 'azure' | 'custom' | 'custom-openai'

  总结

  Kode项目通过以下机制实现多种大模型API入参兼容：

  1. 分层适配器架构：工厂模式根据模型能力选择合适适配器
  2. 智能能力推断：基于模型名称自动推断技术能力
  3. 参数映射转换：统一参数到各提供商特定参数的转换
  4. 端点回退机制：多端点尝试确保兼容性
  5. 错误自动修复：运行时检测并修复参数错误
  6. 提供商特定优化：为每个提供商提供最佳参数配置

  这种设计使得Kode能够无缝支持DeepSeek、Qwen、GLM等数十种大模型，同时保持统一的开发体验。

bash 复制代码

 1. 模型适配器工厂模式 (src/services/modelAdapterFactory.ts)

  核心逻辑：
  static createAdapter(modelProfile: ModelProfile): ModelAPIAdapter {
    const capabilities = getModelCapabilities(modelProfile.modelName)

    // 根据模型能力决定使用哪种API
    const apiType = this.determineAPIType(modelProfile, capabilities)

    switch (apiType) {
      case 'responses_api':
        return new ResponsesAPIAdapter(capabilities, modelProfile)
      case 'chat_completions':
      default:
        return new ChatCompletionsAdapter(capabilities, modelProfile)
    }
  }

  2. 模型能力注册表系统 (src/constants/modelCapabilities.ts)

  模型能力定义：
  // GPT-5标准能力定义
  const GPT5_CAPABILITIES: ModelCapabilities = {
    apiArchitecture: {
      primary: 'responses_api',
      fallback: 'chat_completions'
    },
    parameters: {
      maxTokensField: 'max_completion_tokens', // GPT-5使用不同字段
      supportsReasoningEffort: true,
      supportsVerbosity: true,
      temperatureMode: 'fixed_one' // GPT-5温度固定为1
    },
    toolCalling: {
      mode: 'custom_tools',
      supportsFreeform: true,
      supportsAllowedTools: true,
      supportsParallelCalls: true
    }
  }

  // Chat Completions标准能力定义
  const CHAT_COMPLETIONS_CAPABILITIES: ModelCapabilities = {
    apiArchitecture: {
      primary: 'chat_completions'
    },
    parameters: {
      maxTokensField: 'max_tokens', // 传统字段
      supportsReasoningEffort: false,
      supportsVerbosity: false,
      temperatureMode: 'flexible' // 灵活温度
    }
  }

  智能推断机制：
  export function inferModelCapabilities(modelName: string): ModelCapabilities | null {
    if (!modelName) return null

    const lowerName = modelName.toLowerCase()

    // GPT-5系列检测
    if (lowerName.includes('gpt-5') || lowerName.includes('gpt5')) {
      return GPT5_CAPABILITIES
    }

    // GLM系列
    if (lowerName.includes('glm-5') || lowerName.includes('glm5')) {
      return {
        ...GPT5_CAPABILITIES,
        toolCalling: {
          ...GPT5_CAPABILITIES.toolCalling,
          supportsAllowedTools: false  // GLM可能不支持
        }
      }
    }

    // 默认使用Chat Completions
    return null
  }

  3. OpenAI兼容性处理核心 (src/services/openai.ts)

  多提供商支持列表：
  const isOpenAICompatible = [
    'minimax', 'kimi', 'deepseek', 'siliconflow', 'qwen',
    'glm', 'baidu-qianfan', 'openai', 'mistral', 'xai',
    'groq', 'custom-openai'
  ].includes(provider)

  参数转换机制：
  /**
   * Rewrites a Chat Completions request in place so that it is accepted by
   * models with non-standard parameter rules.
   *
   * Nothing is changed unless the model is a GPT-5 model (name contains
   * `gpt-5` or `gpt5`, case-insensitive) or its feature record sets
   * `usesMaxCompletionTokens`. When it applies:
   * - `max_tokens` is always removed; its value is moved to
   *   `max_completion_tokens` unless that field already carries a value;
   * - `temperature` is forced to 1 when the features require it and a
   *   temperature was supplied;
   * - for GPT-5 only, `frequency_penalty`, `presence_penalty`, `logit_bias`
   *   and `user` are dropped.
   *
   * @param opts - Request parameters; mutated in place.
   */
  function applyModelSpecificTransformations(
    opts: OpenAI.ChatCompletionCreateParams,
  ): void {
    const features = getModelFeatures(opts.model)
    const modelName = opts.model.toLowerCase()
    // Accept both spellings, matching the name-based capability inference.
    const isGPT5 = modelName.includes('gpt-5') || modelName.includes('gpt5')

    if (!isGPT5 && !features.usesMaxCompletionTokens) {
      return
    }

    if ('max_tokens' in opts) {
      // An explicit max_completion_tokens wins; otherwise carry the value over.
      if (opts.max_completion_tokens == null) {
        opts.max_completion_tokens = opts.max_tokens
      }
      // Always drop the legacy field: sending both still trips the
      // "unsupported parameter" rejection this function exists to avoid.
      delete opts.max_tokens
    }

    if (features.requiresTemperatureOne && opts.temperature !== undefined) {
      opts.temperature = 1
    }

    if (isGPT5) {
      delete opts.frequency_penalty
      delete opts.presence_penalty
      delete opts.logit_bias
      delete opts.user
    }
  }

  4. 端点回退机制

  多端点尝试：
  /**
   * POSTs the request to each candidate endpoint in turn and returns the
   * first response whose status is OK.
   *
   * For provider `'minimax'` the candidates are `/text/chatcompletion_v2`
   * followed by `/chat/completions`; every other provider only tries
   * `/chat/completions`.
   *
   * @param baseURL - Prefix that each endpoint path is appended to.
   * @param opts - Request parameters, serialised as the JSON body.
   * @param headers - HTTP headers sent with every attempt.
   * @param provider - Provider id used to choose the candidate endpoints.
   * @param proxy - Passed through as the `dispatcher` fetch option.
   * @param signal - Optional abort signal forwarded to `fetch`.
   * @returns The successful response and the endpoint path that produced it.
   * @throws The original error if the request was aborted; otherwise an
   *   `Error` listing every endpoint failure once all candidates fail.
   */
  async function tryWithEndpointFallback(
    baseURL: string,
    opts: OpenAI.ChatCompletionCreateParams,
    headers: Record<string, string>,
    provider: string,
    proxy: any,
    signal?: AbortSignal,
  ): Promise<{ response: Response; endpoint: string }> {
    const endpointsToTry: string[] =
      provider === 'minimax'
        ? ['/text/chatcompletion_v2', '/chat/completions']
        : ['/chat/completions']

    // The payload is identical for every attempt, so serialise it once.
    const body = JSON.stringify(opts.stream ? { ...opts, stream: true } : opts)
    const failures: string[] = []

    for (const endpoint of endpointsToTry) {
      try {
        const init = { method: 'POST', headers, body, dispatcher: proxy, signal }
        const response = await fetch(`${baseURL}${endpoint}`, init)

        if (response.ok) {
          return { response, endpoint }
        }
        failures.push(`${endpoint}: HTTP ${response.status}`)
      } catch (error) {
        // A cancelled request must not be retried against another endpoint.
        if (signal?.aborted) {
          throw error
        }
        const reason = error instanceof Error ? error.message : String(error)
        failures.push(`${endpoint}: ${reason}`)
      }
    }

    // Previously the function fell through and resolved to undefined here,
    // which broke callers that destructure the result.
    throw new Error(
      `All endpoints failed for provider '${provider}' (${failures.join('; ')})`,
    )
  }

  5. 错误处理和自动修复

  错误检测器：
  interface ErrorHandler {
    type: ModelErrorType
    detect: ErrorDetector
    fix: ErrorFixer
  }

  const GPT5_ERROR_HANDLERS: ErrorHandler[] = [
    {
      type: ModelErrorType.MaxCompletionTokens,
      detect: errMsg => {
        const lowerMsg = errMsg.toLowerCase()
        return (
          lowerMsg.includes("unsupported parameter: 'max_tokens'") ||
          lowerMsg.includes("max_tokens") && lowerMsg.includes("max_completion_tokens")
        )
      },
      fix: async opts => {
        if ('max_tokens' in opts) {
          opts.max_completion_tokens = opts.max_tokens
          delete opts.max_tokens
        }
      },
    }
  ]

  6. 具体提供商适配示例

  DeepSeek适配：
  - 使用 provider: 'deepseek'
  - 自动检测为OpenAI兼容
  - 使用 /chat/completions 端点
  - 支持标准参数映射

  Qwen适配：
  - 使用 provider: 'qwen'
  - 自动检测为OpenAI兼容
  - 使用 /chat/completions 端点
  - 支持标准参数映射

  自定义API适配：
  - 使用 provider: 'custom-openai'
  - 支持自定义baseURL
  - 自动推断模型能力
  - 支持端点回退机制

  7. GPT-5 Responses API特殊处理

  GPT-5 API调用：
  export async function callGPT5ResponsesAPI(
    modelProfile: any,
    opts: any,
    signal?: AbortSignal,
  ): Promise<any> {
    // GPT-5使用不同的参数结构
    const responsesParams: any = {
      model: opts.model,
      input: opts.messages, // Responses API使用'input'而非'messages'
      max_completion_tokens: opts.max_completion_tokens,
      reasoning: {
        effort: opts.reasoning_effort || 'medium',
        generate_summary: true,
      }
    }
  }

  8. 模型配置系统 (src/utils/config.ts)

  提供商类型定义：
  export type ProviderType =
    | 'anthropic' | 'openai' | 'mistral' | 'deepseek' | 'kimi'
    | 'qwen' | 'glm' | 'minimax' | 'baidu-qianfan' | 'siliconflow'
    | 'bigdream' | 'opendev' | 'xai' | 'groq' | 'gemini'
    | 'ollama' | 'azure' | 'custom' | 'custom-openai'

  总结

  Kode项目通过以下机制实现多种大模型API入参兼容：

  1. 分层适配器架构：工厂模式根据模型能力选择合适适配器
  2. 智能能力推断：基于模型名称自动推断技术能力
  3. 参数映射转换：统一参数到各提供商特定参数的转换
  4. 端点回退机制：多端点尝试确保兼容性
  5. 错误自动修复：运行时检测并修复参数错误
  6. 提供商特定优化：为每个提供商提供最佳参数配置

bash 复制代码

1. 参数组装流程

  核心入口：queryLLM 函数

  // src/services/claude.ts
  export async function queryLLM(
    messages: (UserMessage | AssistantMessage)[],
    systemPrompt: string[],
    maxThinkingTokens: number,
    tools: Tool[],
    signal: AbortSignal,
    options: {
      safeMode: boolean
      model: string | ModelPointerType
      prependCLISysprompt: boolean
      toolUseContext?: ToolUseContext
    },
  ): Promise<AssistantMessage>

  参数结构：

  A. 消息历史 (messages)

  - 用户消息 (UserMessage): 包含用户输入的问题
  - 助手消息 (AssistantMessage): 包含AI的回复历史
  - 工具调用结果: 包含工具执行的结果

  B. 系统提示 (systemPrompt)

  // 包含：
  - 基础系统提示
  - AGENTS.md 项目上下文
  - 动态提醒信息
  - 工具使用说明

  C. 工具定义 (tools)

  // 每个工具包含：
  {
    name: string,           // 工具名称
    description: string,    // 工具描述
    input_schema: object,   // 输入参数schema
  }

  2. 模型适配器系统

  多模型支持架构

  // ModelAdapterFactory 根据模型配置选择适配器
  - ResponsesAPIAdapter: GPT-5等新模型
  - ChatCompletionsAdapter: 传统OpenAI兼容模型

  模型特定参数转换

  // 应用模型特定参数
  applyModelSpecificTransformations(opts)
  - GPT-5: max_completion_tokens, temperature=1
  - 传统模型: max_tokens, temperature=MAIN_QUERY_TEMPERATURE（可调，不强制为1）

  3. 最终API参数组装

  Anthropic API格式

  {
    model: string,
    max_tokens: number,
    messages: MessageParam[],
    system: TextBlockParam[],
    tools: ToolSchema[],
    tool_choice: { type: 'auto' },
    thinking?: { max_tokens: number }
  }

  OpenAI兼容格式

  {
    model: string,
    max_tokens: number,
    messages: ChatCompletionMessageParam[],
    tools: ChatCompletionTool[],
    tool_choice: 'auto',
    reasoning_effort?: string
  }

bash 复制代码

 Kode 参数组装流程详细分析

  1. 用户输入处理流程

  1.1 用户输入接收 (src/components/PromptInput.tsx:340-351)

  // 处理用户输入并创建消息
  const messages = await processUserInput(
    input,
    mode,
    setToolJSX,
    {
      options: {
        commands,
        tools,
        verbose,
        maxThinkingTokens: 0,
        isKodingRequest: true,
        kodingContext,
      },
      messageId: undefined,
      abortController: abortController || new AbortController(),
      readFileTimestamps,
      setForkConvoWithMessagesOnTheNextRender,
    },
    pastedImage ?? null,
  )

  1.2 消息创建 (src/utils/messages.tsx:324-391)

  // 创建基础用户消息
  let userMessage: UserMessage

  if (pastedImage) {
    userMessage = createUserMessage([
      {
        type: 'image',
        source: {
          type: 'base64',
          media_type: 'image/png',
          data: pastedImage,
        },
      },
      {
        type: 'text',
        text: isKodingRequest && kodingContextInfo
          ? `${kodingContextInfo}\n\n${input}`
          : input,
      },
    ])
  } else {
    let processedInput = isKodingRequest && kodingContextInfo
      ? `${kodingContextInfo}\n\n${input}`
      : input

    // 处理动态内容（bash命令、mention等）
    if (input.includes('!`') || input.includes('@')) {
      // 执行bash命令和mention处理
      processedInput = await executeBashCommands(processedInput)
      await processMentions(input)
    }

    userMessage = createUserMessage(processedInput)
  }

  2. 查询流程参数组装

  2.1 查询入口 (src/query.ts:176-240)

  // 自动压缩检查
  const { messages: processedMessages, wasCompacted } = await checkAutoCompact(
    messages,
    toolUseContext,
  )
  if (wasCompacted) {
    messages = processedMessages
  }

  // 系统提示构建
  const { systemPrompt: fullSystemPrompt, reminders } =
    formatSystemPromptWithContext(systemPrompt, context, toolUseContext.agentId)

  // 将提醒注入到最新用户消息
  if (reminders && messages.length > 0) {
    // 找到最后一个用户消息并注入提醒
    for (let i = messages.length - 1; i >= 0; i--) {
      const msg = messages[i]
      if (msg?.type === 'user') {
        const lastUserMessage = msg as UserMessage
        messages[i] = {
          ...lastUserMessage,
          message: {
            ...lastUserMessage.message,
            content: typeof lastUserMessage.message.content === 'string'
              ? reminders + lastUserMessage.message.content
              : [
                  ...(Array.isArray(lastUserMessage.message.content)
                    ? lastUserMessage.message.content
                    : []),
                  { type: 'text', text: reminders },
                ],
          },
        }
        break
      }
    }
  }

  // 调用LLM
  function getAssistantResponse() {
    return queryLLM(
      normalizeMessagesForAPI(messages),
      fullSystemPrompt,
      toolUseContext.options.maxThinkingTokens,
      toolUseContext.options.tools,
      toolUseContext.abortController.signal,
      {
        safeMode: toolUseContext.options.safeMode ?? false,
        model: toolUseContext.options.model || 'main',
        prependCLISysprompt: true,
        toolUseContext: toolUseContext,
      },
    )
  }

  3. 系统提示构建

  3.1 系统提示增强 (src/services/claude.ts:1209-1278)

  /**
   * Builds the final system prompt and the reminder text for one query.
   *
   * Steps, in order:
   * 1. Copies `systemPrompt` so the caller's array is not mutated.
   * 2. If the 'main' model is a GPT-5 model, appends the agent-persistence
   *    instructions.
   * 3. If `context` has at least one entry: appends the Kode project context
   *    (when non-empty), collects the system reminders into a single
   *    newline-terminated string, and appends every context entry except
   *    `projectDocs` and `userDocs` as `<context name="…">…</context>`.
   *
   * @param systemPrompt - Base system prompt segments.
   * @param context - Named context values to expose to the model.
   * @param agentId - Optional agent id forwarded to the reminder generator.
   * @returns The enhanced prompt segments and the reminder text (empty
   *   string when there is no context or no reminder).
   */
  export function formatSystemPromptWithContext(
    systemPrompt: string[],
    context: { [k: string]: string },
    agentId?: string,
  ): { systemPrompt: string[]; reminders: string } {
    const enhancedPrompt = [...systemPrompt]
    let reminders = ''

    // GPT-5 persistence support
    const modelProfile = getModelManager().getModel('main')
    if (modelProfile && isGPT5Model(modelProfile.modelName)) {
      enhancedPrompt.push(
        "\n# Agent Persistence for Long-Running Coding Tasks",
        // Kept on one line: the literal was previously split by a hard wrap,
        // which is an unterminated-string syntax error.
        "You are working on a coding project that may involve multiple steps and iterations. Please maintain context and continuity throughout the session:",
        "- Remember architectural decisions and design patterns established earlier",
        "- Keep track of file modifications and their relationships",
        "- Maintain awareness of the overall project structure and goals",
        "- Reference previous implementations when making related changes",
        "- Ensure consistency with existing code style and conventions",
        "- Build incrementally on previous work rather than starting from scratch",
      )
    }

    const hasContext = Object.keys(context).length > 0
    if (!hasContext) {
      return { systemPrompt: enhancedPrompt, reminders }
    }

    // Kode context is injected directly into the system prompt.
    const kodeContext = generateKodeContext()
    if (kodeContext) {
      enhancedPrompt.push('\n---\n# 项目上下文\n', kodeContext, '\n---\n')
    }

    // Dynamic reminders are returned separately so the caller can attach
    // them to a user message instead of the system prompt.
    const reminderMessages = generateSystemReminders(hasContext, agentId)
    if (reminderMessages.length > 0) {
      reminders = reminderMessages.map(r => r.content).join('\n') + '\n'
    }

    enhancedPrompt.push(
      `\nAs you answer the user's questions, you can use the following context:\n`,
    )

    // projectDocs / userDocs are skipped here: they are handled elsewhere.
    for (const [key, value] of Object.entries(context)) {
      if (key === 'projectDocs' || key === 'userDocs') {
        continue
      }
      enhancedPrompt.push(`<context name="${key}">${value}</context>`)
    }

    return { systemPrompt: enhancedPrompt, reminders }
  }

  4. 模型适配器参数转换

  4.1 适配器工厂 (src/services/modelAdapterFactory.ts:12-26)

  static createAdapter(modelProfile: ModelProfile): ModelAPIAdapter {
    const capabilities = getModelCapabilities(modelProfile.modelName)

    // 确定使用哪种API
    const apiType = this.determineAPIType(modelProfile, capabilities)

    // 创建对应的适配器
    switch (apiType) {
      case 'responses_api':
        return new ResponsesAPIAdapter(capabilities, modelProfile)
      case 'chat_completions':
      default:
        return new ChatCompletionsAdapter(capabilities, modelProfile)
    }
  }

  4.2 统一请求参数 (src/services/claude.ts:1939-1949)

  // 构建统一请求参数
  const unifiedParams: UnifiedRequestParams = {
    messages: openaiMessages,
    systemPrompt: openaiSystem.map(s => s.content as string),
    tools: tools,
    maxTokens: getMaxTokensFromProfile(modelProfile),
    stream: config.stream,
    reasoningEffort: reasoningEffort as any,
    temperature: isGPT5Model(model) ? 1 : MAIN_QUERY_TEMPERATURE,
    previousResponseId: toolUseContext?.responseState?.previousResponseId,
    verbosity: 'high' // 编码任务使用高详细度
  }

  5. 最终API请求参数

  5.1 Anthropic API 参数 (src/services/claude.ts:1468-1475)

  // Base request for the Anthropic Messages API. Tools and tool_choice are
  // only sent when at least one tool schema exists.
  const hasTools = toolSchemas.length > 0
  const params: Anthropic.Beta.Messages.MessageCreateParams = {
    model,
    max_tokens: getMaxTokensFromProfile(modelProfile),
    messages: processedMessages,
    system: processedSystem,
    tools: hasTools ? toolSchemas : undefined,
    tool_choice: hasTools ? { type: 'auto' } : undefined,
  }

  // Thinking-token support.
  if (maxThinkingTokens > 0) {
    // Use a named alias instead of two consecutive `(params as any)…`
    // statements: without semicolons the second one, starting with `(`, was
    // parsed as a call on the preceding object literal.
    const extendedParams = params as any
    extendedParams.extra_headers = {
      'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
    }
    // NOTE(review): Anthropic's extended-thinking reference describes this
    // field as `{ type: 'enabled', budget_tokens }` — confirm the
    // `{ max_tokens }` shape is accepted.
    extendedParams.thinking = { max_tokens: maxThinkingTokens }
  }

  5.2 OpenAI API 参数 (src/services/claude.ts:1896-1918)

  const opts: OpenAI.ChatCompletionCreateParams = {
    model,
    ...(isGPT5 ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens }),
    messages: [...openaiSystem, ...openaiMessages],
    temperature: isGPT5 ? 1 : MAIN_QUERY_TEMPERATURE,
  }

  // 流式支持
  if (config.stream) {
    (opts as OpenAI.ChatCompletionCreateParams).stream = true
    opts.stream_options = {
      include_usage: true,
    }
  }

  // 工具支持
  if (toolSchemas.length > 0) {
    opts.tools = toolSchemas
    opts.tool_choice = 'auto'
  }

  // 推理强度
  const reasoningEffort = await getReasoningEffort(modelProfile, messages)
  if (reasoningEffort) {
    opts.reasoning_effort = reasoningEffort
  }

  5.3 工具模式构建

  Anthropic 工具模式 (src/services/claude.ts:1430-1442):
  const toolSchemas = await Promise.all(
    tools.map(async tool =>
      ({
        name: tool.name,
        description: typeof tool.description === 'function'
          ? await tool.description()
          : tool.description,
        input_schema:'inputJSONSchema' in tool && tool.inputJSONSchema
          ? tool.inputJSONSchema
          : zodToJsonSchema(tool.inputSchema),
      }) as unknown as Anthropic.Beta.Messages.BetaTool,
    )
  )

  OpenAI 工具模式 (src/services/claude.ts:1845-1863):
  const toolSchemas = await Promise.all(
    tools.map(
      async _ =>
        ({
          type: 'function',
          function: {
            name: _.name,
            description: await _.prompt({
              safeMode: options?.safeMode,
            }),
            parameters:
              'inputJSONSchema' in _ && _.inputJSONSchema
                ? _.inputJSONSchema
                : zodToJsonSchema(_.inputSchema),
          },
        }) as OpenAI.ChatCompletionTool,
    ),
  )

  6. 完整的参数组装流程总结

  1. 用户输入处理 → 创建标准化的用户消息
  2. 自动压缩检查 → 处理上下文窗口限制
  3. 系统提示构建 → 注入项目上下文和动态提醒
  4. 消息规范化 → 转换为API兼容格式
  5. 模型解析 → 根据模型指针解析具体模型
  6. 适配器选择 → 根据模型能力选择API类型
  7. 参数转换 → 统一参数转换为特定API格式
  8. 工具构建 → 异步构建工具模式和描述
  9. API调用 → 发送最终请求参数

  7. 关键参数结构示例

  最终发送给 Anthropic API 的参数:
  {
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 4096,
    "messages": [...processedMessages],
    "system": [...processedSystem],
    "tools": [...toolSchemas],
    "tool_choice": {"type": "auto"},
    "thinking": {"max_tokens": 1024}
  }

  最终发送给 OpenAI API 的参数:
  {
    "model": "gpt-4o",
    "max_tokens": 4096,
    "messages": [...openaiSystem, ...openaiMessages],
    "temperature": 0.7,
    "tools": [...toolSchemas],
    "tool_choice": "auto",
    "stream": true,
    "stream_options": {"include_usage": true}
  }

bash 复制代码

         1 +  # Kode 上下文窗口管理机制详解
         2 +
         3 +  ## 📋 概述
         4 +
         5 +  Kode 使用了一个**双层上下文管理机制**：
         6 +  1. **自动压缩（Auto-Compact）**：当对话接近模型上下文限制时，自动触发
         7 +  2. **消息截断（Message Truncation）**：提供4种策略，用于手动或特定场景的消息管理
         8 +
         9 +  ---
        10 +
        11 +  ## 🔄 完整流程图
        12 +
        13 +  ```
        14 +  用户发送消息
        15 +      ↓
        16 +  ┌──────────────────────────────────────────────────────┐
        17 +  │ query() 函数入口 (src/query.ts:161)                  │
        18 +  │ - 接收消息列表 messages                               │
        19 +  │ - 接收系统提示 systemPrompt                          │
        20 +  │ - 接收工具上下文 toolUseContext                      │
        21 +  └──────────────────────────────────────────────────────┘
        22 +      ↓
        23 +  ┌──────────────────────────────────────────────────────┐
        24 +  │ 第一步：自动压缩检查                                  │
        25 +  │ checkAutoCompact() (autoCompactCore.ts:113)          │
        26 +  └──────────────────────────────────────────────────────┘
        27 +      ↓
        28 +      判断是否需要压缩？
        29 +      ├─ 计算当前 token 总数 (countTokens)
        30 +      ├─ 获取主模型上下文限制 (contextLimit)
        31 +      └─ 检查：tokenCount >= contextLimit * 92%?
        32 +            │
        33 +            ├─ 否 → 跳过压缩，继续正常流程
        34 +            │
        35 +            └─ 是 → 执行自动压缩 ↓
        36 +
        37 +  ┌──────────────────────────────────────────────────────┐
        38 +  │ executeAutoCompact() (autoCompactCore.ts:146)        │
        39 +  │                                                       │
        40 +  │ 1. 生成压缩提示词 (COMPRESSION_PROMPT)               │
        41 +  │    - 技术上下文                                       │
        42 +  │    - 项目概览                                        │
        43 +  │    - 代码变更                                        │
        44 +  │    - 调试问题                                        │
        45 +  │    - 当前状态                                        │
        46 +  │    - 待办任务                                        │
        47 +  │    - 用户偏好                                        │
        48 +  │    - 关键决策                                        │
        49 +  │                                                       │
        50 +  │ 2. 调用 queryLLM 生成摘要                           │
        51 +  │    - 使用 'main' 模型指针                            │
        52 +  │    - 传入所有历史消息 + 压缩提示                     │
        53 +  │    - 获取结构化摘要                                  │
        54 +  │                                                       │
        55 +  │ 3. 恢复最近访问的文件                                │
        56 +  │    - selectAndReadFiles() 自动恢复                   │
        57 +  │    - 保留开发上下文                                  │
        58 +  │                                                       │
        59 +  │ 4. 构建压缩后的消息列表                              │
        60 +  │    - 压缩通知消息                                    │
        61 +  │    - AI 生成的摘要                                   │
        62 +  │    - 恢复的文件内容                                  │
        63 +  │                                                       │
        64 +  │ 5. 清理状态                                          │
        65 +  │    - 清空消息缓存                                    │
        66 +  │    - 清除上下文缓存                                  │
        67 +  │    - 重置文件新鲜度会话                              │
        68 +  └──────────────────────────────────────────────────────┘
        69 +      ↓
        70 +      返回压缩后的消息列表（通常只有 3-5 条消息）
        71 +      ↓
        72 +  ┌──────────────────────────────────────────────────────┐
        73 +  │ 第二步：构建系统提示                                  │
        74 +  │ formatSystemPromptWithContext() (claude.ts:1209)     │
        75 +  │ - 注入项目上下文                                     │
        76 +  │ - 添加代码风格指南                                   │
        77 +  │ - 生成系统提醒 (systemReminder)                      │
        78 +  └──────────────────────────────────────────────────────┘
        79 +      ↓
        80 +  ┌──────────────────────────────────────────────────────┐
        81 +  │ 第三步：调用 LLM                                     │
        82 +  │ queryLLM() (claude.ts:1097)                          │
        83 +  │                                                       │
        84 +  │ 1. 解析模型配置                                      │
        85 +  │    - 支持模型指针 (main/task/reasoning/quick)        │
        86 +  │    - 获取 ModelProfile                               │
        87 +  │                                                       │
        88 +  │ 2. 调用底层 API                                      │
        89 +  │    - queryLLMWithPromptCaching()                     │
        90 +  │    - 支持提示词缓存                                  │
        91 +  │    - 流式响应处理                                    │
        92 +  │                                                       │
        93 +  │ 3. 返回助手消息                                      │
        94 +  └──────────────────────────────────────────────────────┘
        95 +      ↓
        96 +      返回响应给用户
        97 +  ```
        98 +
        99 +  ---
       100 +
       101 +  ## 🎯 核心机制 1：自动压缩（Auto-Compact）
       102 +
       103 +  ### 触发条件
       104 +  ```typescript
       105 +  // src/utils/autoCompactCore.ts:18
       106 +  const AUTO_COMPACT_THRESHOLD_RATIO = 0.92
       107 +
       108 +  // 当满足以下条件时触发：
       109 +  // 1. messages.length >= 3 (至少3条消息)
       110 +  // 2. tokenCount >= contextLimit * 0.92 (使用92%上下文)
       111 +  ```
       112 +
       113 +  ### 触发时机
       114 +  在 `query()` 函数的**最开始**（src/query.ts:177）：
       115 +  ```typescript
       116 +  export async function* query(...) {
       117 +    // 第一步：自动压缩检查
       118 +    const { messages: processedMessages, wasCompacted } = await checkAutoCompact(
       119 +      messages,
       120 +      toolUseContext,
       121 +    )
       122 +    if (wasCompacted) {
       123 +      messages = processedMessages
       124 +    }
       125 +
       126 +    // ... 后续处理
       127 +  }
       128 +  ```
       129 +
       130 +  ### 压缩流程详解
       131 +
       132 +  #### 1️⃣ **检查是否需要压缩**
       133 +  ```typescript
       134 +  // autoCompactCore.ts:89
       135 +  async function shouldAutoCompact(messages: Message[]): Promise<boolean> {
       136 +    if (messages.length < 3) return false  // 太少消息不压缩
       137 +
       138 +    const tokenCount = countTokens(messages)  // 计算 token 总数
       139 +    const { isAboveAutoCompactThreshold } = await calculateThresholds(tokenCount)
       140 +
       141 +    return isAboveAutoCompactThreshold  // 超过 92% 返回 true
       142 +  }
       143 +  ```
       144 +
       145 +  #### 2️⃣ **获取模型上下文限制**
       146 +  ```typescript
       147 +  // autoCompactCore.ts:24
       148 +  async function getCompressionModelContextLimit(): Promise<number> {
       149 +    const modelManager = getModelManager()
       150 +    const modelProfile = modelManager.getModel('main')  // 使用主模型
       151 +
       152 +    if (modelProfile?.contextLength) {
       153 +      return modelProfile.contextLength  // 返回配置的上下文长度
       154 +    }
       155 +
       156 +    return 200_000  // 默认 200k tokens
       157 +  }
       158 +  ```
       159 +
       160 +  #### 3️⃣ **生成压缩摘要**
       161 +  ```typescript
       162 +  // autoCompactCore.ts:146
       163 +  async function executeAutoCompact(messages, toolUseContext) {
       164 +    // 1. 创建压缩提示
       165 +    const summaryRequest = createUserMessage(COMPRESSION_PROMPT)
       166 +
       167 +    // 2. 调用 LLM 生成摘要
       168 +    const summaryResponse = await queryLLM(
       169 +      normalizeMessagesForAPI([...messages, summaryRequest]),
       170 +      ['You are a helpful AI assistant tasked with creating comprehensive conversation summaries...'],
       171 +      0,
       172 +      toolUseContext.options.tools,
       173 +      toolUseContext.abortController.signal,
       174 +      {
       175 +        safeMode: false,
       176 +        model: 'main',  // 使用主模型生成高质量摘要
       177 +        prependCLISysprompt: true,
       178 +      },
       179 +    )
       180 +
       181 +    // 3. 提取摘要文本
       182 +    const summary = extractTextFromContent(summaryResponse.message.content)
       183 +
       184 +    // 4. 恢复最近访问的文件（保持开发上下文）
       185 +    const recoveredFiles = await selectAndReadFiles()
       186 +
       187 +    // 5. 构建压缩后的消息列表
       188 +    const compactedMessages = [
       189 +      createUserMessage('Context automatically compressed due to token limit...'),
       190 +      summaryResponse,  // AI 生成的摘要
       191 +      ...recoveredFiles.map(file => createUserMessage(`**Recovered File: ${file.path}**...`))
       192 +    ]
       193 +
       194 +    // 6. 清理状态
       195 +    getMessagesSetter()([])  // 清空内存中的消息
       196 +    getContext.cache.clear?.()  // 清除上下文缓存
       197 +    getCodeStyle.cache.clear?.()  // 清除代码风格缓存
       198 +    resetFileFreshnessSession()  // 重置文件新鲜度
       199 +
       200 +    return compactedMessages  // 返回压缩后的消息（通常只有 3-5 条）
       201 +  }
       202 +  ```
       203 +
       204 +  #### 4️⃣ **压缩提示词（COMPRESSION_PROMPT）**
       205 +  ```typescript
       206 +  // autoCompactCore.ts:41
       207 +  const COMPRESSION_PROMPT = `Please provide a comprehensive summary of our conversation structured
           + as follows:
       208 +
       209 +  ## Technical Context
       210 +  Development environment, tools, frameworks, and configurations in use...
       211 +
       212 +  ## Project Overview
       213 +  Main project goals, features, and scope...
       214 +
       215 +  ## Code Changes
       216 +  Files created, modified, or analyzed during our conversation...
       217 +
       218 +  ## Debugging & Issues
       219 +  Problems encountered and their root causes...
       220 +
       221 +  ## Current Status
       222 +  What we just completed successfully...
       223 +
       224 +  ## Pending Tasks
       225 +  Immediate next steps and priorities...
       226 +
       227 +  ## User Preferences
       228 +  Coding style, formatting, and organizational preferences...
       229 +
       230 +  ## Key Decisions
       231 +  Important technical decisions made and their rationale...
       232 +
       233 +  Focus on information essential for continuing the conversation effectively...`
       234 +  ```
       235 +
       236 +  ### 压缩效果示例
       237 +
       238 +  **压缩前**：100 条消息，180,000 tokens
       239 +  ```
       240 +  [User] 帮我创建一个 React 项目
       241 +  [Assistant] 好的，我将创建...
       242 +  [User] 添加路由功能
       243 +  [Assistant] 我将添加 React Router...
       244 +  [Tool] 文件创建成功...
       245 +  [User] 修复这个 bug...
       246 +  [Assistant] 我发现问题在...
       247 +  ... (95 more messages)
       248 +  ```
       249 +
       250 +  **压缩后**：3-5 条消息，约 15,000 tokens
       251 +  ```
       252 +  [User] Context automatically compressed due to token limit. Essential information preserved.
       253 +
       254 +  [Assistant] ## Technical Context
       255 +  - React 18.2.0 项目，使用 TypeScript
       256 +  - 依赖：React Router 6.4, Axios, Zustand
       257 +  - 开发工具：Vite, ESLint, Prettier
       258 +
       259 +  ## Project Overview
       260 +  创建了一个多页面 React SPA，包含用户认证、商品列表和详情页...
       261 +
       262 +  ## Code Changes
       263 +  - 创建 src/App.tsx (路由配置)
       264 +  - 创建 src/pages/Login.tsx (登录页)
       265 +  - 修复 src/utils/auth.ts 的 token 验证 bug
       266 +
       267 +  ## Current Status
       268 +  已完成用户认证和商品列表，测试通过
       269 +
       270 +  ## Pending Tasks
       271 +  1. 实现购物车功能
       272 +  2. 添加订单历史页面
       273 +
       274 +  ## Key Decisions
       275 +  - 使用 Zustand 而非 Redux（更轻量）
       276 +  - API 基础 URL 设为环境变量
       277 +
       278 +  [User] **Recovered File: src/App.tsx**
       279 +  ```tsx
       280 +  import { BrowserRouter, Routes, Route } from 'react-router-dom'
       281 +  ...
       282 +  ```
       283 +  ```
       284 +
       285 +  ---
       286 +
       287 +  ## 🎯 核心机制 2：消息截断策略
       288 +
       289 +  虽然当前代码中 `MessageContextManager` **没有被直接调用**，但它提供了 4
           + 种备用策略，可用于未来扩展或特定场景：
       290 +
       291 +  ### 策略 1：保留最近消息（preserve_recent）
       292 +  ```typescript
       293 +  // messageContextManager.ts:54
       294 +  private preserveRecentMessages(messages, strategy) {
       295 +    // 计算应该保留多少条消息
       296 +    const preserveCount = strategy.preserveCount ||
       297 +                          this.estimateMessageCount(strategy.maxTokens)
       298 +
       299 +    // 直接裁剪，保留最后 N 条
       300 +    const truncatedMessages = messages.slice(-preserveCount)
       301 +
       302 +    return {
       303 +      truncatedMessages,
       304 +      removedCount: messages.length - truncatedMessages.length,
       305 +      preservedTokens: countTokens(truncatedMessages),
       306 +    }
       307 +  }
       308 +  ```
       309 +
       310 +  **适用场景**：快速截断，适合对话连贯性要求高的场景
       311 +
       312 +  ### 策略 2：保留重要消息（preserve_important）
       313 +  ```typescript
       314 +  // messageContextManager.ts:78
       315 +  private preserveImportantMessages(messages, strategy) {
       316 +    const importantMessages = []
       317 +    const recentMessages = messages.slice(-5)  // 最后 5 条消息
       318 +
       319 +    // 从旧消息中提取重要消息
       320 +    for (let i = 0; i < messages.length - 5; i++) {
       321 +      const message = messages[i]
       322 +      if (this.isImportantMessage(message)) {
       323 +        importantMessages.push(message)
       324 +      }
       325 +    }
       326 +
       327 +    // 合并并去重
       328 +    const combined = [...importantMessages, ...recentMessages]
       329 +    const unique = deduplicateMessages(combined)
       330 +
       331 +    // 按原始顺序排序
       332 +    const truncatedMessages = unique.sort((a, b) => {
       333 +      return messages.indexOf(a) - messages.indexOf(b)
       334 +    })
       335 +
       336 +    return { truncatedMessages, ... }
       337 +  }
       338 +
       339 +  // 判断消息是否重要
       340 +  private isImportantMessage(message) {
       341 +    if (message.type === 'user') return true  // 用户消息总是重要
       342 +
       343 +    // 检查是否包含错误关键词
       344 +    const text = extractText(message).toLowerCase()
       345 +    return text.includes('error') ||
       346 +           text.includes('failed') ||
       347 +           text.includes('warning') ||
       348 +           text.includes('critical') ||
       349 +           text.includes('issue')
       350 +  }
       351 +  ```
       352 +
       353 +  **适用场景**：保留关键决策和错误信息，适合调试场景
       354 +
       355 +  ### 策略 3：智能压缩（smart_compression）
       356 +  ```typescript
       357 +  // messageContextManager.ts:126
       358 +  private async smartCompressionStrategy(messages, strategy) {
       359 +    // 保留最近 30% 的消息
       360 +    const recentCount = Math.min(10, Math.floor(messages.length * 0.3))
       361 +    const recentMessages = messages.slice(-recentCount)
       362 +    const olderMessages = messages.slice(0, -recentCount)
       363 +
       364 +    // 为旧消息创建摘要
       365 +    const summary = this.createMessagesSummary(olderMessages)
       366 +
       367 +    // 生成摘要消息
       368 +    const summaryMessage = {
       369 +      type: 'assistant',
       370 +      message: {
       371 +        role: 'assistant',
       372 +        content: [
       373 +          {
       374 +            type: 'text',
       375 +            text: `[CONVERSATION SUMMARY - ${olderMessages.length} messages compressed]
       376 +
       377 +  ${summary}
       378 +
       379 +  [END SUMMARY - Recent context follows...]`,
       380 +          },
       381 +        ],
       382 +      },
       383 +      costUSD: 0,
       384 +      durationMs: 0,
       385 +      uuid: randomUUID(),
       386 +    }
       387 +
       388 +    // 返回：摘要消息 + 最近消息
       389 +    return {
       390 +      truncatedMessages: [summaryMessage, ...recentMessages],
       391 +      removedCount: olderMessages.length,
       392 +    }
       393 +  }
       394 +
       395 +  // 创建简单摘要（不调用 LLM）
       396 +  private createMessagesSummary(messages) {
       397 +    const userMessages = messages.filter(m => m.type === 'user').length
       398 +    const assistantMessages = messages.filter(m => m.type === 'assistant').length
       399 +    const toolUses = messages.filter(m => hasToolUse(m)).length
       400 +
       401 +    // 提取关键主题
       402 +    const topics = extractTopics(messages)
       403 +
       404 +    return `Previous conversation included ${userMessages} user messages and ${assistantMessages}
           + assistant responses, with ${toolUses} tool invocations. Key topics: ${topics.join(', ')}.`
       405 +  }
       406 +  ```
       407 +
       408 +  **适用场景**：需要保留上下文但不调用 LLM 的场景
       409 +
       410 +  ### 策略 4：自动压缩（auto_compact）
       411 +  ```typescript
       412 +  // messageContextManager.ts:168
       413 +  private async autoCompactStrategy(messages, strategy) {
       414 +    // 目前回退到 preserve_recent
       415 +    // 未来可以集成 autoCompactCore.ts
       416 +    return this.preserveRecentMessages(messages, strategy)
       417 +  }
       418 +  ```
       419 +
       420 +  **适用场景**：预留用于桥接到主自动压缩系统（当前实现仅回退到 preserve_recent）
       421 +
       422 +  ---
       423 +
       424 +  ## 📊 Token 计数机制
       425 +
       426 +  ### Token 计数实现
       427 +  ```typescript
       428 +  // src/utils/tokens.ts
       429 +  import { encode } from 'gpt-tokenizer'  // tiktoken 的 JS 实现
       430 +
       431 +  export function countTokens(messages: Message[]): number {
       432 +    let total = 0
       433 +
       434 +    for (const msg of messages) {
       435 +      // 提取消息文本
       436 +      const text = extractTextFromMessage(msg)
       437 +
       438 +      // 使用 tiktoken 编码
       439 +      const tokens = encode(text)
       440 +      total += tokens.length
       441 +
       442 +      // 添加消息格式开销
       443 +      total += 4  // <|im_start|>role<|im_sep|>content<|im_end|>
       444 +    }
       445 +
       446 +    total += 2  // 对话开始和结束标记
       447 +
       448 +    return total
       449 +  }
       450 +  ```
       451 +
       452 +  ### 不同内容的 Token 计数
       453 +
       454 +  **示例 1：简单文本**
       455 +  ```javascript
       456 +  countTokens([
       457 +    { type: 'user', message: { content: 'Hello' } }
       458 +  ])
       459 +  // ≈ 7 tokens (1 word + 4 message overhead + 2 conversation overhead)
       460 +  ```
       461 +
       462 +  **示例 2：代码块**
       463 +  ```javascript
       464 +  countTokens([
       465 +    { type: 'user', message: { content: '```python\nprint("hello")\n```' } }
       466 +  ])
       467 +  // ≈ 15 tokens (代码 tokens 较多)
       468 +  ```
       469 +
       470 +  **示例 3：完整对话**
       471 +  ```javascript
       472 +  countTokens([
       473 +    { type: 'user', message: { content: '帮我创建一个 React 项目' } },
       474 +    { type: 'assistant', message: { content: '好的，我将使用 create-react-app...' } },
       475 +    { type: 'user', message: { content: '添加路由功能' } },
       476 +  ])
       477 +  // ≈ 50-60 tokens
       478 +  ```
       479 +
       480 +  ---
       481 +
       482 +  ## 🔍 实际运行示例
       483 +
       484 +  ### 场景：长对话触发自动压缩
       485 +
       486 +  ```
       487 +  [时刻 0] 用户开始对话
       488 +  messages.length = 1
       489 +  tokenCount = 50
       490 +  contextLimit = 200,000
       491 +  使用率 = 0.025%
       492 +  → 继续正常流程
       493 +
       494 +  [时刻 1] 对话进行中
       495 +  messages.length = 20
       496 +  tokenCount = 5,000
       497 +  使用率 = 2.5%
       498 +  → 继续正常流程
       499 +
       500 +  [时刻 2] 对话逐渐变长
       501 +  messages.length = 50
       502 +  tokenCount = 50,000
       503 +  使用率 = 25%
       504 +  → 继续正常流程
       505 +
       506 +  [时刻 3] 接近上下文限制
       507 +  messages.length = 100
       508 +  tokenCount = 185,000
       509 +  使用率 = 92.5%
       510 +  → 触发自动压缩！
       511 +
       512 +  [压缩过程]
       513 +  1. 检测到 tokenCount >= 200,000 * 0.92 = 184,000 ✓
       514 +  2. 调用 executeAutoCompact()
       515 +  3. 生成压缩提示：COMPRESSION_PROMPT
       516 +  4. 调用 queryLLM 生成摘要（使用主模型）
       517 +  5. 恢复最近访问的文件（如 src/App.tsx）
       518 +  6. 构建新消息列表：
       519 +     - 消息 1: 压缩通知
       520 +     - 消息 2: AI 生成的摘要（约 3,000 tokens）
       521 +     - 消息 3-5: 恢复的文件内容（约 5,000 tokens）
       522 +  7. 清理缓存
       523 +
       524 +  [压缩后]
       525 +  messages.length = 4
       526 +  tokenCount = 8,000
       527 +  使用率 = 4%
       528 +  → 继续正常流程，可以再对话很久！
       529 +  ```
       530 +
       531 +  ### 压缩触发日志示例
       532 +
       533 +  ```bash
       534 +  [DEBUG] Auto-compact check: 185,000 / 200,000 tokens (92.5%)
       535 +  [INFO] Auto-compact triggered! Compressing conversation...
       536 +  [DEBUG] Generating summary with 100 messages
       537 +  [DEBUG] Summary generated: 3,245 tokens
       538 +  [DEBUG] Recovering recent files: 3 files found
       539 +  [DEBUG] File recovered: src/App.tsx (1,234 tokens)
       540 +  [DEBUG] File recovered: src/components/Header.tsx (892 tokens)
       541 +  [DEBUG] File recovered: src/utils/auth.ts (567 tokens)
       542 +  [INFO] Auto-compact complete: 100 messages → 4 messages
       543 +  [INFO] Token reduction: 185,000 → 8,193 (95.6% reduction)
       544 +  ```
       545 +
       546 +  ---
       547 +
       548 +  ## ⚙️ 配置选项
       549 +
       550 +  ### 自动压缩阈值配置
       551 +  ```typescript
       552 +  // autoCompactCore.ts:18
       553 +  const AUTO_COMPACT_THRESHOLD_RATIO = 0.92
       554 +
       555 +  // 可以修改为：
       556 +  // 0.80 = 更积极压缩（80% 时触发）
       557 +  // 0.95 = 更保守压缩（95% 时触发）
       558 +  ```
       559 +
       560 +  ### 模型上下文限制配置
       561 +  ```typescript
       562 +  // 通过 ModelProfile 配置
       563 +  {
       564 +    modelName: 'claude-3-5-sonnet-20240620',
       565 +    contextLength: 200000,  // 200k tokens
       566 +  }
       567 +
       568 +  {
       569 +    modelName: 'gpt-4o',
       570 +    contextLength: 128000,  // 128k tokens
       571 +  }
       572 +  ```
       573 +
       574 +  ### 用户偏好配置（未来扩展）
       575 +  ```typescript
       576 +  // messageContextManager.ts:267
       577 +  export function createRetentionStrategy(
       578 +    targetContextLength: number,
       579 +    currentTokens: number,
       580 +    userPreference: 'aggressive' | 'balanced' | 'conservative' = 'balanced',
       581 +  ) {
       582 +    const maxTokens = Math.floor(targetContextLength * 0.7)
       583 +
       584 +    switch (userPreference) {
       585 +      case 'aggressive':
       586 +        return {
       587 +          type: 'preserve_recent',
       588 +          maxTokens,
       589 +          preserveCount: Math.max(3, Math.floor(maxTokens / 200)),
       590 +        }
       591 +      case 'conservative':
       592 +        return {
       593 +          type: 'smart_compression',
       594 +          maxTokens,
       595 +        }
       596 +      case 'balanced':
       597 +      default:
       598 +        return {
       599 +          type: 'preserve_important',
       600 +          maxTokens,
       601 +          preserveCount: Math.max(5, Math.floor(maxTokens / 150)),
       602 +        }
       603 +    }
       604 +  }
       605 +  ```
       606 +
       607 +  ---
       608 +
       609 +  ## 🎯 关键设计决策
       610 +
       611 +  ### 1. 为什么在 query() 开始就检查？
       612 +  **原因**：
       613 +  - 避免发送超出上下文限制的请求给 LLM
       614 +  - 提前处理可以节省 API 调用成本
       615 +  - 用户体验更好（不会遇到 API 错误）
       616 +
       617 +  ### 2. 为什么使用 92% 作为阈值？
       618 +  **原因**：
       619 +  - 留 8% 空间给系统提示、工具定义等
       620 +  - 避免在 token 计数误差时超限
       621 +  - 提前触发可以生成更好的摘要
       622 +
       623 +  ### 3. 为什么压缩时使用主模型？
       624 +  **原因**：
       625 +  - 生成高质量摘要需要强大的模型
       626 +  - 主模型通常是最强的（如 Claude Sonnet）
       627 +  - 摘要质量直接影响后续对话效果
       628 +
       629 +  ### 4. 为什么恢复最近访问的文件？
       630 +  **原因**：
       631 +  - 保持代码开发的连续性
       632 +  - 用户正在编辑的文件很重要
       633 +  - 自动恢复比手动重新读取更好
       634 +
       635 +  ### 5. 为什么清理缓存？
       636 +  **原因**：
       637 +  - 压缩后上下文已改变，旧缓存可能失效
       638 +  - 避免内存泄漏
       639 +  - 确保新对话基于压缩后的状态
       640 +
       641 +  ---
       642 +
       643 +  ## 🚀 性能优化
       644 +
       645 +  ### Token 计数缓存
       646 +  ```typescript
       647 +  // 当前实现每次都重新计算
       648 +  const tokenCount = countTokens(messages)
       649 +
       650 +  // 优化建议：增量计数
       651 +  class TokenCounter {
       652 +    private cache = new Map<string, number>()
       653 +
       654 +    count(messages: Message[]): number {
       655 +      let total = 0
       656 +      for (const msg of messages) {
       657 +        const key = msg.uuid
       658 +        if (!this.cache.has(key)) {
       659 +          this.cache.set(key, this.countSingleMessage(msg))
       660 +        }
       661 +        total += this.cache.get(key)!
       662 +      }
       663 +      return total + 2  // 对话开销
       664 +    }
       665 +  }
       666 +  ```
       667 +
       668 +  ### 压缩频率优化
       669 +  ```typescript
       670 +  // 避免频繁压缩
       671 +  let lastCompactTime = 0
       672 +  const MIN_COMPACT_INTERVAL = 60_000  // 60 秒
       673 +
       674 +  if (Date.now() - lastCompactTime < MIN_COMPACT_INTERVAL) {
       675 +    return { messages, wasCompacted: false }
       676 +  }
       677 +  ```
       678 +
       679 +  ---
       680 +
       681 +  ## 📝 总结
       682 +
       683 +  ### Kode 的上下文管理策略
       684 +
       685 +  1. **主要机制**：自动压缩（Auto-Compact）
       686 +     - 在 92% 上下文使用率时触发
       687 +     - 使用主模型生成结构化摘要
       688 +     - 自动恢复最近文件
       689 +     - 清理旧状态
       690 +
       691 +  2. **备用机制**：消息截断策略（未启用）
       692 +     - 4 种策略可选
       693 +     - 适用于不同场景
       694 +     - 可作为未来扩展
       695 +
       696 +  3. **Token 计数**：基于 tiktoken
       697 +     - 精确计算 token 数
       698 +     - 考虑消息格式开销
       699 +     - 支持多种内容类型
       700 +
       701 +  4. **优势**：
       702 +     - ✅ 自动化：无需用户干预
       703 +     - ✅ 智能化：保留重要上下文
       704 +     - ✅ 可靠性：优雅降级处理
       705 +     - ✅ 高效性：通常可减少 90%+ 的 token
       706 +
       707 +  5. **劣势**：
       708 +     - ❌ 压缩需要额外 API 调用（消耗成本）
       709 +     - ❌ 摘要可能丢失细节
       710 +     - ❌ 固定阈值可能不适合所有场景
       711 +
       712 +  ---
       713 +
       714 +  ## 🔗 相关文件索引
       715 +
       716 +  | 文件 | 功能 | 关键函数 |
       717 +  |------|------|----------|
       718 +  | `src/query.ts` | 查询入口 | `query()` |
       719 +  | `src/utils/autoCompactCore.ts` | 自动压缩核心 | `checkAutoCompact()`, `executeAutoCompact()` |
       720 +  | `src/utils/messageContextManager.ts` | 消息截断策略 | `truncateMessages()`, 4种策略 |
       721 +  | `src/utils/tokens.ts` | Token 计数 | `countTokens()` |
       722 +  | `src/services/claude.ts` | LLM 调用 | `queryLLM()`, `formatSystemPromptWithContext()` |
       723 +  | `src/utils/fileRecoveryCore.ts` | 文件恢复 | `selectAndReadFiles()` |
       724 +
       725 +  ---
       726 +
       727 +  **文档生成时间**: 2025-11-03
       728 +  **基于版本**: Kode v1.1.23

bash 复制代码

提供几个关键流程的 Mermaid 图：

  1. 系统总体架构图

  graph TB
      subgraph Browser["浏览器端 (Vue 2)"]
          UI["UI 层<br/>ChatView/ModelManager/AgentManager"]
          Store["状态管理层 (Vuex)<br/>chat/models/agents/context"]
          Services["业务逻辑层<br/>WebSocket/MessageProcessor/ContextManager"]
          Utils["工具层<br/>tokenCounter/storage/markdown"]
      end

      subgraph Backend["Go 后端服务"]
          Router["路由层<br/>/api/chat, /ws/chat, /api/models"]
          BizLogic["业务逻辑层<br/>AIService/ContextService/TaskManager"]
          Adapters["AI 适配器<br/>ClaudeAdapter/OpenAIAdapter"]
          WSHub["WebSocket Hub<br/>连接管理/消息分发"]
      end

      subgraph AI["AI 服务提供商"]
          Claude["Claude API"]
          OpenAI["OpenAI API"]
          Custom["自定义 API"]
      end

      UI --> Store
      Store --> Services
      Services --> Utils

      Services -->|WebSocket| WSHub
      Services -->|HTTP API| Router

      Router --> BizLogic
      BizLogic --> Adapters
      WSHub --> BizLogic

      Adapters --> Claude
      Adapters --> OpenAI
      Adapters --> Custom

      style Browser fill:#e1f5ff
      style Backend fill:#fff4e1
      style AI fill:#f0f0f0

  2. 对话消息流程图

  sequenceDiagram
      participant User as 用户
      participant UI as ChatView.vue
      participant Store as Vuex Store
      participant WS as WebSocket
      participant Backend as Go Handler
      participant AI as AI Service
      participant LLM as Claude/GPT API

      User->>UI: 输入消息并发送
      UI->>Store: dispatch('sendMessage')

      Store->>Store: 验证输入
      Store->>Store: Token 计数

      alt Token 超限
          Store->>Store: 执行上下文截断
          Note over Store: preserve_important 策略
      end

      Store->>WS: send({type:'chat', messages, model})
      WS->>Backend: WebSocket 消息

      Backend->>Backend: 解析请求
      Backend->>AI: StreamChat(messages, model)
      AI->>LLM: HTTP POST /v1/messages (stream=true)

      loop 流式响应
          LLM-->>AI: {type:'content_block_delta', delta:{text}}
          AI-->>Backend: StreamChunk{content, done=false}
          Backend-->>WS: {type:'stream', content}
          WS-->>Store: 更新 currentStreamContent
          Store-->>UI: 触发打字动画
          UI-->>User: 显示流式内容
      end

      LLM-->>AI: {type:'message_stop'}
      AI-->>Backend: StreamChunk{done=true}
      Backend-->>WS: {type:'stream', done=true, metadata}

      WS->>Store: 流式完成
      Store->>Store: 将流式内容转为正式消息

      alt 检测到工具调用
          Store->>Store: 识别 tool_use 块
          Store->>Backend: 执行工具
          Backend-->>Store: 工具结果
          Store->>WS: 发送工具结果
          Note over WS,LLM: 重复对话流程
      end

      Store-->>UI: 更新消息列表
      UI-->>User: 显示完整响应

  3. 上下文管理流程图

  flowchart TD
      Start([用户发送新消息]) --> GetMessages[获取当前消息列表]
      GetMessages --> CountTokens[计算总 Token 数]

      CountTokens --> CheckLimit{Token 数 > maxTokens?}

      CheckLimit -->|否| SendDirect[直接发送给 AI]
      CheckLimit -->|是| SelectStrategy[选择截断策略]

      SelectStrategy --> Strategy{策略类型}

      Strategy -->|preserve_recent| Recent[保留最近 N 条消息]
      Strategy -->|preserve_important| Important[保留重要消息<br/>+最后5条]
      Strategy -->|smart_compression| Compress[生成摘要<br/>+保留最近30%]
      Strategy -->|auto_compact| AutoCompact[调用后端压缩]

      Recent --> CalcCount[estimateMessageCount<br/>avgTokens=150]
      CalcCount --> SliceRecent[messages.slice -preserveCount]
      SliceRecent --> Truncated

      Important --> FilterUser[提取所有用户消息]
      FilterUser --> FilterError[提取包含 error/failed<br/>的助手消息]
      FilterError --> GetLast5[获取最后5条消息]
      GetLast5 --> Merge[合并去重]
      Merge --> Sort[按原顺序排序]
      Sort --> Truncated

      Compress --> Split[分离最近10条<br/>和旧消息]
      Split --> GenSummary[生成简单摘要<br/>统计消息数/主题]
      GenSummary --> CreateSummaryMsg[创建摘要消息]
      CreateSummaryMsg --> CombineRecent[摘要 + 最近消息]
      CombineRecent --> Truncated

      AutoCompact --> BackendAPI[POST /api/context/compress]
      BackendAPI --> BackendProcess[后端调用 AI 生成<br/>结构化摘要]
      BackendProcess --> RecoverFiles[恢复最近文件]
      RecoverFiles --> Truncated

      Truncated[截断后的消息列表] --> RecountTokens[重新计数 Token]
      RecountTokens --> UpdateStore[更新 Vuex Store]
      UpdateStore --> SendDirect

      SendDirect --> End([发送给 AI API])

      style CheckLimit fill:#ffe6e6
      style Strategy fill:#e6f3ff
      style Truncated fill:#e6ffe6

  4. TaskTool 子任务执行流程图

  sequenceDiagram
      participant Main as 主对话
      participant Store as agents Store
      participant WS as WebSocket
      participant Backend as Task Handler
      participant AI as AI Service
      participant LLM as AI Model

      Main->>Main: AI 检测到需要子任务
      Main->>Main: 生成 tool_use 块<br/>{type:'task', agentType, prompt}

      Main->>Store: executeTask(agentType, taskPrompt)
      Store->>Store: 查找代理配置

      alt 代理不存在
          Store-->>Main: Error: Agent not found
      end

      Store->>Store: 构建任务请求<br/>{systemPrompt, tools, model, temperature}
      Store->>WS: send({type:'task', ...})

      WS->>Backend: TaskRequest
      Backend->>Backend: 创建 TaskContext<br/>独立上下文
      Backend->>Backend: 过滤工具权限<br/>根据 agent.tools

      Backend-->>WS: {type:'task_progress', status:'started'}
      WS-->>Store: 更新任务历史
      Store-->>Main: 显示进度

      Backend->>AI: StreamChatWithContext<br/>(独立消息列表)
      AI->>LLM: POST /v1/messages

      loop 流式响应
          LLM-->>AI: 文本内容 chunk
          AI-->>Backend: StreamChunk
          Backend-->>WS: {type:'task_stream', content}
          WS-->>Main: 显示子任务进度

          alt AI 调用工具
              LLM-->>AI: tool_use 块
              AI-->>Backend: ToolCall detected
              Backend->>Backend: 检查工具权限<br/>canUseTool(toolName, agent.tools)

              alt 有权限
                  Backend->>Backend: executeToolCall(toolUse)
                  Backend-->>WS: {type:'task_tool_result'}
                  Backend->>AI: 添加 tool_result 到上下文
                  Note over AI,LLM: 继续对话
              else 无权限
                  Backend-->>WS: {type:'task_error', message:'Permission denied'}
              end
          end
      end

      LLM-->>AI: 完成
      AI-->>Backend: Done

      Backend->>Backend: 汇总结果
      Backend-->>WS: {type:'task_result', result, metadata}

      WS->>Store: 任务完成
      Store->>Store: 更新任务历史<br/>status='completed'
      Store-->>Main: 返回结果

      Main->>Main: 将结果作为 tool_result<br/>添加到主对话
      Main->>WS: 继续主对话

  5. 多模型配置与切换流程图

  flowchart TD
      Start([用户操作]) --> Action{操作类型}

      Action -->|添加模型| AddModel[打开添加模型对话框]
      Action -->|切换模型| SwitchModel[点击模型切换按钮]
      Action -->|编辑模型| EditModel[选择模型编辑]

      AddModel --> Form[填写模型信息表单]
      Form --> FormData[name, modelName, provider<br/>apiKey, baseURL<br/>contextLength, maxTokens]
      FormData --> Validate{验证输入}

      Validate -->|失败| ShowError[显示错误提示]
      ShowError --> Form

      Validate -->|成功| TestConnection[测试连接<br/>POST /api/models/test]
      TestConnection --> TestResult{测试结果}

      TestResult -->|失败| ShowTestError[显示连接错误]
      ShowTestError --> Form

      TestResult -->|成功| SaveModel[保存到 LocalStorage<br/>+同步到后端]
      SaveModel --> UpdateList[更新模型列表]
      UpdateList --> End1([完成])

      SwitchModel --> GetCurrent[获取当前模型]
      GetCurrent --> GetAll[获取所有配置的模型]
      GetAll --> FindNext[找到下一个模型<br/>循环切换]

      FindNext --> CheckContext{检查上下文兼容性}
      CheckContext -->|当前 tokens < 新模型 contextLength * 0.8| Compatible[兼容]
      CheckContext -->|超限| Overflow[上下文溢出警告]

      Compatible --> SetPointer[设置 modelPointers.main]
      Overflow --> SetPointer
      Overflow --> WarnUser[显示警告信息<br/>上下文使用率 X%]

      SetPointer --> UpdateUI[更新 UI 显示]
      UpdateUI --> End2([切换完成])

      EditModel --> LoadData[加载模型数据]
      LoadData --> Form

      style TestConnection fill:#fff4e1
      style CheckContext fill:#ffe6e6
      style SaveModel fill:#e6ffe6

  6. WebSocket 消息协议流程图

  stateDiagram-v2
      [*] --> Disconnected

      Disconnected --> Connecting: 用户打开页面
      Connecting --> Connected: onopen 事件
      Connecting --> Disconnected: 连接失败

      Connected --> Ready: 发送认证信息
      Ready --> [*]: 用户关闭页面

      state Ready {
          [*] --> Idle

          Idle --> SendingChat: 发送聊天消息
          SendingChat --> ReceivingStream: 接收流式响应

          state ReceivingStream {
              [*] --> StreamChunk
              StreamChunk --> StreamChunk: 接收 chunk
              StreamChunk --> ToolUse: 检测到工具调用
              ToolUse --> ExecutingTool: 执行工具
              ExecutingTool --> StreamChunk: 工具结果返回
              StreamChunk --> [*]: done=true
          }

          ReceivingStream --> Idle: 流式完成

          Idle --> SendingTask: 发送子任务
          SendingTask --> ReceivingTaskStream: 接收任务流式响应
          ReceivingTaskStream --> Idle: 任务完成

          Idle --> Heartbeat: 定时心跳
          Heartbeat --> Idle: pong 响应
      }

      Ready --> Reconnecting: 连接断开
      Reconnecting --> Connected: 重连成功
      Reconnecting --> Disconnected: 重连失败(超过3次)

      note right of ReceivingStream
          消息类型:
          - stream: 文本内容
          - tool_use: 工具调用
          - task_stream: 子任务内容
          - error: 错误信息
      end note

      note right of Heartbeat
          每 30 秒发送 ping
          超时无 pong 则重连
      end note

  7. 代理配置管理流程图

  flowchart LR
      subgraph User["用户操作"]
          A1[添加代理]
          A2[编辑代理]
          A3[删除代理]
          A4[查看提示词]
      end

      subgraph Store["Vuex Store (agents)"]
          S1[agentConfigs 数组]
          S2[loadAgents action]
          S3[saveAgent action]
          S4[removeAgent action]
      end

      subgraph Storage["LocalStorage"]
          L1[kode_agents key]
          L2[JSON 序列化数据]
      end

      subgraph Defaults["默认配置"]
          D1[general-purpose<br/>所有工具]
          D2[researcher<br/>WebSearch/URLFetcher]
          D3[summarizer<br/>MemoryRead]
      end

      A1 --> Form[填写表单]
      Form --> |agentType<br/>whenToUse<br/>systemPrompt<br/>tools<br/>modelName| S3

      A2 --> Load[加载现有数据]
      Load --> Form

      A3 --> Confirm{确认删除?}
      Confirm -->|是| S4
      Confirm -->|否| Cancel[取消]

      A4 --> Display[对话框显示<br/>systemPrompt]

      S3 --> Validate{验证数据}
      Validate -->|成功| S1
      Validate -->|失败| Error[显示错误]

      S4 --> S1
      S1 --> Persist[持久化]
      Persist --> L1
      L1 --> L2

      S2 --> Check{LocalStorage<br/>有数据?}
      Check -->|有| L1
      Check -->|无| D1
      D1 --> D2
      D2 --> D3
      D3 --> S1
      L1 --> S1

      style Form fill:#e1f5ff
      style S1 fill:#fff4e1
      style L1 fill:#e6ffe6

bash 复制代码

 时间线：

  时刻 0s: 前端调用 query()
           ↓
  时刻 1s: query() 内部调用 LLM 流式 API
           ↓
  时刻 2s: LLM 返回第一块文本
           query() 执行 yield assistantMessage（包含 tool_use）
           ↓
           【前端的 for await 循环收到第一个 message】
           setMessages([...messages, assistantMessage])
           ↓
           【前端显示 AI 消息 + 工具调用信息】
           ↓
           【循环继续等待下一个 yield...】← 这里会阻塞！
           ↓
  时刻 3s: query() 内部检测到 tool_use，开始执行工具
           ↓
  时刻 4s: 工具执行中...
           query() 执行 yield toolProgressMessage
           ↓
           【前端的 for await 循环收到第二个 message】
           setMessages([...messages, toolProgressMessage])
           ↓
           【前端显示工具执行进度】
           ↓
           【循环继续等待下一个 yield...】← 这里又阻塞！
           ↓
  时刻 6s: 工具执行完成
           query() 递归调用自己
           ↓
  时刻 7s: LLM 返回最终答案
           query() 执行 yield finalMessage
           ↓
           【前端的 for await 循环收到第三个 message】
           setMessages([...messages, finalMessage])
           ↓
           【循环结束，query() 函数返回】
           ↓
  时刻 8s: setIsLoading(false) ← 只有这里才标记完成

  关键代码分析

  1. query() 的返回类型：AsyncGenerator<Message, void>

  // src/query.ts:161
  export async function* query(
    messages: Message[],
    systemPrompt: string[],
    context: { [k: string]: string },
    canUseTool: CanUseToolFn,
    toolUseContext: ExtendedToolUseContext,
  ): AsyncGenerator<Message, void> {
    // ↑↑↑ 注意这个星号 *，这是生成器函数

    // 1️⃣ 返回 AI 消息
    yield assistantMessage

    // 2️⃣ 执行工具，返回工具进度
    for await (const message of runToolsConcurrently(...)) {
      yield message  // ← 每个工具进度都会 yield
    }

    // 3️⃣ 递归调用，返回后续消息
    yield* await query([...messages, assistantMessage, ...toolResults], ...)
  }

  2. 前端的 for await 循环会在每个 yield 处暂停

  // src/screens/REPL.tsx:370
  for await (const message of query(...)) {
    // ↑ 这个循环不是一次性拿到所有消息
    // 而是每次 query() 执行 yield 时，才会继续执行一次

    setMessages(oldMessages => [...oldMessages, message])
    // 执行完这行后，循环会暂停，等待下一个 yield
  }

  实际执行演示

  假设用户问："搜索 Go 1.22 新特性"

  执行日志（带时间戳）

  [前端 00:00:000] for await (const message of query(...)) {  ← 开始循环
  [后端 00:00:100] query() 函数开始
  [后端 00:00:200] 调用 queryLLM()
  [后端 00:01:500] LLM 流式响应结束
  [后端 00:01:600] yield assistantMessage  ← 第 1 次 yield
                   ↓
  [前端 00:01:600] 收到 message（类型：assistant，包含 tool_use）
  [前端 00:01:610] setMessages([..., assistantMessage])
  [前端 00:01:620] 渲染 UI：显示 "让我为你搜索..." + "工具：WebSearch"
  [前端 00:01:630] 循环暂停，等待下一个 yield...  ← 阻塞在这里
                   ↓
  [后端 00:01:700] 检测到 tool_use，开始执行 WebSearch
  [后端 00:02:500] yield toolProgressMessage  ← 第 2 次 yield
                   ↓
  [前端 00:02:500] 收到 message（类型：progress）
  [前端 00:02:510] setMessages([..., toolProgressMessage])
  [前端 00:02:520] 渲染 UI：显示 "WebSearch 执行中..."
  [前端 00:02:530] 循环暂停，等待下一个 yield...  ← 又阻塞在这里
                   ↓
  [后端 00:03:000] WebSearch 完成，得到结果
  [后端 00:03:100] 递归调用 query([...messages, assistantMessage, toolResult])
  [后端 00:03:200] 再次调用 queryLLM()
  [后端 00:04:500] LLM 返回最终答案
  [后端 00:04:600] yield finalMessage  ← 第 3 次 yield
                   ↓
  [前端 00:04:600] 收到 message（类型：assistant）
  [前端 00:04:610] setMessages([..., finalMessage])
  [前端 00:04:620] 渲染 UI：显示 "根据搜索结果，Go 1.22..."
  [前端 00:04:630] 循环暂停，等待下一个 yield...
                   ↓
  [后端 00:04:700] query() 函数结束（没有更多 yield）
                   ↓
  [前端 00:04:700] for await 循环结束
  [前端 00:04:710] setIsLoading(false)
  [前端 00:04:720] 显示输入框，用户可以继续提问

  关键点总结

  1. 前端不是一次性收到所有消息

  - ❌ 错误理解：query() 返回一个包含所有消息的数组
  - ✅ 正确理解：query() 是一个生成器，每次 yield 才返回一个消息

  2. for await 循环会在每个 yield 处暂停

  - 前端不会"提前收到完整流信息"
  - 每收到一个消息，就更新 UI，然后等待下一个消息（这里的"等待/阻塞"是 for await 的异步挂起，不会阻塞事件循环，UI 仍可正常渲染）
  - 只有当 query() 函数完全结束，循环才会退出

  3. 工具执行期间，前端在等待

  // 后端
  yield assistantMessage  // ← 前端收到，显示 "AI 要调用工具了"
                           // ← 前端的 for await 循环在这里等待

  // 执行工具（可能需要 3 秒）
  executeTool(...)
                           // ← 前端仍在等待...

  yield toolResult        // ← 前端收到，显示 "工具执行完成"

  Web 版本如何实现？

  基于这个理解，Web 版本的正确实现应该是：

  Go 后端（使用 Channel 模拟生成器）

  // handleChatStream streams every message produced by query to the client as
  // Server-Sent Events, mirroring the front end's `for await` loop, and emits
  // a final "complete" event once the producer closes the channel.
  //
  // NOTE(review): messages, systemPrompt and tools are not defined in this
  // sketch; presumably they are decoded from the request — confirm.
  func handleChatStream(w http.ResponseWriter, r *http.Request) {
      // SSE needs incremental flushing; fail fast when the writer cannot do it
      // instead of passing a nil flusher to sendSSE.
      flusher, ok := w.(http.Flusher)
      if !ok {
          http.Error(w, "streaming unsupported", http.StatusInternalServerError)
          return
      }

      // Headers must be set before the first event is written.
      w.Header().Set("Content-Type", "text/event-stream")
      w.Header().Set("Cache-Control", "no-cache")

      // Unbuffered channel: the producer advances only as fast as we consume,
      // which is what makes it behave like a generator.
      messageChan := make(chan Message)

      // Run the producer asynchronously; closing the channel marks the end of
      // the stream.
      go func() {
          defer close(messageChan)
          query(messages, systemPrompt, tools, messageChan)
      }()

      for {
          select {
          case <-r.Context().Done():
              // Client disconnected. Keep draining so the producer is never
              // stuck forever on an unbuffered send (goroutine leak).
              go func() {
                  for range messageChan {
                  }
              }()
              return

          case message, open := <-messageChan:
              if !open {
                  // All messages have been delivered.
                  sendSSE(w, flusher, "complete", nil)
                  return
              }

              switch message.Type {
              case "assistant":
                  sendSSE(w, flusher, "assistant_message", message.Data)
              case "progress":
                  sendSSE(w, flusher, "tool_progress", message.Data)
              case "tool_result":
                  // query emits tool results too; forward them instead of
                  // silently dropping them.
                  sendSSE(w, flusher, "tool_result", message.Data)
              }
          }
      }
  }

  // query runs one LLM turn, pushes the assistant message to out, executes any
  // requested tools (pushing a progress and a result message per tool), and
  // then recurses with the extended history until a turn requests no tools.
  //
  // The caller owns out and is responsible for closing it after query returns.
  //
  // NOTE(review): assumes the value returned by queryLLM is what belongs in
  // Message.Data for the history entry as well as for the client — confirm
  // against the real Message definition.
  func query(messages []Message, systemPrompt []string, tools []Tool, out chan<- Message) {
      // 1. Ask the model.
      assistantMsg := queryLLM(messages, systemPrompt, tools)

      // 2. Emit the assistant message; keep it so the same value can be
      //    appended to the history for the next turn.
      assistantMessage := Message{Type: "assistant", Data: assistantMsg}
      out <- assistantMessage

      // 3. Detect tool calls.
      toolUses := extractToolUses(assistantMsg)

      if len(toolUses) == 0 {
          return // no tool calls: the conversation turn is finished
      }

      // 4. Run the tools, collecting their results for the follow-up request.
      toolResults := make([]Message, 0, len(toolUses))
      for _, toolUse := range toolUses {
          out <- Message{Type: "progress", Data: "执行工具中..."}

          result := executeTool(toolUse.Name, toolUse.Input)

          resultMessage := Message{Type: "tool_result", Data: result}
          out <- resultMessage
          toolResults = append(toolResults, resultMessage)
      }

      // 5. Recurse with history + assistant message + tool results. Build a
      //    fresh slice so the caller's backing array is never written to.
      newMessages := make([]Message, 0, len(messages)+1+len(toolResults))
      newMessages = append(newMessages, messages...)
      newMessages = append(newMessages, assistantMessage)
      newMessages = append(newMessages, toolResults...)
      query(newMessages, systemPrompt, tools, out)
  }

bash 复制代码

 1. TaskTool 的本质：AI 委派给 AI 的"子任务系统"

  核心概念

  TaskTool 不是给用户用的，而是给 AI 自己用的。它允许主 AI 将复杂任务委派给另一个独立的 AI 实例（子 Agent）去完成。

  用户 → 主 AI（Claude Sonnet）
           ↓
        决定任务太复杂
           ↓
        调用 TaskTool
           ↓
      启动子 Agent（独立的 AI 对话）
           ↓
      子 Agent 执行任务（可以调用工具）
           ↓
      返回结果给主 AI
           ↓
      主 AI 继续处理

  ---
  2. 为什么需要 TaskTool？解决了什么问题？

  问题 1：复杂任务的并行处理

  场景：用户要求 "帮我重构项目，同时写测试，并更新文档"

  没有 TaskTool 的情况：
  主 AI：
  1. 先重构代码（调用 FileEdit 工具 10 次）
  2. 再写测试（调用 FileWrite 工具 5 次）
  3. 最后更新文档（调用 FileEdit 工具 3 次）

  总耗时：串行执行，需要 3 轮对话，耗时很长

  有 TaskTool 的情况：
  主 AI：
  1. 调用 TaskTool（子任务 1：重构代码）
  2. 调用 TaskTool（子任务 2：写测试）
  3. 调用 TaskTool（子任务 3：更新文档）

  并发执行！所有子任务同时进行，耗时大幅减少

  问题 2：避免上下文污染

  场景：用户在讨论架构设计，突然要求 "顺便帮我搜索一下 Go 1.22 的新特性"

  没有 TaskTool：
  主对话上下文：
  - 架构设计讨论（5000 tokens）
  - 搜索任务（WebSearch 调用）
  - 搜索结果（3000 tokens）
  - 继续架构讨论（但上下文已经被污染）

  问题：搜索任务的中间过程会干扰主对话

  有 TaskTool：
  主对话上下文：
  - 架构设计讨论（5000 tokens）
  - 调用 TaskTool（仅记录"搜索任务"和"最终结果"）
  - 继续架构讨论（上下文干净）

  子任务上下文（独立）：
  - 搜索任务（WebSearch 调用）
  - 搜索结果（3000 tokens）
  - [子任务结束，上下文丢弃]

  优势：主对话不会被子任务的中间过程污染

  问题 3：专业化分工

  场景：用户要求 "检查代码质量，并修复 Bug"

  有 Agent 配置的情况：
  主 AI 分析任务：
  - 代码检查 → 调用 TaskTool(subagent_type="code-reviewer")
    - code-reviewer agent 专注于代码审查
    - 只能使用 FileRead、Grep 工具（不能修改代码）

  - 修复 Bug → 调用 TaskTool(subagent_type="bug-fixer")
    - bug-fixer agent 专注于修复
    - 可以使用 FileEdit、Bash 工具（能修改代码）

  优势：不同 agent 有不同的系统提示和工具权限，各司其职

  ---
  3. TaskTool 的实际工作流程

  完整示例：用户要求搜索并总结

  用户输入：
  "搜索 Go 1.22 的新特性，并用中文总结"

  主 AI 的思考过程：
  主 AI（Claude Sonnet）：
  "这个任务需要搜索 + 总结，我可以自己做，但也可以委派给子 Agent..."

  决定：使用 TaskTool 委派

  主 AI 调用 TaskTool：
  {
    "tool": "Task",
    "input": {
      "description": "搜索并总结 Go 1.22",
      "prompt": "搜索 Go 1.22 的新特性，并用中文总结主要内容",
      "subagent_type": "general-purpose",
      "model_name": "task"
    }
  }

  TaskTool 执行过程：

  // TaskTool.tsx:214-291
  for await (const message of query(
    [createUserMessage("搜索 Go 1.22 的新特性，并用中文总结")],
    taskPrompt,  // ← 子 Agent 的系统提示
    context,
    hasPermissionsToUseTool,
    { tools: [WebSearchTool, WebFetchTool], ... }  // ← 子 Agent 可用的工具
  )) {
    // 子 Agent 的对话过程：
    // 1. 子 AI 决定调用 WebSearch
    // 2. 执行 WebSearch（找到 10 条结果）
    // 3. 子 AI 根据结果生成总结
    // 4. 返回最终结果

    yield {
      type: 'progress',
      content: message  // ← 主 AI 看不到细节，只看到进度
    }
  }

  // 最终返回给主 AI 的结果
  yield {
    type: 'result',
    data: "Go 1.22 的主要新特性包括：\n1. 增强的 for-range 循环..."
  }

  主 AI 收到结果后：
  主 AI：
  "子 Agent 已经完成搜索和总结了，结果是：
  Go 1.22 的主要新特性包括：...

  现在我把这个结果告诉用户。"

  ---
  4. TaskTool 的关键特性

  特性 1：独立的对话上下文

  // TaskTool.tsx:132
  const messages: MessageType[] = [createUserMessage(effectivePrompt)]
  // ↑ 全新的消息列表，不包含主对话的历史

  意义：
  - 子任务有自己独立的对话历史
  - 不会消耗主对话的上下文窗口
  - 子任务结束后，上下文自动释放

  特性 2：工具权限隔离

  // TaskTool.tsx:133-144
  let tools = await getTaskTools(safeMode)

  // 应用 agent 的工具过滤
  if (toolFilter) {
    if (toolFilter === '*') {
      // 保留所有工具
    } else if (Array.isArray(toolFilter)) {
      tools = tools.filter(tool => toolFilter.includes(tool.name))
    }
  }

  示例配置（.kode/agents/code-reviewer.md）：
  ---
  name: code-reviewer
  description: "Code review specialist"
  tools: ["FileRead", "Grep"]  # ← 只能读文件，不能修改
  ---

  You are a code reviewer. Focus on finding bugs and suggesting improvements.

  特性 3：模型切换

  // TaskTool.tsx:90, 118-123
  let effectiveModel = model_name || 'task'

  if (!model_name && agentConfig.model_name) {
    if (agentConfig.model_name !== 'inherit') {
      effectiveModel = agentConfig.model_name
    }
  }

  意义：
  - 主 AI 可以用 Claude Sonnet（强大但昂贵）
  - 子任务可以用 Claude Haiku（快速且便宜）
  - 节省成本，提高效率

  特性 4：并发执行

  // TaskTool.tsx:335-339
  isReadOnly() {
    return true  // ← 只读工具
  },
  isConcurrencySafe() {
    return true  // ← 支持并发
  }

  示例：
  // 主 AI 可以同时调用多个 TaskTool
  assistant.toolCalls = [
    { tool: "Task", input: { prompt: "搜索 Go 1.22" } },
    { tool: "Task", input: { prompt: "搜索 Rust 1.75" } },
    { tool: "Task", input: { prompt: "搜索 Python 3.12" } },
  ]

  // 三个搜索任务并发执行！

  ---
  5. TaskTool 的实际使用场景

  场景 1：代码审查自动化

  用户："写一个快速排序函数"

  主 AI：
  1. 调用 FileWrite 写入代码
  2. 调用 TaskTool(subagent_type="code-reviewer", prompt="审查刚才写的代码")
  3. 子 Agent 审查代码，返回建议
  4. 主 AI 根据建议修复代码

  场景 2：多模块并行开发

  用户："重构整个项目"

  主 AI：
  并发调用 5 个 TaskTool：
  1. Task(prompt="重构 auth 模块")
  2. Task(prompt="重构 database 模块")
  3. Task(prompt="重构 api 模块")
  4. Task(prompt="重构 utils 模块")
  5. Task(prompt="更新所有文档")

  所有任务同时执行，大幅提速

  场景 3：研究 + 实现

  用户："实现 OAuth 2.0 认证"

  主 AI：
  1. Task(prompt="研究 OAuth 2.0 的最佳实践")
     - 子 Agent 使用 WebSearch、WebFetch 研究
     - 返回实现建议

  2. 主 AI 根据建议实现代码
  3. Task(prompt="测试 OAuth 2.0 实现")
     - 子 Agent 执行测试
     - 返回测试结果

  4. 主 AI 修复问题

  ---
  6. TaskTool 的优势总结

  | 优势    | 说明                 | 实际价值        |
  |-------|--------------------|-------------|
  | 并行执行  | 多个 TaskTool 可以同时运行 | 速度提升 3-10 倍 |
  | 上下文隔离 | 子任务不污染主对话          | 主对话保持清晰     |
  | 专业化分工 | 不同 agent 有不同能力     | 代码质量更高      |
  | 成本优化  | 子任务用便宜的模型          | 成本降低 50%+   |
  | 可扩展性  | 用户可自定义 agent       | 适应各种场景      |

  ---
  7. Web 版本是否需要 TaskTool？

  你的文档中去掉了 agent 自定义，那 TaskTool 还有用吗？

  答案：仍然有用！但需要简化

  简化后的 TaskTool 价值

  即使去掉 agent 自定义功能，TaskTool 仍然可以：

  1. 并行搜索任务

  用户："对比 Vue 3、React 18、Angular 15 的性能"

  主 AI 并发调用：
  - Task(prompt="搜索 Vue 3 性能数据")
  - Task(prompt="搜索 React 18 性能数据")
  - Task(prompt="搜索 Angular 15 性能数据")

  同时执行，3 秒内完成（而不是 9 秒）

  2. 复杂的多步骤任务

  用户："研究并实现 WebSocket 聊天功能"

  主 AI 调用：
  Task(prompt="先搜索 WebSocket 最佳实践，然后给出 Go 语言的实现建议")

  子 Agent：
  1. WebSearch("WebSocket best practices")
  2. WebFetch(抓取文档)
  3. 生成 Go 代码建议
  4. 返回完整报告给主 AI

  3. 减少主对话的 token 消耗

  主对话（1000 tokens）
     ↓
  调用 TaskTool
     ↓
  子任务（使用 5000 tokens 搜索、处理）
     ↓
  返回简洁结果（100 tokens）
     ↓
  主对话继续（只增加 100 tokens）

  节省：主对话不包含子任务的 5000 tokens

  简化版 TaskTool 实现建议

  // Web 版本的简化 TaskTool
  type SimplifiedTaskTool struct{}

  // Call runs the task prompt in an isolated sub-agent conversation and
  // returns a channel that carries one "progress" item per sub-agent message
  // followed by a single "result" item; the channel is closed afterwards.
  //
  // NOTE(review): assumes query (the channel-returning variant used here)
  // yields Message values — confirm against its definition.
  func (t *SimplifiedTaskTool) Call(input TaskInput, ctx ToolContext) <-chan ToolResult {
      results := make(chan ToolResult)

      go func() {
          defer close(results)

          // Fixed system prompt (no agent configuration file is loaded).
          systemPrompt := []string{
              "You are a helpful task assistant.",
              "Use available tools to complete the task efficiently.",
          }

          // Fixed tool list.
          tools := []Tool{WebSearchTool, WebFetchTool}

          // Sub-tasks always use the "task" model pointer.
          model := "task"

          // Fresh history: only the task prompt, nothing from the main chat.
          messages := []Message{
              {Type: "user", Content: input.Prompt},
          }

          // Remember the most recent sub-agent message. The local messages
          // slice is never extended, so reading its last element would hand
          // the user's prompt back as the "result".
          var lastMessage Message
          for aiMessage := range query(messages, systemPrompt, tools, model, ctx) {
              lastMessage = aiMessage

              // Report progress with a shortened preview.
              results <- ToolResult{
                  Type:    "progress",
                  Content: truncate(aiMessage.Content, 200),
              }
          }

          // Final result: content of the last sub-agent message (zero value
          // when the sub-agent produced nothing).
          results <- ToolResult{Type: "result", Content: lastMessage.Content}
      }()

      return results
  }

  ---
  8. 最终建议

  Web 版本的 TaskTool 策略

  推荐：保留简化版 TaskTool

  保留理由

  1. ✅ 并行执行：多个搜索任务同时进行，速度快
  2. ✅ 上下文隔离：复杂任务不污染主对话
  3. ✅ 成本优化：子任务用便宜的模型

  去除功能

  - ❌ Agent 配置文件加载
  - ❌ 工具权限过滤
  - ❌ 自定义系统提示
  - ❌ 模型覆盖逻辑

  简化后的效果

  用户 → 主 AI（GPT-4o，贵）
           ↓
        调用 TaskTool
           ↓
      子 Agent（Haiku，便宜）
           ↓
      执行搜索任务（WebSearch + WebFetch）
           ↓
      返回简洁结果
           ↓
      主 AI 继续（节省 token 和成本）

  ---
  总结

  TaskTool 的核心价值：
  1. 让 AI 能委派任务给 AI（就像人类团队协作）
  2. 并行执行复杂任务（提速 3-10 倍）
  3. 隔离上下文（保持主对话清晰）
  4. 节省成本（子任务用便宜模型）

  Web 版本建议：
  - 保留 TaskTool 的核心逻辑（并行、隔离、成本优化）
  - 去除 Agent 自定义功能（降低复杂度）
  - 固定工具列表（WebSearch、WebFetch）
  - 固定系统提示（通用助手）

  这样既保留了 TaskTool 的核心价值，又大幅简化了实现复杂度！

bash 复制代码

Kode 上下文长度处理完整逻辑

  1. 核心架构总览

  Kode 的上下文处理采用三层防护机制：

  第一层：Token 计数监控（实时）
     ↓
  第二层：自动压缩触发检测（每次 query 前）
     ↓
  第三层：压缩执行（多种策略）

  ---
  2. 第一层：Token 计数系统

  2.1 Token 计数逻辑（src/utils/tokens.ts）

  export function countTokens(messages: Message[]): number {
    // 【关键】从最新消息往前找，找到第一个包含 usage 的 assistant 消息
    let i = messages.length - 1
    while (i >= 0) {
      const message = messages[i]
      if (
        message?.type === 'assistant' &&
        'usage' in message.message &&
        // 排除合成消息（如系统提示）
        !(SYNTHETIC_ASSISTANT_MESSAGES.has(message.message.content[0].text))
      ) {
        const { usage } = message.message
        return (
          usage.input_tokens +                        // 输入 token
          (usage.cache_creation_input_tokens ?? 0) +  // 缓存写入 token
          (usage.cache_read_input_tokens ?? 0) +      // 缓存读取 token
          usage.output_tokens                         // 输出 token
        )
      }
      i--
    }
    return 0  // 没找到，返回 0
  }

  核心原理：
  - ✅ 利用 LLM API 返回的 usage 数据（精确计数）
  - ✅ 不需要自己实现 tokenizer（避免不准确）
  - ✅ 只看最新的 assistant 消息（因为每次请求都会携带完整历史，最新一次响应的 usage 已经覆盖了全部上下文）

  示例：
  消息历史：
  1. 用户："你好"
  2. AI："你好！"（usage: input=50, output=10）
  3. 用户："帮我写代码"
  4. AI："好的..."（usage: input=150, output=80）← 这个 usage 包含了所有历史

  countTokens() 返回：150 + 80 = 230

  ---
  3. 第二层：自动压缩触发检测

  3.1 触发时机（src/query.ts:177-183）

  export async function* query(...) {
    // 【关键】每次 query 开始前都会检查
    const { messages: processedMessages, wasCompacted } = await checkAutoCompact(
      messages,
      toolUseContext,
    )
    if (wasCompacted) {
      messages = processedMessages  // 使用压缩后的消息
    }

    // 继续执行 query...
  }

  3.2 触发条件（src/utils/autoCompactCore.ts:89-96）

  async function shouldAutoCompact(messages: Message[]): Promise<boolean> {
    // 至少 3 条消息才考虑压缩
    if (messages.length < 3) return false

    // 计算当前 token 数量
    const tokenCount = countTokens(messages)

    // 获取阈值
    const { isAboveAutoCompactThreshold } = await calculateThresholds(tokenCount)

    return isAboveAutoCompactThreshold
  }

  3.3 阈值计算（src/utils/autoCompactCore.ts:73-83）

  async function calculateThresholds(tokenCount: number) {
    // 获取当前模型的上下文限制
    const contextLimit = await getCompressionModelContextLimit()

    // 【关键】阈值是上下文限制的 92%
    const AUTO_COMPACT_THRESHOLD_RATIO = 0.92
    const autoCompactThreshold = contextLimit * AUTO_COMPACT_THRESHOLD_RATIO

    return {
      isAboveAutoCompactThreshold: tokenCount >= autoCompactThreshold,
      percentUsed: Math.round((tokenCount / contextLimit) * 100),
      tokensRemaining: Math.max(0, autoCompactThreshold - tokenCount),
      contextLimit,
    }
  }

  示例计算：
  当前模型：Claude Sonnet 4（上下文：200,000 tokens）
  阈值：200,000 * 0.92 = 184,000 tokens

  当前使用：185,000 tokens
  185,000 >= 184,000 → 触发自动压缩

  3.4 获取模型上下文限制（src/utils/autoCompactCore.ts:24-39）

  async function getCompressionModelContextLimit(): Promise<number> {
    try {
      // 使用 ModelManager 获取当前 main 模型的配置
      const modelManager = getModelManager()
      const modelProfile = modelManager.getModel('main')

      if (modelProfile?.contextLength) {
        return modelProfile.contextLength
      }

      // 降级默认值
      return 200_000
    } catch (error) {
      return 200_000
    }
  }

  ---
  4. 第三层：压缩执行策略

  4.1 Kode 的压缩策略（src/utils/autoCompactCore.ts:113-137）

  export async function checkAutoCompact(
    messages: Message[],
    toolUseContext: any,
  ): Promise<{ messages: Message[]; wasCompacted: boolean }> {
    // 检查是否需要压缩
    if (!(await shouldAutoCompact(messages))) {
      return { messages, wasCompacted: false }
    }

    try {
      // 执行压缩
      const compactedMessages = await executeAutoCompact(messages, toolUseContext)

      return {
        messages: compactedMessages,
        wasCompacted: true,
      }
    } catch (error) {
      // 【关键】压缩失败时优雅降级，继续使用原消息
      console.error('Auto-compact failed, continuing with original messages:', error)
      return { messages, wasCompacted: false }
    }
  }

  4.2 压缩执行流程（src/utils/autoCompactCore.ts:146-223）

  async function executeAutoCompact(
    messages: Message[],
    toolUseContext: any,
  ): Promise<Message[]> {

    // 步骤 1：生成对话摘要
    const summaryRequest = createUserMessage(COMPRESSION_PROMPT)

    const summaryResponse = await queryLLM(
      normalizeMessagesForAPI([...messages, summaryRequest]),
      [
        'You are a helpful AI assistant tasked with creating comprehensive conversation summaries...'
      ],
      0,
      toolUseContext.options.tools,
      toolUseContext.abortController.signal,
      {
        safeMode: false,
        model: 'main',  // ← 使用主模型生成摘要
        prependCLISysprompt: true,
      },
    )

    // 提取摘要文本
    const summary = extractSummaryText(summaryResponse)

    // 步骤 2：自动恢复最近访问的文件
    const recoveredFiles = await selectAndReadFiles()

    // 步骤 3：构建压缩后的消息列表
    const compactedMessages = [
      createUserMessage(
        'Context automatically compressed due to token limit. Essential information preserved.'
      ),
      summaryResponse,  // 摘要消息
    ]

    // 步骤 4：附加恢复的文件
    if (recoveredFiles.length > 0) {
      for (const file of recoveredFiles) {
        const contentWithLines = addLineNumbers({ content: file.content, startLine: 1 })
        const recoveryMessage = createUserMessage(
          `**Recovered File: ${file.path}**\n\n\`\`\`\n${contentWithLines}\n\`\`\`\n\n` +
          `*Automatically recovered (${file.tokens} tokens)${file.truncated ? ' [truncated]' : ''}*`
        )
        compactedMessages.push(recoveryMessage)
      }
    }

    // 步骤 5：清理缓存状态
    getMessagesSetter()([])
    getContext.cache.clear?.()
    getCodeStyle.cache.clear?.()
    resetFileFreshnessSession()

    return compactedMessages
  }

  4.3 摘要生成 Prompt（src/utils/autoCompactCore.ts:41-67）

  const COMPRESSION_PROMPT = `Please provide a comprehensive summary of our conversation structured as follows:

  ## Technical Context
  Development environment, tools, frameworks, and configurations in use...

  ## Project Overview
  Main project goals, features, and scope...

  ## Code Changes
  Files created, modified, or analyzed during our conversation...

  ## Debugging & Issues
  Problems encountered and their root causes...

  ## Current Status
  What we just completed successfully...

  ## Pending Tasks
  Immediate next steps and priorities...

  ## User Preferences
  Coding style, formatting, and organizational preferences...

  ## Key Decisions
  Important technical decisions made and their rationale...

  Focus on information essential for continuing the conversation effectively...`

  摘要示例输出：
  ## Technical Context
  - Go 1.22 backend with Gin framework
  - PostgreSQL 15 database
  - Vue 2 frontend

  ## Code Changes
  - Created `auth.go` with JWT authentication
  - Modified `main.go` to add auth middleware
  - Updated `schema.sql` with users table

  ## Current Status
  - Authentication system implemented and tested
  - All tests passing

  ## Pending Tasks
  - Add password reset functionality
  - Implement OAuth 2.0 integration

  ---
  5. 文件自动恢复机制

  5.1 文件选择逻辑（src/utils/fileRecoveryCore.ts:22-71）

  export async function selectAndReadFiles() {
    // 配置限制
    const MAX_FILES_TO_RECOVER = 5
    const MAX_TOKENS_PER_FILE = 10_000
    const MAX_TOTAL_FILE_TOKENS = 50_000

    // 获取最重要的文件（基于访问频率和时间）
    const importantFiles = fileFreshnessService.getImportantFiles(MAX_FILES_TO_RECOVER)

    const results = []
    let totalTokens = 0

    for (const fileInfo of importantFiles) {
      try {
        // 读取文件内容
        const { content } = readTextContent(fileInfo.path)
        const estimatedTokens = Math.ceil(content.length * 0.25)

        // 单文件限制：超过 10,000 tokens 则截断
        let finalContent = content
        let truncated = false

        if (estimatedTokens > MAX_TOKENS_PER_FILE) {
          const maxChars = Math.floor(MAX_TOKENS_PER_FILE / 0.25)
          finalContent = content.substring(0, maxChars)
          truncated = true
        }

        const finalTokens = Math.min(estimatedTokens, MAX_TOKENS_PER_FILE)

        // 总量限制：超过 50,000 tokens 则停止
        if (totalTokens + finalTokens > MAX_TOTAL_FILE_TOKENS) {
          break
        }

        totalTokens += finalTokens
        results.push({
          path: fileInfo.path,
          content: finalContent,
          tokens: finalTokens,
          truncated,
        })
      } catch (error) {
        console.error(`Failed to read file for recovery: ${fileInfo.path}`, error)
      }
    }

    return results
  }

  文件重要性评分依据：
  - 最近修改时间
  - 访问频率
  - 文件类型（.go、.ts 等源代码优先）

  ---
  6. 完整的压缩流程图

  时刻 0: 用户发送消息
           ↓
  时刻 1: query() 函数开始
           ↓
  时刻 2: checkAutoCompact(messages)
           ├─ 计算 token 数量：185,000
           ├─ 获取上下文限制：200,000
           ├─ 计算阈值：200,000 * 0.92 = 184,000
           └─ 判断：185,000 >= 184,000 → 触发压缩
           ↓
  时刻 3: executeAutoCompact()
           ├─ 步骤 1：生成摘要
           │   └─ 调用 LLM（model='main'）
           │       └─ 输入：所有历史消息 + 摘要 prompt
           │       └─ 输出：结构化摘要（约 3000 tokens）
           ├─ 步骤 2：恢复文件
           │   ├─ 获取最近访问的 5 个文件
           │   ├─ 读取文件内容（限制 10,000 tokens/文件）
           │   └─ 总计 5 个文件，约 30,000 tokens
           ├─ 步骤 3：构建压缩后的消息
           │   └─ [压缩提示（user 消息）] + [摘要消息] + [文件 1] + [文件 2] + ...
           └─ 步骤 4：清理缓存
           ↓
  时刻 4: 返回压缩后的消息（约 35,000 tokens）
           ↓
  时刻 5: query() 继续执行（使用压缩后的消息）
           ↓
  时刻 6: 调用 LLM，生成响应

  ---
  7. MessageContextManager 的其他策略

  虽然 Kode 实际使用的是 autoCompactCore.ts 中的逻辑，但 messageContextManager.ts 提供了备用策略：

  7.1 策略 1：保留最近消息（Preserve Recent）

  private preserveRecentMessages(messages, strategy) {
    // 估计可以保留多少条消息
    const preserveCount = strategy.preserveCount || this.estimateMessageCount(strategy.maxTokens)

    // 直接保留最后 N 条
    const truncatedMessages = messages.slice(-preserveCount)

    return {
      truncatedMessages,
      removedCount: messages.length - truncatedMessages.length,
      preservedTokens: countTokens(truncatedMessages),
    }
  }

  示例：
  原消息：50 条
  保留：最后 10 条
  删除：前 40 条

  7.2 策略 2：保留重要消息（Preserve Important）

  private preserveImportantMessages(messages, strategy) {
    const importantMessages = []
    const recentMessages = messages.slice(-5)  // 最后 5 条

    // 找出重要消息（包含错误、警告等）
    for (const message of messages) {
      if (this.isImportantMessage(message)) {
        importantMessages.push(message)
      }
    }

    // 合并去重
    const truncatedMessages = [...importantMessages, ...recentMessages].sort(byOriginalOrder)

    return { truncatedMessages, ... }
  }

  private isImportantMessage(message) {
    if (message.type === 'user') return true  // 用户消息总是重要

    if (message.type === 'assistant') {
      const text = extractText(message).toLowerCase()
      // 包含错误关键词的消息是重要的
      return text.includes('error') ||
             text.includes('failed') ||
             text.includes('warning') ||
             text.includes('critical')
    }

    return false
  }

  示例：
  原消息：50 条
  ├─ 重要消息：8 条（包含错误、用户消息）
  └─ 最近消息：5 条

  保留：8 + 5 = 13 条（去重后可能更少）
  删除：37 条

  7.3 策略 3：智能压缩（Smart Compression）

  private async smartCompressionStrategy(messages, strategy) {
    // 保留最后 30% 的消息
    const recentCount = Math.min(10, Math.floor(messages.length * 0.3))
    const recentMessages = messages.slice(-recentCount)
    const olderMessages = messages.slice(0, -recentCount)

    // 为旧消息生成摘要
    const summary = this.createMessagesSummary(olderMessages)

    // 创建摘要消息
    const summaryMessage = {
      type: 'assistant',
      message: {
        role: 'assistant',
        content: [{
          type: 'text',
          text: `[CONVERSATION SUMMARY - ${olderMessages.length} messages compressed]\n\n${summary}`
        }]
      },
      costUSD: 0,
      durationMs: 0,
      uuid: crypto.randomUUID()
    }

    return [summaryMessage, ...recentMessages]
  }

  示例：
  原消息：50 条
  ├─ 旧消息：40 条 → 生成摘要
  └─ 最近消息：10 条 → 完整保留（min(10, floor(50 × 0.3)) = 10，上限为 10 条）

  压缩后：1 条摘要 + 10 条最近 = 11 条

  ---
  8. 关键设计亮点

  8.1 精确的 Token 计数

  // ✅ 利用 LLM 返回的 usage
  const tokenCount = usage.input_tokens + usage.output_tokens

  优势：
  - 不需要自己实现 tokenizer
  - 精确度 100%（官方计数）
  - 完整实现还会加上缓存 token（cache_creation_input_tokens / cache_read_input_tokens），上面的代码为简化写法

  8.2 保守的触发阈值

  const AUTO_COMPACT_THRESHOLD_RATIO = 0.92  // 92%

  原因：
  - 留 8% 缓冲区，防止单次请求超限
  - 考虑系统提示、工具描述等额外 token
  - 提前压缩，避免紧急情况

  8.3 文件自动恢复

  const recoveredFiles = await selectAndReadFiles()
  // 恢复最近访问的 5 个文件（最多 50,000 tokens）

  价值：
  - 保留开发上下文（正在编辑的文件）
  - 压缩后仍可继续编码工作
  - 限制总量，避免压缩失效

  8.4 优雅降级

  try {
    const compactedMessages = await executeAutoCompact(...)
    return { messages: compactedMessages, wasCompacted: true }
  } catch (error) {
    console.error('Auto-compact failed, continuing with original messages:', error)
    return { messages, wasCompacted: false }  // ← 失败时继续使用原消息
  }

  保证系统可用性：
  - 压缩失败不影响对话
  - 最多是 token 超限，LLM 会报错
  - 用户可以手动清空对话

  ---
  9. Web 版本实现建议

  基于 Kode 的逻辑，Web 版本应该：

  9.1 完整保留的逻辑

  // 1. Token 计数（使用 LLM 返回的 usage）
  // countTokens returns the context size reported by the most recent assistant
  // message that carries usage data, or 0 when no such message exists.
  //
  // Cache tokens are part of the context the model actually processed, so they
  // are counted together with the plain input and output tokens.
  //
  // NOTE(review): assumes Usage exposes CacheCreationInputTokens and
  // CacheReadInputTokens as ints (0 when the provider reports none) — confirm
  // against the Usage definition.
  func countTokens(messages []Message) int {
      // Walk backwards: the newest usage record already covers the whole
      // history that was sent with that request.
      for i := len(messages) - 1; i >= 0; i-- {
          usage := messages[i].Usage
          if messages[i].Type != "assistant" || usage == nil {
              continue
          }
          return usage.InputTokens +
              usage.CacheCreationInputTokens +
              usage.CacheReadInputTokens +
              usage.OutputTokens
      }
      return 0
  }

  // 2. 触发检测
  // shouldAutoCompact reports whether the conversation has reached the
  // auto-compaction threshold (92% of modelContextLimit). Conversations with
  // fewer than three messages are never compacted.
  func shouldAutoCompact(messages []Message, modelContextLimit int) bool {
      const (
          minMessages    = 3
          thresholdRatio = 0.92
      )

      if len(messages) >= minMessages {
          used := float64(countTokens(messages))
          limit := float64(modelContextLimit) * thresholdRatio
          return used >= limit
      }
      return false
  }

  // 3. 压缩执行
  // executeAutoCompact asks the model to summarise the conversation and
  // returns a replacement history consisting of a compression notice followed
  // by the summary response.
  //
  // The error result is always nil in this sketch.
  // NOTE(review): queryLLM exposes no error here — surface its failures once
  // its real signature is known.
  func executeAutoCompact(messages []Message, model string) ([]Message, error) {
      summaryPrompt := "Please provide a comprehensive summary of our conversation..."

      // The prompt must be wrapped in a Message (a bare string cannot be
      // appended to []Message). Copy into a fresh slice so the caller's
      // backing array is never written to.
      request := make([]Message, 0, len(messages)+1)
      request = append(request, messages...)
      request = append(request, Message{Type: "user", Content: summaryPrompt})

      summaryResponse := queryLLM(request, model)

      // Compacted history: notice + summary.
      compactedMessages := []Message{
          {Type: "user", Content: "Context automatically compressed..."},
          summaryResponse,
      }

      return compactedMessages, nil
  }

  9.2 去除的功能

  - ❌ 文件自动恢复（Web 环境无文件系统）
  - ❌ 缓存清理（fileFreshnessService、getContext.cache）
  - ❌ 多种压缩策略（只保留一种即可）

  ---
  10. 总结

  Kode 的上下文处理核心逻辑

  1. Token 计数：利用 LLM 返回的 usage（精确）
  2. 触发阈值：92% 上下文窗口（保守）
  3. 压缩策略：生成摘要 + 恢复文件（智能）
  4. 优雅降级：失败时继续使用原消息（可靠）

  关键数据

  | 参数     | 值             | 说明                |
  |--------|---------------|-------------------|
  | 触发阈值   | 92%           | 上下文窗口的 92%        |
  | 最小消息数  | 3 条           | 少于 3 条不触发         |
  | 恢复文件数  | 5 个           | 最多恢复 5 个文件        |
  | 单文件限制  | 10,000 tokens | 每个文件最多 10K tokens |
  | 文件总量限制 | 50,000 tokens | 所有文件总计 50K tokens |

bash 复制代码

Kode 采用了 适配器模式 + 工厂模式 的多层抽象架构来处理多模型兼容性问题。

  1. 模型适配器工厂系统

  核心文件：src/services/modelAdapterFactory.ts

  API 类型决策逻辑：
  private static determineAPIType(
    modelProfile: ModelProfile,
    capabilities: ModelCapabilities
  ): 'responses_api' | 'chat_completions' {
    // 1. 检查模型是否支持 Responses API
    if (capabilities.apiArchitecture.primary !== 'responses_api') {
      return 'chat_completions'
    }

    // 2. 检查是否为官方 OpenAI 端点
    const isOfficialOpenAI = !modelProfile.baseURL ||
      modelProfile.baseURL.includes('api.openai.com')

    // 3. 非官方端点使用 Chat Completions
    if (!isOfficialOpenAI) {
      return capabilities.apiArchitecture.fallback || 'chat_completions'
    }

    return capabilities.apiArchitecture.primary
  }

  2. 参数映射和转换机制

  统一请求参数接口

  export interface UnifiedRequestParams {
    messages: any[]
    systemPrompt: string[]
    tools: Tool[]
    maxTokens: number
    stream?: boolean
    reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high'
    temperature?: number
    previousResponseId?: string
    verbosity?: 'low' | 'medium' | 'high'
    allowedTools?: string[]
  }

  具体适配器实现

  GPT-5 Responses API 适配器 (src/services/adapters/responsesAPI.ts)：
  createRequest(params: UnifiedRequestParams): any {
    const request: any = {
      model: this.modelProfile.modelName,
      input: this.convertMessagesToInput(nonSystemMessages),
      instructions: this.buildInstructions(systemPrompt, systemMessages)
    }

    // GPT-5 特定参数
    if (this.shouldIncludeReasoningEffort()) {
      request.reasoning = {
        effort: params.reasoningEffort || 'medium'
      }
    }

    // 参数映射：max_tokens → max_completion_tokens
    request.max_completion_tokens = params.maxTokens

    return request
  }

  传统 Chat Completions 适配器 (src/services/adapters/chatCompletions.ts)：
  createRequest(params: UnifiedRequestParams): any {
    const request: any = {
      model: this.modelProfile.modelName,
      messages: fullMessages,
      max_tokens: maxTokens,  // 传统参数名
      temperature: this.getTemperature()
    }

    // O1 模型特殊处理
    if (this.modelProfile.modelName.startsWith('o1')) {
      delete request.temperature  // O1 不支持温度
      delete request.stream       // O1 不支持流式
    }

    return request
  }

  3. 模型能力定义系统

  核心文件：src/constants/modelCapabilities.ts

  能力注册表模式：
  const GPT5_CAPABILITIES: ModelCapabilities = {
    apiArchitecture: {
      primary: 'responses_api',
      fallback: 'chat_completions'
    },
    parameters: {
      maxTokensField: 'max_completion_tokens',  // 参数名映射
      supportsReasoningEffort: true,
      supportsVerbosity: true,
      temperatureMode: 'fixed_one'  // GPT-5 强制温度=1
    },
    toolCalling: {
      mode: 'custom_tools',
      supportsFreeform: true,  // 支持自由格式工具
      supportsAllowedTools: true
    }
  }

  4. 服务层参数转换

  核心文件：src/services/openai.ts

  智能参数转换：
  function applyModelSpecificTransformations(
    opts: OpenAI.ChatCompletionCreateParams,
  ): void {
    const features = getModelFeatures(opts.model)
    const isGPT5 = opts.model.toLowerCase().includes('gpt-5')

    // GPT-5 参数转换
    if (isGPT5 || features.usesMaxCompletionTokens) {
      // 强制使用 max_completion_tokens
      if ('max_tokens' in opts && !('max_completion_tokens' in opts)) {
        opts.max_completion_tokens = opts.max_tokens
        delete opts.max_tokens
      }

      // 强制温度 = 1
      if (features.requiresTemperatureOne && 'temperature' in opts) {
        opts.temperature = 1
      }
    }
  }

  5. 错误处理和兼容性策略

  分层错误处理机制

  参数级错误自动修复：
  const GPT5_ERROR_HANDLERS: ErrorHandler[] = [
    {
      type: ModelErrorType.MaxCompletionTokens,
      detect: errMsg => {
        return errMsg.includes("unsupported parameter: 'max_tokens'") &&
               errMsg.includes("'max_completion_tokens'")
      },
      fix: async opts => {
        // 自动修复参数
        opts.max_completion_tokens = opts.max_tokens
        delete opts.max_tokens
      },
    }
  ]

  6. 工具调用兼容性处理

  工具格式转换

  GPT-5 支持自由格式工具：
  buildTools(tools: Tool[]): any[] {
    if (this.capabilities.toolCalling.supportsFreeform) {
      return tools.map(tool => {
        const hasSchema = tool.inputJSONSchema || tool.inputSchema
        const isCustom = !hasSchema

        if (isCustom) {
          // 自定义工具格式
          return {
            type: 'custom',
            name: tool.name,
            description: tool.description || ''
          }
        } else {
          // 传统函数格式
          return {
            type: 'function',
            function: { /* ... */ }
          }
        }
      })
    }
  }

  总结

  Kode 项目的多模型兼容性处理采用了以下核心策略：

  1. 适配器模式：每个模型类型有专门的适配器处理参数转换
  2. 工厂模式：根据模型能力动态选择正确的适配器
  3. 统一接口：上层应用使用统一的参数接口
  4. 能力注册表：集中管理不同模型的能力特性
  5. 智能错误处理：自动检测和修复参数兼容性问题
  6. 渐进式降级：从高级 API 降级到兼容 API