后端服务实现:支持多语言的vLLM服务
本部分包含后端服务的完整实现,包括vLLM模型服务、多语言支持和API接口。
1. 后端目录结构
backend/
├── server.py # FastAPI服务入口
├── agent.py # Agent逻辑实现
├── i18n.py # 国际化支持
├── locales/ # 语言文件
│ ├── en.json
│ ├── zh-CN.json
│ └── ja.json
└── requirements.txt # 依赖项
2. 依赖项清单 (requirements.txt)
fastapi==0.104.1
uvicorn==0.24.0
vllm==0.2.7
transformers==4.36.0
torch==2.1.0
pydantic==2.5.0
python-multipart==0.0.6
pyyaml==6.0.1
3. 国际化支持模块 (i18n.py)
"""
国际化(i18n)支持模块
"""
import json
from pathlib import Path
from typing import Dict, Any
class I18n:
    """Internationalization (i18n) manager.

    Loads JSON translation files from the sibling ``locales`` directory and
    resolves dotted keys (e.g. ``"server.loading_model"``) against them,
    falling back to the default language and finally to the key itself.
    """

    def __init__(self, default_lang: str = 'en'):
        self.default_lang = default_lang
        self.current_lang = default_lang
        # language code -> nested dict of translations (as loaded from JSON)
        self.translations: Dict[str, Dict] = {}
        self.load_translations()

    def load_translations(self):
        """Load every ``*.json`` file found in the ``locales`` directory."""
        locales_dir = Path(__file__).parent / 'locales'
        for lang_file in locales_dir.glob('*.json'):
            lang_code = lang_file.stem  # e.g. "zh-CN" from "zh-CN.json"
            try:
                with open(lang_file, 'r', encoding='utf-8') as f:
                    self.translations[lang_code] = json.load(f)
            except (OSError, json.JSONDecodeError) as e:
                # A single broken locale file should not prevent startup.
                print(f"Failed to load language file {lang_file}: {e}")

    def set_language(self, lang: str):
        """Select the active language, with prefix fallback (zh -> zh-CN)."""
        if lang in self.translations:
            self.current_lang = lang
            return
        # Try to match on the primary language subtag.
        for available_lang in self.translations.keys():
            if lang.startswith(available_lang.split('-')[0]):
                self.current_lang = available_lang
                return
        self.current_lang = self.default_lang

    def _resolve(self, lang: str, keys):
        """Walk *keys* through the nested dict for *lang*.

        Returns the resolved node, or None if the path does not fully
        resolve.  (Bug fix: the previous code stopped at the last dict node
        reached, so an over-deep key like "a.b.c" where "a.b" is a string
        silently returned the parent string.)
        """
        node = self.translations.get(lang, {})
        for k in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(k)
        return node

    def get(self, key: str, **kwargs) -> str:
        """Return the translation for *key*, formatted with *kwargs*.

        Falls back to the default language, then to the key itself when the
        key is missing or resolves to a non-leaf node.
        """
        keys = key.split('.')
        translation = self._resolve(self.current_lang, keys)
        if translation is None:
            translation = self._resolve(self.default_lang, keys)
        if not isinstance(translation, str):
            # Missing key, or the key names an intermediate dict: return the
            # key itself (the declared contract is -> str).
            return key
        if kwargs:
            try:
                return translation.format(**kwargs)
            except (KeyError, IndexError):
                # Placeholder mismatch: return the raw template rather than
                # crashing (was a bare except).
                return translation
        return translation

    def get_language_from_header(self, accept_language: str) -> str:
        """Pick the best supported language from an Accept-Language header."""
        if not accept_language:
            return self.default_lang
        # Parse "lang;q=0.9" entries into (priority, language) pairs.
        languages = []
        for lang in accept_language.split(','):
            parts = lang.strip().split(';')
            language = parts[0]
            priority = 1.0
            if len(parts) > 1:
                qpart = parts[1].strip()  # tolerate "en; q=0.5"
                if qpart.startswith('q='):
                    try:
                        priority = float(qpart[2:])
                    except ValueError:
                        # Malformed q-value: keep default priority instead of
                        # raising and aborting the whole request (bug fix).
                        priority = 1.0
            languages.append((priority, language))
        # Highest priority first.
        languages.sort(reverse=True)
        for _priority, lang_code in languages:
            # Exact match.
            if lang_code in self.translations:
                return lang_code
            # Primary-subtag match (e.g. "zh" matches "zh-CN").
            lang_prefix = lang_code.split('-')[0].lower()
            for available_lang in self.translations.keys():
                if available_lang.lower().startswith(lang_prefix):
                    return available_lang
        return self.default_lang
# Global shared i18n instance used by the whole backend.
i18n = I18n()


# Convenience function
def _(key: str, **kwargs) -> str:
    """Shortcut for translation lookup on the shared :data:`i18n` instance."""
    return i18n.get(key, **kwargs)
4. 语言文件 (locales)
en.json
{
"server": {
"loading_model": "Loading model: {model}",
"model_loaded": "Model loaded successfully",
"agent_not_initialized": "Agent not initialized",
"unsupported_action": "Unsupported action: {action}",
"request_failed": "Failed to process request: {error}",
"server_healthy": "Server is healthy",
"starting": "Starting Coding Agent Backend..."
},
"agent": {
"actions": {
"complete": "Code Completion",
"generate": "Code Generation",
"explain": "Code Explanation",
"refactor": "Code Refactoring",
"debug": "Debug Analysis",
"test": "Test Generation"
},
"prompts": {
"complete": "Complete the following {language} code:\n\n{code_before}\n<CURSOR>\n{code_after}\n\nContinue from <CURSOR>:",
"generate": "You are an expert programmer. Generate {language} code based on the following instruction:\n\nInstruction: {instruction}\nContext: {context}\n\nPlease provide clean, well-commented code:\n```{language}\n",
"explain": "Explain the following {language} code in detail:\n\n```{language}\n{code}\n```\n\nProvide a clear explanation:",
"refactor": "Refactor the following {language} code to improve readability, performance, and follow best practices:\n\nOriginal code:\n```{language}\n{code}\n```\n\nRefactored code with improvements:\n```{language}\n",
"debug": "Debug the following {language} code and identify any issues:\n\nCode:\n```{language}\n{code}\n```\n\nIdentify bugs and provide fixes:",
"test": "Generate comprehensive unit tests for the following {language} code:\n\nCode to test:\n```{language}\n{code}\n```\n\nTest code:\n```{language}\n"
},
"suggestions": {
"simplified": "Code has been simplified",
"extracted_function": "Extracted into functions",
"improved_format": "Improved code formatting"
}
}
}
zh-CN.json
{
"server": {
"loading_model": "正在加载模型: {model}",
"model_loaded": "模型加载成功",
"agent_not_initialized": "Agent未初始化",
"unsupported_action": "不支持的操作: {action}",
"request_failed": "处理请求失败: {error}",
"server_healthy": "服务器运行正常",
"starting": "正在启动编码助手后端..."
},
"agent": {
"actions": {
"complete": "代码补全",
"generate": "代码生成",
"explain": "代码解释",
"refactor": "代码重构",
"debug": "调试分析",
"test": "测试生成"
},
"prompts": {
"complete": "补全以下{language}代码:\n\n{code_before}\n<CURSOR>\n{code_after}\n\n从<CURSOR>继续:",
"generate": "你是一位专业的程序员。根据以下指令生成{language}代码:\n\n指令: {instruction}\n上下文: {context}\n\n请提供清晰、注释完善的代码:\n```{language}\n",
"explain": "详细解释以下{language}代码:\n\n```{language}\n{code}\n```\n\n请提供清晰的解释:",
"refactor": "重构以下{language}代码,提高可读性、性能并遵循最佳实践:\n\n原始代码:\n```{language}\n{code}\n```\n\n改进后的代码:\n```{language}\n",
"debug": "调试以下{language}代码并识别问题:\n\n代码:\n```{language}\n{code}\n```\n\n识别错误并提供修复方案:",
"test": "为以下{language}代码生成全面的单元测试:\n\n待测试代码:\n```{language}\n{code}\n```\n\n测试代码:\n```{language}\n"
},
"suggestions": {
"simplified": "代码已简化",
"extracted_function": "已提取为函数",
"improved_format": "改进了代码格式"
}
}
}
ja.json
{
"server": {
"loading_model": "モデルをロード中: {model}",
"model_loaded": "モデルのロードが完了しました",
"agent_not_initialized": "エージェントが初期化されていません",
"unsupported_action": "サポートされていないアクション: {action}",
"request_failed": "リクエストの処理に失敗しました: {error}",
"server_healthy": "サーバーは正常に動作しています",
"starting": "コーディングエージェントバックエンドを起動中..."
},
"agent": {
"actions": {
"complete": "コード補完",
"generate": "コード生成",
"explain": "コード説明",
"refactor": "コードリファクタリング",
"debug": "デバッグ分析",
"test": "テスト生成"
},
"prompts": {
"complete": "次の{language}コードを補完してください:\n\n{code_before}\n<CURSOR>\n{code_after}\n\n<CURSOR>から続けてください:",
"generate": "あなたはエキスパートプログラマーです。次の指示に基づいて{language}コードを生成してください:\n\n指示: {instruction}\nコンテキスト: {context}\n\nクリーンでコメント付きのコードを提供してください:\n```{language}\n",
"explain": "次の{language}コードを詳しく説明してください:\n\n```{language}\n{code}\n```\n\n明確な説明を提供してください:",
"refactor": "次の{language}コードをリファクタリングして、可読性、パフォーマンスを向上させ、ベストプラクティスに従ってください:\n\n元のコード:\n```{language}\n{code}\n```\n\n改善されたコード:\n```{language}\n",
"debug": "次の{language}コードをデバッグして問題を特定してください:\n\nコード:\n```{language}\n{code}\n```\n\nバグを特定して修正を提供してください:",
"test": "次の{language}コードに対して包括的な単体テストを生成してください:\n\nテスト対象コード:\n```{language}\n{code}\n```\n\nテストコード:\n```{language}\n"
},
"suggestions": {
"simplified": "コードが簡略化されました",
"extracted_function": "関数として抽出されました",
"improved_format": "コード形式が改善されました"
}
}
}
5. Agent逻辑实现 (agent.py)
from vllm import SamplingParams
import re
from datetime import datetime
from typing import List
from .i18n import i18n, _
class CodeRequest:
    """Plain container describing a single code-assist request.

    Mirrors the fields of the API's request model; all fields default to
    sensible empty values so callers only supply what they need.
    """

    def __init__(self, action, code="", context="", language="python",
                 cursor_position=0, instruction="", max_tokens=512,
                 temperature=0.7):
        # What to do and with which payload.
        self.action = action
        self.instruction = instruction
        self.code = code
        self.context = context
        self.language = language
        self.cursor_position = cursor_position
        # Sampling controls forwarded to the engine.
        self.max_tokens = max_tokens
        self.temperature = temperature
class CodingAgent:
    """Multilingual coding-assistant agent backed by an async vLLM engine."""

    # Suffix appended to generation prompts so code comments come back in the
    # user's UI language; unlisted languages get no extra instruction.
    # (Bug fix: previously every language other than en/ja received the
    # Chinese suffix, and the ja branch produced the malformed string
    # "Please comment in ja".)
    _COMMENT_SUFFIXES = {
        'zh': "\n请用中文注释。",
        'zh-CN': "\n请用中文注释。",
        'ja': "\nPlease comment in Japanese.",
    }

    def __init__(self, engine, tokenizer):
        self.engine = engine          # vllm AsyncLLMEngine instance
        self.tokenizer = tokenizer    # kept for future token accounting; unused here
        self.conversation_history = []

    async def generate_completion(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Complete code at the cursor position of ``request.code``."""
        i18n.set_language(lang)
        prompt = i18n.get(
            "agent.prompts.complete",
            language=request.language,
            code_before=request.code[:request.cursor_position],
            code_after=request.code[request.cursor_position:],
        )
        result = await self._generate(prompt, request.max_tokens, request.temperature)
        code = self._extract_code(result)
        return {
            "result": code,
            "confidence": 0.85,
            "suggestions": [code],
            "metadata": {
                "action": i18n.get("agent.actions.complete"),
                "timestamp": datetime.now().isoformat(),
            },
        }

    async def generate_code(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Generate code from a natural-language instruction."""
        i18n.set_language(lang)
        prompt = i18n.get(
            "agent.prompts.generate",
            language=request.language,
            instruction=request.instruction,
            context=request.context,
        ) + self._COMMENT_SUFFIXES.get(lang, "")
        result = await self._generate(prompt, request.max_tokens, request.temperature)
        code = self._extract_code(result)
        return {
            "result": code,
            "confidence": 0.9,
            "metadata": {
                "action": i18n.get("agent.actions.generate"),
                "language": request.language,
            },
        }

    async def explain_code(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Explain ``request.code`` in the UI language."""
        i18n.set_language(lang)
        prompt = i18n.get("agent.prompts.explain", language=request.language, code=request.code)
        result = await self._generate(prompt, 512, 0.7)
        return {
            "result": result,
            "confidence": 0.95,
            "metadata": {"action": i18n.get("agent.actions.explain")},
        }

    async def refactor_code(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Refactor ``request.code`` and report heuristic improvement notes."""
        i18n.set_language(lang)
        prompt = i18n.get("agent.prompts.refactor", language=request.language, code=request.code)
        # Lower temperature for more conservative rewrites.
        result = await self._generate(prompt, request.max_tokens, 0.5)
        code = self._extract_code(result)
        suggestions = self._generate_suggestions(request.code, code, lang)
        return {
            "result": code,
            "confidence": 0.8,
            "suggestions": suggestions,
            "metadata": {"action": i18n.get("agent.actions.refactor")},
        }

    async def debug_code(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Analyze ``request.code`` for bugs and suggest fixes."""
        i18n.set_language(lang)
        prompt = i18n.get("agent.prompts.debug", language=request.language, code=request.code)
        # Very low temperature: debugging should be deterministic-ish.
        result = await self._generate(prompt, request.max_tokens, 0.3)
        return {
            "result": result,
            "confidence": 0.85,
            "metadata": {"action": i18n.get("agent.actions.debug")},
        }

    async def generate_tests(self, request: "CodeRequest", lang: str = 'en') -> dict:
        """Generate unit tests for ``request.code``."""
        i18n.set_language(lang)
        prompt = i18n.get("agent.prompts.test", language=request.language, code=request.code)
        result = await self._generate(prompt, request.max_tokens, 0.5)
        code = self._extract_code(result)
        return {
            "result": code,
            "confidence": 0.8,
            "metadata": {"action": i18n.get("agent.actions.test")},
        }

    async def _generate(self, prompt: str, max_tokens: int, temperature: float) -> str:
        """Stream one completion from the vLLM engine; return the final text."""
        sampling_params = SamplingParams(
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=0.95,
            frequency_penalty=0.1,
        )
        request_id = f"req-{datetime.now().timestamp()}"
        results_generator = self.engine.generate(prompt, sampling_params, request_id)
        # Drain the async stream; only the finished output matters here.
        async for request_output in results_generator:
            if request_output.finished:
                return request_output.outputs[0].text
        return ""

    def _extract_code(self, text: str) -> str:
        """Return the first fenced code block in *text*, else the stripped text."""
        matches = re.findall(r'```[\w]*\n(.*?)```', text, re.DOTALL)
        return matches[0].strip() if matches else text.strip()

    def _generate_suggestions(self, original: str, refactored: str, lang: str) -> List[str]:
        """Produce localized, heuristic notes comparing the two versions."""
        i18n.set_language(lang)
        suggestions = []
        if len(refactored) < len(original):
            suggestions.append(i18n.get("agent.suggestions.simplified"))
        if "def " in refactored and "def " not in original:
            suggestions.append(i18n.get("agent.suggestions.extracted_function"))
        if refactored.count("\n") > original.count("\n"):
            suggestions.append(i18n.get("agent.suggestions.improved_format"))
        return suggestions
6. FastAPI服务入口 (server.py)
from fastapi import FastAPI, HTTPException, Header, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import uvicorn
import asyncio
from vllm import AsyncLLMEngine, SamplingParams, AsyncEngineArgs
from transformers import AutoTokenizer
import json
import logging
from datetime import datetime
import os
import re
from i18n import i18n, _
from agent import CodingAgent, CodeRequest
# Logging configuration for the server module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# API request model.
class CodeRequestModel(BaseModel):
    """Request payload accepted by POST /api/code."""
    action: str  # 'complete', 'generate', 'explain', 'refactor', 'debug', 'test'
    code: Optional[str] = ""
    context: Optional[str] = ""
    language: Optional[str] = "python"   # programming language of ``code``
    cursor_position: Optional[int] = 0   # character offset into ``code`` for completion
    instruction: Optional[str] = ""      # natural-language instruction (used by 'generate')
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7
    ui_language: Optional[str] = None    # UI language setting; overrides Accept-Language
class CodeResponse(BaseModel):
    """Response payload returned by POST /api/code."""
    result: str
    confidence: float              # per-action confidence score
    suggestions: List[str] = []    # optional alternative snippets / notes
    metadata: Dict[str, Any] = {}  # action name, timestamp, etc.
# Initialize the FastAPI application.
app = FastAPI(title="Local Coding Agent API")
# CORS configuration (allow IDE extensions to reach the API).
# NOTE(review): allow_origins=["*"] with allow_credentials=True is very
# permissive — acceptable for a localhost tool, verify before wider exposure.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# vLLM engine configuration.
MODEL_NAME = os.getenv("MODEL_NAME", "codellama/CodeLlama-7b-Python-hf")  # replaceable via MODEL_NAME env var
# Globals populated during startup (see startup_event below).
engine = None
tokenizer = None
agent = None
async def initialize_vllm():
    """Initialize the vLLM engine and tokenizer.

    Populates the module-level ``engine`` and ``tokenizer`` globals; must
    complete before any request handler uses them (run from the startup hook).
    """
    global engine, tokenizer
    logger.info(_("server.loading_model", model=MODEL_NAME))
    # vLLM engine parameters.
    engine_args = AsyncEngineArgs(
        model=MODEL_NAME,
        trust_remote_code=True,       # allow models that ship custom code
        max_model_len=4096,           # context-length cap
        gpu_memory_utilization=0.9,   # fraction of GPU memory vLLM may claim
    )
    engine = AsyncLLMEngine.from_engine_args(engine_args)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    logger.info(_("server.model_loaded"))
@app.on_event("startup")
async def startup_event():
"""启动时初始化"""
await initialize_vllm()
global agent
agent = CodingAgent(engine, tokenizer)
@app.post("/api/code", response_model=CodeResponse)
async def process_code_request(request: CodeRequestModel, accept_language: Optional[str] = Header(None)):
"""统一的代码处理端点"""
try:
if not agent:
raise HTTPException(status_code=503, detail=_("server.agent_not_initialized"))
# 确定使用的语言
ui_lang = request.ui_language or i18n.get_language_from_header(accept_language or 'en')
i18n.set_language(ui_lang)
# 创建CodeRequest实例
code_request = CodeRequest(
action=request.action,
code=request.code,
context=request.context,
language=request.language,
cursor_position=request.cursor_position,
instruction=request.instruction,
max_tokens=request.max_tokens,
temperature=request.temperature
)
# 根据action调用不同的方法
action_map = {
"complete": agent.generate_completion,
"generate": agent.generate_code,
"explain": agent.explain_code,
"refactor": agent.refactor_code,
"debug": agent.debug_code,
"test": agent.generate_tests,
}
handler = action_map.get(request.action)
if not handler:
raise HTTPException(status_code=400, detail=_("server.unsupported_action", action=request.action))
response_data = await handler(code_request, ui_lang)
return CodeResponse(**response_data)
except Exception as e:
logger.error(_("server.request_failed", error=str(e)))
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/health")
async def health_check(accept_language: Optional[str] = Header(None)):
"""健康检查"""
lang = i18n.get_language_from_header(accept_language or 'en')
i18n.set_language(lang)
return {
"status": "healthy",
"message": _("server.server_healthy"),
"model": MODEL_NAME,
"timestamp": datetime.now().isoformat()
}
@app.get("/api/capabilities")
async def get_capabilities(accept_language: Optional[str] = Header(None)):
"""获取Agent能力列表"""
lang = i18n.get_language_from_header(accept_language or 'en')
i18n.set_language(lang)
return {
"actions": list(i18n.translations[lang].get("agent", {}).get("actions", {}).keys()),
"languages": ["python", "javascript", "java", "cpp", "go", "rust"],
"ui_languages": list(i18n.translations.keys()),
"max_tokens": 2048,
"model": MODEL_NAME
}
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)