0. 启发来源
可以先看看下面这个post:
https://vercel.com/blog/we-removed-80-percent-of-our-agents-tools
coding沙箱确实能解决很多问题,比如数值计算、画图、代码执行等,可以弥补RAG、text2sql在很多深度问答场景下的不足。那我们来实现一个吧。
1. 项目概述
1.1 项目简介
沙箱代码执行服务是一个基于FastAPI的Web服务,专为安全执行不受信任的代码而设计。该服务为每个代码执行请求创建独立的Docker容器沙箱环境,确保执行环境的隔离性和安全性。
1.2 核心特性
- 完全隔离:每个请求在独立的Docker容器中执行
- 资源限制:可配置CPU、内存、执行时间限制
- 自动依赖管理:支持自动安装requirements.txt中的依赖
- 灵活的配置:支持自定义Docker镜像、执行命令等
- 易用的API:提供RESTful API接口
2. 系统架构
2.1 技术栈
后端框架: FastAPI (Python 3.11+)
容器技术: Docker
API文档: OpenAPI (自动生成)
服务器: Uvicorn (ASGI服务器)
2.2 架构设计
客户端请求 → FastAPI应用 → 沙箱管理器 → Docker容器 → 返回结果
3. 核心功能详解
3.1 沙箱执行流程
python
# 简化执行流程
1. 验证代码目录有效性
2. 创建CodeBox沙箱实例
3. 拷贝代码到容器/app目录
4. 自动创建虚拟环境并安装依赖
5. 执行指定命令
6. 收集执行结果
7. 清理沙箱环境
3.2 安全特性
- 资源隔离:使用Docker容器提供进程级隔离
- 资源限制:
- 内存限制:默认512MB
- CPU配额:默认0.5个CPU核心
- 执行超时:默认30秒
- 环境隔离:为每个请求创建全新的容器环境
3.3 自动依赖管理
python
# 依赖安装逻辑
if 存在requirements.txt:
1. 创建Python虚拟环境(/app/venv)
2. 使用虚拟环境中的pip安装依赖
3. 替换命令中的python为venv解释器
4. API接口设计
4.1 数据模型
python
class ExecuteRequest:
code_dir: str # 代码目录路径
command: List[str] # 执行命令数组
image: str = "python:3.11-slim" # Docker镜像
timeout: int = 30 # 超时时间(秒)
mem_limit: str = "512m" # 内存限制
cpu_quota: float = 0.5 # CPU配额
auto_install_deps: bool = True # 自动安装依赖
class ExecuteResponse:
success: bool # 执行是否成功
exit_code: int # 退出码
stdout: str # 标准输出
stderr: str # 错误输出
message: Optional[str] # 附加消息
4.2 端点说明
GET /
用途: 健康检查
响应: 服务状态信息
POST /execute
用途: 执行代码请求
请求体: ExecuteRequest
响应: ExecuteResponse
示例请求:
json
{
"code_dir": "./demo_proj",
"command": ["python", "-m", "pytest"],
"timeout": 30,
"mem_limit": "1g",
"cpu_quota": 1.0
}
5. 完整代码
python
#!/usr/bin/env python3
"""
沙箱执行服务:接收代码执行请求,每个请求在独立的 Docker 沙箱中运行
"""
import sys
from pathlib import Path
from typing import Optional, List
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import uvicorn
from sandbox import CodeBox
# ---------- 请求/响应模型 ----------
class ExecuteRequest(BaseModel):
    """Request body for ``POST /execute``.

    ``code_dir`` and ``command`` are required; every other field has a
    default. The defaulted fields are declared with plain (non-``Optional``)
    types so that an explicit JSON ``null`` is rejected during validation
    instead of being forwarded to the sandbox as ``None``.
    """

    # Directory to copy into the sandbox.
    code_dir: str = Field(..., description="代码目录路径(相对于服务根目录)")
    # Command to run, as an argv-style list.
    command: List[str] = Field(..., description="要执行的命令,如 ['python', '-m', 'pytest']")
    # Docker image used for the sandbox container.
    image: str = Field(default="python:3.11-slim", description="Docker 镜像")
    # Execution timeout in seconds.
    timeout: int = Field(default=30, description="超时时间(秒)")
    # Memory limit handed to the sandbox (e.g. "512m").
    mem_limit: str = Field(default="512m", description="内存限制")
    # CPU quota handed to the sandbox.
    cpu_quota: float = Field(default=0.5, description="CPU 配额")
    # Whether to install requirements.txt (when present) before running.
    auto_install_deps: bool = Field(default=True, description="是否自动安装依赖(如果存在 requirements.txt)")
class ExecuteResponse(BaseModel):
    """Result of one execution request, returned by ``POST /execute``."""

    # Whether the execution is considered successful.
    success: bool
    # Exit code reported for the execution.
    exit_code: int
    # Captured standard output.
    stdout: str
    # Captured standard error.
    stderr: str
    # Optional human-readable note about the outcome.
    message: Optional[str] = Field(default=None)
# ---------- 服务逻辑 ----------
# Locations inside the sandbox container.
_VENV_DIR = "/app/venv"
_REQUIREMENTS_FILE = "/app/requirements.txt"
# Interpreter names that are redirected to the virtual environment.
_PYTHON_NAMES = ("python", "python3")


def _ensure_code_dir(code_dir: str) -> Path:
    """Validate ``code_dir`` and return it as a ``Path``.

    ``code_dir`` comes from the request and is documented as relative to the
    service root, so a path that resolves outside the current working
    directory (absolute path, ``..`` traversal) is rejected; otherwise any
    host directory could be copied into the sandbox and read back.

    Raises:
        HTTPException: 400 if the path escapes the service root or is not an
            existing directory.
    """
    service_root = Path.cwd().resolve()
    resolved = (service_root / code_dir).resolve()
    if not resolved.is_relative_to(service_root):
        raise HTTPException(
            status_code=400,
            detail=f"代码目录必须位于服务根目录内: {code_dir}"
        )
    if not resolved.is_dir():
        raise HTTPException(
            status_code=400,
            detail=f"代码目录不存在: {code_dir}"
        )
    # Hand back the path exactly as the caller spelled it.
    return Path(code_dir)


def _install_dependencies(box: "CodeBox") -> Optional["ExecuteResponse"]:
    """Create the virtual environment and install the requirements file.

    Returns:
        ``None`` when both steps exit with code 0, otherwise a failed
        ``ExecuteResponse`` describing the step that failed.
    """
    venv_code, venv_out, venv_err = box.exec(["python", "-m", "venv", _VENV_DIR])
    if venv_code != 0:
        # Without this check a broken venv only surfaces later as a
        # confusing "pip not found" style failure.
        return ExecuteResponse(
            success=False,
            exit_code=venv_code,
            stdout=venv_out,
            stderr=venv_err,
            message="虚拟环境创建失败"
        )

    pip_code, pip_out, pip_err = box.exec(
        [f"{_VENV_DIR}/bin/pip", "install", "-r", _REQUIREMENTS_FILE]
    )
    if pip_code != 0:
        return ExecuteResponse(
            success=False,
            exit_code=pip_code,
            stdout=pip_out,
            stderr=pip_err,
            message="依赖安装失败"
        )
    return None


def _use_venv_interpreter(command: List[str]) -> List[str]:
    """Return ``command`` with interpreter tokens pointing into the venv.

    Every token equal to ``python`` or ``python3`` is replaced, so the
    command runs against the interpreter that has the dependencies installed.
    """
    return [
        f"{_VENV_DIR}/bin/{token}" if token in _PYTHON_NAMES else token
        for token in command
    ]


def execute_code_in_sandbox(
    code_dir: str,
    command: List[str],
    image: str = "python:3.11-slim",
    timeout: int = 30,
    mem_limit: str = "512m",
    cpu_quota: float = 0.5,
    auto_install_deps: bool = True
) -> ExecuteResponse:
    """Run ``command`` against ``code_dir`` inside a fresh sandbox.

    Args:
        code_dir: Directory to copy into the sandbox; must lie inside the
            service root (the current working directory).
        command: Command to execute, as an argv-style list.
        image: Docker image for the sandbox.
        timeout: Timeout passed to the sandbox.
        mem_limit: Memory limit passed to the sandbox.
        cpu_quota: CPU quota passed to the sandbox.
        auto_install_deps: When true and ``/app/requirements.txt`` exists in
            the sandbox, create a virtual environment, install the
            requirements and run the command with the venv interpreter.

    Returns:
        ExecuteResponse: The command's exit code and output, or a failed
        response (``exit_code=-1``) if an unexpected exception occurred.

    Raises:
        HTTPException: 400 if ``command`` is empty or ``code_dir`` is invalid.
    """
    box = None
    try:
        # Reject unusable requests before paying for a container.
        if not command:
            raise HTTPException(status_code=400, detail="执行命令不能为空")
        code_path = _ensure_code_dir(code_dir)

        # One dedicated sandbox per call.
        box = CodeBox(
            image=image,
            timeout=timeout,
            mem_limit=mem_limit,
            cpu_quota=cpu_quota
        )
        box.copy_dir(code_path)

        if auto_install_deps:
            # Exit code 0 from `ls` means the requirements file is present.
            probe_code, _, _ = box.exec(["ls", _REQUIREMENTS_FILE])
            if probe_code == 0:
                failure = _install_dependencies(box)
                if failure is not None:
                    return failure
                command = _use_venv_interpreter(command)

        exit_code, stdout, stderr = box.exec(command)
        succeeded = exit_code == 0
        return ExecuteResponse(
            success=succeeded,
            exit_code=exit_code,
            stdout=stdout,
            stderr=stderr,
            message="执行完成" if succeeded else "执行失败"
        )
    except HTTPException:
        # Request-validation errors must reach FastAPI unchanged.
        raise
    except Exception as exc:
        # Any other failure is reported in-band rather than as an HTTP error.
        return ExecuteResponse(
            success=False,
            exit_code=-1,
            stdout="",
            stderr=str(exc),
            message=f"执行过程中发生错误: {type(exc).__name__}"
        )
    finally:
        # Always tear the sandbox down; a cleanup failure is only logged so
        # it cannot mask the real result.
        if box is not None:
            try:
                box.destroy()
            except Exception as e:
                print(f"[warn] 清理沙箱失败: {e}", file=sys.stderr)
# ---------- FastAPI 应用 ----------
# Application object that the route decorators below attach to.
app = FastAPI(
    version="1.0.0",
    title="沙箱代码执行服务",
    description="为每个请求创建独立的 Docker 沙箱执行代码,互不干扰",
)
@app.get("/")
async def root():
    """Health-check endpoint: report the service name, status and version."""
    health = dict(
        service="沙箱代码执行服务",
        status="running",
        version="1.0.0",
    )
    return health
@app.post("/execute", response_model=ExecuteResponse)
async def execute(request: ExecuteRequest):
    """
    Execute a code request.

    Each request gets its own Docker sandbox, which is cleaned up once the
    execution finishes.

    Example request:
    ```json
    {
        "code_dir": "./demo_proj",
        "command": ["python", "-m", "pytest"],
        "timeout": 30
    }
    ```
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    # execute_code_in_sandbox is synchronous; calling it directly from this
    # coroutine would hold the event loop for the whole run and serialise
    # concurrent requests. Run it in a worker thread instead. Exceptions
    # (including HTTPException) propagate through the await unchanged.
    return await asyncio.to_thread(
        execute_code_in_sandbox,
        code_dir=request.code_dir,
        command=request.command,
        image=request.image,
        timeout=request.timeout,
        mem_limit=request.mem_limit,
        cpu_quota=request.cpu_quota,
        auto_install_deps=request.auto_install_deps,
    )
if __name__ == "__main__":
    # Serve the app on all interfaces, port 8000, with uvicorn's reload
    # option switched on.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": True,
    }
    uvicorn.run("sandbox_service:app", **server_options)
6. 测试服务
python
#!/usr/bin/env python3
"""
测试沙箱服务的完整测试脚本
"""
import requests
import json
import sys
import time
from typing import Dict, Any
# Base URL of the sandbox service under test
SERVICE_URL = "http://localhost:8000"
TIMEOUT = 10 # request timeout in seconds
def print_section(title: str):
    """Print ``title`` framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success: bool, message: str = ""):
    """Print a PASS/FAIL marker followed by ``message``."""
    if success:
        marker = "✅ PASS"
    else:
        marker = "❌ FAIL"
    print(marker, message)
def check_service_available() -> bool:
    """Return True when ``GET /`` on the service answers with HTTP 200.

    Prints the reported service status on success, and a hint on how to start
    the service when the request itself fails.
    """
    try:
        reply = requests.get(f"{SERVICE_URL}/", timeout=5)
        if reply.status_code != 200:
            return False
        service_status = reply.json().get('status', 'unknown')
        print(f"服务状态: {service_status}")
        return True
    except requests.exceptions.RequestException as exc:
        print(f"❌ 无法连接到服务: {exc}")
        print(" 请确保服务已启动: python sandbox_service.py")
        return False
def test_health_check():
    """Test 1: the health-check endpoint answers with HTTP 200.

    Returns True on success, False on any other status code or exception.
    """
    print_section("测试1: 健康检查")
    try:
        reply = requests.get(f"{SERVICE_URL}/", timeout=TIMEOUT)
        status_code = reply.status_code
        print(f"状态码: {status_code}")
        # Guard clause: anything but 200 is a failure.
        if status_code != 200:
            print_result(False, f"健康检查失败,状态码: {status_code}")
            return False
        body = reply.json()
        print(f"响应内容: {json.dumps(body, indent=2, ensure_ascii=False)}")
        print_result(True, "健康检查通过")
        return True
    except Exception as exc:
        print_result(False, f"健康检查异常: {exc}")
        return False
def test_execute_pytest():
    """Test 2: run pytest for ``./demo_proj`` through ``POST /execute``.

    Prints the request, timing and the execution result; returns a truthy
    value only when the response reports success with exit code 0.
    """
    print_section("测试2: 执行 pytest 测试")
    url = f"{SERVICE_URL}/execute"
    payload = {
        "code_dir": "./demo_proj",
        "command": ["python", "-m", "pytest", "-v"],
        "timeout": 30,
        "auto_install_deps": True
    }
    print(f"请求 URL: {url}")
    print(f"请求内容:\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
    try:
        started = time.time()
        reply = requests.post(url, json=payload, timeout=60)
        elapsed = time.time() - started
        print(f"\n响应状态码: {reply.status_code}")
        print(f"执行耗时: {elapsed:.2f} 秒")

        # Guard clause: a non-200 answer never carries an execution result.
        if reply.status_code != 200:
            print(f"错误响应: {reply.text}")
            print_result(False, f"请求失败,状态码: {reply.status_code}")
            return False

        result = reply.json()
        print("\n执行结果:")
        print(f" 成功: {result['success']}")
        print(f" 退出码: {result['exit_code']}")
        print(f" 消息: {result.get('message', 'N/A')}")
        if stdout_text := result.get('stdout'):
            print(f"\n标准输出:\n{'-' * 40}")
            print(stdout_text)
        if stderr_text := result.get('stderr'):
            print(f"\n标准错误:\n{'-' * 40}")
            print(stderr_text)
        success = result['success'] and result['exit_code'] == 0
        print_result(success, "pytest 测试执行")
        return success
    except Exception as exc:
        print_result(False, f"执行异常: {exc}")
        return False
def test_execute_simple_python():
    """Test 3: run a one-line ``python -c`` command in the sandbox.

    Returns a truthy value only when the response reports success with exit
    code 0.
    """
    print_section("测试3: 执行简单 Python 代码")
    url = f"{SERVICE_URL}/execute"
    # No temporary project is created here: the existing demo directory is
    # reused and only a plain `python -c` command is executed.
    payload = {
        "code_dir": "./demo_proj",
        "command": ["python", "-c", "print('Hello from sandbox!'); import sys; sys.exit(0)"],
        "timeout": 10,
        "auto_install_deps": False
    }
    print(f"请求内容:\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
    try:
        reply = requests.post(url, json=payload, timeout=30)
        if reply.status_code != 200:
            print_result(False, "请求失败")
            return False

        result = reply.json()
        print(f"执行结果: 成功={result['success']}, 退出码={result['exit_code']}")
        if stdout_text := result.get('stdout'):
            print(f"输出: {stdout_text.strip()}")
        success = result['success'] and result['exit_code'] == 0
        print_result(success, "简单 Python 代码执行")
        return success
    except Exception as exc:
        print_result(False, f"执行异常: {exc}")
        return False
def test_invalid_directory():
    """Test 4: a non-existent ``code_dir`` must be answered with HTTP 400.

    Returns True when the service answers 400, False otherwise.
    """
    print_section("测试4: 无效目录错误处理")
    url = f"{SERVICE_URL}/execute"
    payload = {
        "code_dir": "./nonexistent_directory_12345",
        "command": ["python", "--version"],
        "timeout": 10
    }
    print(f"请求内容:\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
    try:
        reply = requests.post(url, json=payload, timeout=30)
        # Guard clause: any status other than 400 is a failure.
        if reply.status_code != 400:
            print(f"意外响应: 状态码={reply.status_code}, 内容={reply.text}")
            print_result(False, "错误处理不正确")
            return False

        error_body = reply.json()
        print(f"正确返回错误: {error_body.get('detail', 'N/A')}")
        print_result(True, "错误处理正确")
        return True
    except Exception as exc:
        print_result(False, f"执行异常: {exc}")
        return False
def test_concurrent_requests(num_requests: int = 3):
    """Test 5: fire concurrent requests and check they do not interfere.

    Each request prints its own id inside the sandbox. A request only counts
    as successful when the HTTP status is 200, the execution itself reports
    success, and the returned stdout contains that request's own marker, so
    a failed run or mixed-up output is reported as a failure.

    Args:
        num_requests: Number of requests to send in parallel (at least 1).

    Returns:
        True when every request succeeded, False otherwise.

    Raises:
        ValueError: If ``num_requests`` is smaller than 1.
    """
    import concurrent.futures

    if num_requests < 1:
        raise ValueError("num_requests must be at least 1")

    print_section("测试5: 并发请求隔离性测试")
    url = f"{SERVICE_URL}/execute"

    def make_request(request_id: int):
        """Send one request; return ``(request_id, ok, result_or_None)``."""
        marker = f"Request {request_id}"
        payload = {
            "code_dir": "./demo_proj",
            "command": ["python", "-c", f"import time; time.sleep(1); print('{marker}')"],
            "timeout": 10,
            "auto_install_deps": False
        }
        try:
            response = requests.post(url, json=payload, timeout=30)
            if response.status_code != 200:
                return request_id, False, None
            result = response.json()
        except Exception as e:
            return request_id, False, {"error": str(e)}

        # HTTP 200 alone is not enough: the run must have succeeded and the
        # output must belong to this very request.
        output_lines = [line.strip() for line in (result.get('stdout') or "").splitlines()]
        ok = bool(result.get('success')) and marker in output_lines
        return request_id, ok, result

    print(f"发送 {num_requests} 个并发请求...")
    start_time = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_requests) as executor:
        futures = [executor.submit(make_request, i) for i in range(1, num_requests + 1)]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]
    elapsed = time.time() - start_time
    print(f"并发执行完成,耗时: {elapsed:.2f} 秒")

    success_count = sum(1 for _, success, _ in results if success)
    print(f"成功请求数: {success_count}/{num_requests}")
    # Sort on the id only, so the result dicts are never compared.
    for req_id, success, result in sorted(results, key=lambda item: item[0]):
        status = "✅" if success else "❌"
        print(f" 请求 {req_id}: {status}")
        if success and result and result.get('stdout'):
            print(f" 输出: {result['stdout'].strip()}")

    all_success = success_count == num_requests
    print_result(all_success, "并发请求隔离性")
    return all_success
def run_all_tests():
    """Run every test in order and print a summary.

    Exits the process with status 1 when the service is unreachable.

    Returns:
        0 when all tests passed, 1 otherwise.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(" 沙箱服务测试套件")
    print(banner)

    # Nothing can run without a reachable service.
    if not check_service_available():
        print("\n❌ 服务不可用,请先启动服务:")
        print(" python sandbox_service.py")
        sys.exit(1)

    suite = [
        ("健康检查", test_health_check),
        ("Pytest 测试", test_execute_pytest),
        ("简单 Python 代码", test_execute_simple_python),
        ("错误处理", test_invalid_directory),
        ("并发请求", test_concurrent_requests),
    ]

    outcomes = []
    for name, test_func in suite:
        try:
            outcome = test_func()
        except Exception as exc:
            # A crashing test counts as failed but must not stop the suite.
            print(f"\n❌ 测试 '{name}' 发生异常: {exc}")
            outcome = False
        outcomes.append((name, outcome))

    # Summary
    print_section("测试结果汇总")
    total = len(outcomes)
    passed = 0
    for _, outcome in outcomes:
        if outcome:
            passed += 1
    for name, outcome in outcomes:
        status = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{status} - {name}")
    print(f"\n总计: {passed}/{total} 测试通过")

    if passed != total:
        print(f"\n⚠️ 有 {total - passed} 个测试失败")
        return 1
    print("\n🎉 所有测试通过!")
    return 0
if __name__ == "__main__":
    # Sub-commands that run one test on its own.
    single_tests = {
        "health": test_health_check,
        "pytest": test_execute_pytest,
        "simple": test_execute_simple_python,
        "error": test_invalid_directory,
        "concurrent": test_concurrent_requests,
    }

    if len(sys.argv) > 1:
        selected = sys.argv[1]
        if selected not in single_tests:
            print(f"未知命令: {selected}")
            print("可用命令: health, pytest, simple, error, concurrent")
            sys.exit(1)
        # Propagate the outcome so a failing single test yields a non-zero
        # exit status, just like the full suite does.
        sys.exit(0 if single_tests[selected]() else 1)
    else:
        # No argument: run the whole suite.
        sys.exit(run_all_tests())
