Author: 吴业亮
Blog: wuyeliang.blog.csdn.net
I. Environment Setup (Ubuntu 22.04 + Conda)
1. Install Conda (if not already installed)
bash
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
source ~/.bashrc
2. Create and activate the Conda environment
bash
conda create -n chatbot_eval python=3.10 -y
conda activate chatbot_eval
3. Install dependencies (matched to a V100 with CUDA 11.8)
bash
# Install the CUDA 11.8 toolkit and cuDNN (if not already installed)
conda install cudatoolkit=11.8 cudnn=8.7.0 -c conda-forge -y
# Install the core dependencies
pip install torch==2.1.0+cu118 torchvision==0.16.0+cu118 torchaudio==2.1.0+cu118 --index-url https://download.pytorch.org/whl/cu118
pip install vllm==0.4.2 langgraph==0.1.14 fastapi==0.104.1 uvicorn==0.24.0 pydantic==2.4.2 pandas==2.1.3 numpy==1.26.1 python-multipart==0.0.6 jinja2==3.1.2 transformers==4.35.2
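A quick sanity check before moving on (run inside the chatbot_eval environment); the expected outputs assume the versions above installed cleanly on a machine with a visible V100:
python
import torch

print(torch.__version__)              # expect 2.1.0+cu118
print(torch.cuda.is_available())      # expect True if the CUDA 11.8 setup works
print(torch.cuda.get_device_name(0))  # e.g. "Tesla V100-PCIE-32GB"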
II. Project Structure
chatbot_eval_system/
├── config.py              # System configuration
├── model_wrapper.py       # vLLM model wrapper
├── langgraph_agent.py     # LangGraph evaluation flow
├── web_server.py          # FastAPI web service
├── frontend/
│   └── index.html         # Frontend page
├── run.py                 # Startup script
└── requirements.txt       # Dependency list
III. Full Code
1. requirements.txt
txt
# Requires Python >= 3.10 (interpreter requirement, not a pip package)
--extra-index-url https://download.pytorch.org/whl/cu118
vllm==0.4.2
langgraph==0.1.14
fastapi==0.104.1
uvicorn==0.24.0
pydantic==2.4.2
pandas==2.1.3
numpy==1.26.1
python-multipart==0.0.6
jinja2==3.1.2
torch==2.1.0+cu118
transformers==4.35.2
accelerate==0.24.1
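After pip install -r requirements.txt, a short check confirms the pinned versions actually resolved (importlib.metadata is in the standard library, so nothing extra is needed):
python
from importlib.metadata import version

for pkg in ("vllm", "langgraph", "fastapi", "torch", "transformers"):
    print(pkg, version(pkg))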
2. config.py (core configuration; replace the model path)
python
# ==================== vLLM configuration ====================
# Replace with the path to your local model (HF format, e.g. Llama-2-7B-chat-hf)
MODEL_PATH = "/path/to/your/local/model/Llama-2-7B-chat-hf"
VLLM_CONFIG = {
    "gpu_memory_utilization": 0.85,  # sized for a 32 GB V100
    "tensor_parallel_size": 1,       # single-GPU deployment
    "max_num_batched_tokens": 1024,
    "max_num_seqs": 64,
    "temperature": 0.7,
    "top_p": 0.9,
    "max_tokens": 512,
}
# ==================== Evaluation configuration ====================
EVAL_DIMENSIONS = [
    "Accuracy (is the answer factually correct)",
    "Relevance (does the answer address the question)",
    "Fluency (is the language smooth and natural)",
    "Completeness (does it cover the core of the question)",
]
# {dimensions} receives the joined dimension string; {dims[0]}..{dims[3]}
# index the EVAL_DIMENSIONS list itself (str.format supports sequence indexing).
EVAL_PROMPT_TEMPLATE = """
Please evaluate the quality of the following chatbot answer along these dimensions: {dimensions}
Question: {question}
Chatbot answer: {answer}
For each dimension give a score (1-5, where 5 is best) and a short comment, then give an overall score (1-5).
Output format:
1. {dims[0]}: score + comment
2. {dims[1]}: score + comment
3. {dims[2]}: score + comment
4. {dims[3]}: score + comment
Overall: score + comment
"""
# ==================== Web configuration ====================
HOST = "0.0.0.0"
PORT = 8000
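To see what the evaluation prompt looks like before wiring it into the graph, you can render the template by hand. This is just a preview sketch with made-up question/answer text:
python
from config import EVAL_DIMENSIONS, EVAL_PROMPT_TEMPLATE

preview = EVAL_PROMPT_TEMPLATE.format(
    dimensions=", ".join(EVAL_DIMENSIONS),
    dims=EVAL_DIMENSIONS,  # the template indexes this list as {dims[0]}..{dims[3]}
    question="What is vLLM?",
    answer="vLLM is a high-throughput inference engine for LLMs.",
)
print(preview)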
3. model_wrapper.py (vLLM model wrapper)
python
from vllm import LLM, SamplingParams
from config import MODEL_PATH, VLLM_CONFIG

# Global vLLM model instance (singleton)
_llm = None
_sampling_params = SamplingParams(
    temperature=VLLM_CONFIG["temperature"],
    top_p=VLLM_CONFIG["top_p"],
    max_tokens=VLLM_CONFIG["max_tokens"],
)
def get_vllm_model():
    """Return the vLLM model singleton, loading it on first use."""
    global _llm
    if _llm is None:
        _llm = LLM(
            model=MODEL_PATH,
            gpu_memory_utilization=VLLM_CONFIG["gpu_memory_utilization"],
            tensor_parallel_size=VLLM_CONFIG["tensor_parallel_size"],
            max_num_batched_tokens=VLLM_CONFIG["max_num_batched_tokens"],
            max_num_seqs=VLLM_CONFIG["max_num_seqs"],
            trust_remote_code=True,  # needed for models that ship custom code
        )
    return _llm
def generate_response(prompt: str) -> str:
    """
    Generate a completion with vLLM.
    :param prompt: input prompt
    :return: generated text
    """
    llm = get_vllm_model()
    outputs = llm.generate(prompts=[prompt], sampling_params=_sampling_params)
    return outputs[0].outputs[0].text.strip()

if __name__ == "__main__":
    # Smoke test for the model wrapper
    test_prompt = "Hello, please introduce yourself"
    print(generate_response(test_prompt))
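generate_response handles one prompt per call, but vLLM's main strength is batched decoding. If you later evaluate many questions at once, a batch variant along these lines (a sketch, not part of the files above) avoids per-prompt overhead:
python
from typing import List

def generate_batch(prompts: List[str]) -> List[str]:
    """Generate completions for several prompts in one vLLM call."""
    llm = get_vllm_model()
    # vLLM schedules the whole batch internally (continuous batching),
    # which is much faster than calling generate_response in a loop
    outputs = llm.generate(prompts=prompts, sampling_params=_sampling_params)
    return [out.outputs[0].text.strip() for out in outputs]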
4. langgraph_agent.py (LangGraph evaluation flow)
python
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field
from typing import Dict, Any
from model_wrapper import generate_response
from config import EVAL_DIMENSIONS, EVAL_PROMPT_TEMPLATE

# State carried through the graph
class EvalState(BaseModel):
    question: str = Field(description="The user's question")
    chatbot_answer: str = Field(default="", description="The chatbot's answer")
    eval_result: str = Field(default="", description="The evaluation result")
# Node 1: the chatbot generates an answer
def chatbot_node(state: EvalState) -> Dict[str, Any]:
    """Generate the chatbot's answer."""
    prompt = f"User question: {state.question}\nPlease give an accurate, concise answer:"
    answer = generate_response(prompt)
    return {"chatbot_answer": answer}

# Node 2: evaluate the chatbot's answer
def eval_node(state: EvalState) -> Dict[str, Any]:
    """Evaluate the quality of the answer."""
    eval_prompt = EVAL_PROMPT_TEMPLATE.format(
        dimensions=", ".join(EVAL_DIMENSIONS),
        dims=EVAL_DIMENSIONS,  # indexed as {dims[0]}..{dims[3]} in the template
        question=state.question,
        answer=state.chatbot_answer,
    )
    eval_result = generate_response(eval_prompt)
    return {"eval_result": eval_result}
# Build the LangGraph graph
def build_eval_graph():
    """Build the evaluation flow: start -> answer -> evaluate -> end."""
    graph = StateGraph(EvalState)
    # Add nodes
    graph.add_node("chatbot", chatbot_node)
    graph.add_node("eval", eval_node)
    # Wire the flow
    graph.set_entry_point("chatbot")
    graph.add_edge("chatbot", "eval")
    graph.add_edge("eval", END)
    # compile() returns a runnable compiled graph, not a StateGraph
    return graph.compile()

# Compiled evaluation graph (global instance)
eval_graph = build_eval_graph()

if __name__ == "__main__":
    # Test the evaluation flow end to end
    test_input = {"question": "How do I install Conda on Ubuntu 22.04?"}
    result = eval_graph.invoke(test_input)
    print("Question:", result["question"])
    print("Chatbot answer:", result["chatbot_answer"])
    print("Evaluation:", result["eval_result"])
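During debugging it can help to watch the state move through the two nodes instead of only seeing the final result. Compiled LangGraph graphs expose a stream method that yields per-node updates, roughly like this (a sketch; the exact shape of each update can vary across langgraph versions):
python
for update in eval_graph.stream({"question": "How do I install Conda on Ubuntu 22.04?"}):
    # each item maps a node name ("chatbot" or "eval") to the fields it returned
    print(update)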
5. web_server.py (FastAPI web service)
python
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from langgraph_agent import eval_graph
from config import HOST, PORT

# Initialize FastAPI
app = FastAPI(title="Chatbot Evaluation System")
# Mount static files (the frontend page)
app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
templates = Jinja2Templates(directory="frontend")

# Request body model
class EvalRequest(BaseModel):
    question: str

# Frontend page route
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})

# Evaluation endpoint. Declared as a plain def so FastAPI runs it in a
# worker thread: the blocking eval_graph.invoke() call (GPU generation)
# then does not stall the event loop.
@app.post("/api/evaluate")
def evaluate(request: EvalRequest):
    try:
        # Run the LangGraph evaluation flow
        result = eval_graph.invoke({"question": request.question})
        return JSONResponse({
            "code": 200,
            "data": {
                "question": result["question"],
                "chatbot_answer": result["chatbot_answer"],
                "eval_result": result["eval_result"]
            },
            "msg": "success"
        })
    except Exception as e:
        return JSONResponse({
            "code": 500,
            "data": None,
            "msg": f"error: {str(e)}"
        })

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host=HOST, port=PORT)
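Once the server is up, the endpoint can be exercised without the frontend. A minimal client sketch (assumes the requests package is installed, pip install requests):
python
import requests

resp = requests.post(
    "http://localhost:8000/api/evaluate",
    json={"question": "How do I install Conda on Ubuntu 22.04?"},
    timeout=300,  # answer generation plus evaluation can take a while
)
print(resp.json())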
6. frontend/index.html (frontend page)
html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Chatbot Evaluation System</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: Arial, sans-serif;
}
.container {
max-width: 1000px;
margin: 50px auto;
padding: 20px;
}
.input-area {
margin-bottom: 20px;
}
#question-input {
width: 80%;
height: 80px;
padding: 10px;
font-size: 16px;
border: 1px solid #ccc;
border-radius: 4px;
resize: none;
}
#submit-btn {
width: 18%;
height: 80px;
background-color: #4285f4;
color: white;
border: none;
border-radius: 4px;
font-size: 16px;
cursor: pointer;
}
#submit-btn:hover {
background-color: #3367d6;
}
.result-area {
margin-top: 30px;
padding: 20px;
border: 1px solid #eee;
border-radius: 4px;
}
.result-title {
font-size: 18px;
font-weight: bold;
margin-bottom: 10px;
color: #333;
}
.result-content {
font-size: 16px;
line-height: 1.6;
color: #666;
white-space: pre-wrap;
}
.loading {
color: #4285f4;
font-size: 16px;
display: none;
}
</style>
</head>
<body>
<div class="container">
<h1>Chatbot Evaluation System</h1>
<div class="input-area">
<textarea id="question-input" placeholder="Enter a question to test (e.g. How do I install Conda on Ubuntu 22.04?)"></textarea>
<button id="submit-btn">Evaluate</button>
</div>
<div class="loading" id="loading">Evaluating, please wait...</div>
<div class="result-area" id="result-area" style="display: none;">
<div class="result-title">Question:</div>
<div class="result-content" id="question-result"></div>
<div class="result-title" style="margin-top: 20px;">Chatbot answer:</div>
<div class="result-content" id="answer-result"></div>
<div class="result-title" style="margin-top: 20px;">Evaluation:</div>
<div class="result-content" id="eval-result"></div>
</div>
</div>
<script>
const submitBtn = document.getElementById('submit-btn');
const questionInput = document.getElementById('question-input');
const loading = document.getElementById('loading');
const resultArea = document.getElementById('result-area');
const questionResult = document.getElementById('question-result');
const answerResult = document.getElementById('answer-result');
const evalResult = document.getElementById('eval-result');
submitBtn.addEventListener('click', async () => {
const question = questionInput.value.trim();
if (!question) {
alert('Please enter a question!');
return;
}
// Show the loading state
loading.style.display = 'block';
resultArea.style.display = 'none';
try {
// Call the backend API
const response = await fetch('/api/evaluate', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ question: question }),
});
const data = await response.json();
if (data.code === 200) {
// Render the results
questionResult.textContent = data.data.question;
answerResult.textContent = data.data.chatbot_answer;
evalResult.textContent = data.data.eval_result;
resultArea.style.display = 'block';
} else {
alert('Evaluation failed: ' + data.msg);
}
} catch (error) {
alert('Network error: ' + error.message);
} finally {
// Hide the loading state
loading.style.display = 'none';
}
});
</script>
</body>
</html>
7. run.py (startup script)
python
import uvicorn
from config import HOST, PORT
from web_server import app

if __name__ == "__main__":
    print(f"Starting the chatbot evaluation system: http://{HOST}:{PORT}")
    uvicorn.run(
        app,
        host=HOST,
        port=PORT,
        log_level="info",
        workers=1  # single process: multiple workers would each load the model
    )
IV. Running the System
1. Prepare the local model
Place an HF-format model (e.g. Llama-2-7B-chat-hf) on local disk and point MODEL_PATH in config.py at it.
2. Start the service
bash
conda activate chatbot_eval
cd chatbot_eval_system
python run.py
3. Open the system
Open http://<server-IP>:8000 in a browser, enter a question (e.g. "How do I install Conda on Ubuntu 22.04?"), and click "Evaluate"; the chatbot's answer and its evaluation will appear.
V. Key Adaptation Notes (V100 32 GB)
- vLLM parameters: gpu_memory_utilization=0.85 (about 27.2 GB of the 32 GB, leaving headroom) and tensor_parallel_size=1 (single-GPU deployment);
- Model choice: 7B-class models are recommended (e.g. Llama-2-7B, Qwen-7B); they run comfortably in 32 GB;
- Process configuration: workers=1 (multiple workers would each load the model and exhaust GPU memory). A quick way to check the actual headroom is sketched below.
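To verify the headroom on your own card after the model loads, torch can report free vs. total GPU memory (assuming torch sees the same GPU that vLLM uses):
python
import torch

free, total = torch.cuda.mem_get_info(0)  # returns (free, total) in bytes
print(f"free: {free / 1024**3:.1f} GiB / total: {total / 1024**3:.1f} GiB")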
VI. Troubleshooting
- Out of GPU memory: lower gpu_memory_utilization (e.g. to 0.8) or switch to a smaller model (e.g. a ~4B one);
- Model fails to load: confirm the path is correct, the model is in HF format, and trust_remote_code=True is set;
- CUDA errors: confirm CUDA 11.8 is installed and that the torch build matches it.