## 1. 安装依赖

```bash
pip install flask
```
## 2. 完整代码:mcp_server.py
from flask import Flask, request, jsonify

app = Flask(__name__)

# MCP server status — shared, mutable module-level state read/updated by
# every route handler below.
mcp_status = {
    "server": "running",        # static marker; never changed by handlers
    "model_loaded": False,      # toggled by /mcp/model/load and /unload
    "current_model": None,      # name of the (simulated) loaded model
    "task_count": 0             # incremented once per /mcp/infer call
}
# ------------------------------
# MCP 基础接口
# ------------------------------
@app.route('/mcp/ping', methods=['GET'])
def ping():
    """Heartbeat check: confirm the MCP server is alive and responding."""
    payload = {
        "code": 0,
        "msg": "pong",
        "server": "MCP Server v1.0",
    }
    return jsonify(payload)
@app.route('/mcp/info', methods=['GET'])
def info():
    """Report current server state (model load status, task counter)."""
    response = {"code": 0, "data": mcp_status}
    return jsonify(response)
# ------------------------------
# 模型加载/卸载
# ------------------------------
@app.route('/mcp/model/load', methods=['POST'])
def load_model():
    """Load (simulate loading) the model named in the JSON request body.

    Expects a JSON body: {"model_name": "<name>"}.
    Returns a JSON 400 when the body is missing, not JSON, or lacks
    model_name; otherwise marks the model as loaded in mcp_status.
    """
    # silent=True: a bare get_json() returns None (or raises an HTML
    # 400/415) for a missing or non-JSON body, which would crash
    # data.get() with AttributeError. Fall back to {} so validation
    # below produces the intended JSON error instead.
    data = request.get_json(silent=True) or {}
    model_name = data.get("model_name")
    if not model_name:
        return jsonify({"code": 400, "msg": "model_name is required"}), 400
    # Simulated load: bookkeeping only, no real model weights involved.
    mcp_status["current_model"] = model_name
    mcp_status["model_loaded"] = True
    return jsonify({
        "code": 0,
        "msg": f"model {model_name} loaded",
        "data": mcp_status
    })
@app.route('/mcp/model/unload', methods=['POST'])
def unload_model():
    """Reset model state so the server reports no loaded model."""
    mcp_status.update(current_model=None, model_loaded=False)
    return jsonify({"code": 0, "msg": "model unloaded"})
# ------------------------------
# 模型推理(核心 MCP 指令)
# ------------------------------
@app.route('/mcp/infer', methods=['POST'])
def infer():
    """Run a (simulated) inference against the currently loaded model.

    Expects a JSON body: {"prompt": "...", "params": {...}} (both
    optional). Returns a JSON 500 when no model has been loaded yet.
    """
    if not mcp_status["model_loaded"]:
        return jsonify({"code": 500, "msg": "no model loaded"}), 500
    # silent=True avoids an unhandled crash (None.get → AttributeError)
    # when the body is missing or not valid JSON.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "")
    params = data.get("params", {})
    # Simulated inference: bump the task counter and echo a canned reply
    # containing the first 30 characters of the prompt.
    mcp_status["task_count"] += 1
    result = f"[MCP Response] Model: {mcp_status['current_model']}, Prompt: {prompt[:30]}..."
    return jsonify({
        "code": 0,
        "task_id": f"task_{mcp_status['task_count']}",
        "result": result,
        "params": params
    })
# ------------------------------
# 启动服务
# ------------------------------
if __name__ == '__main__':
    # Fixed: the message previously advertised 127.0.0.1 while the server
    # actually binds 0.0.0.0 (all interfaces).
    print("MCP Server running on http://0.0.0.0:8000")
    # WARNING(review): debug=True enables the Werkzeug interactive
    # debugger — combined with host='0.0.0.0' this is remote-code-
    # execution exposure. Keep only for local development; disable in
    # any deployed environment.
    app.run(host='0.0.0.0', port=8000, debug=True)
## 3. 接口说明(标准 MCP 风格)

| 接口 | 方法 | 用途 |
|---|---|---|
| /mcp/ping | GET | 心跳 |
| /mcp/info | GET | 服务状态 |
| /mcp/model/load | POST | 加载模型 |
| /mcp/model/unload | POST | 卸载模型 |
| /mcp/infer | POST | 执行推理 |
### 测试示例(curl)

```bash
# 心跳
curl http://127.0.0.1:8000/mcp/ping

# 加载模型
curl -X POST http://127.0.0.1:8000/mcp/model/load \
  -H "Content-Type: application/json" \
  -d '{"model_name":"llama-7b"}'

# 推理
curl -X POST http://127.0.0.1:8000/mcp/infer \
  -H "Content-Type: application/json" \
  -d '{"prompt":"hello mcp server","params":{"temperature":0.7}}'
```
4. 如果你要工业级 MCP Server
我可以基于以下方向进一步扩展:
- FastAPI 版本(更高性能)
- 支持流式返回 SSE / WebSocket
- 加入认证(API Key)
- 多模型并发调度
- 对接真实模型(Transformers / vLLM / Ollama)