# Ollama DeepSeek-R1 Integration Guide
## Environment Setup

- Install Ollama: download the installer from the Ollama website
- Pull and run the model from the command line:

```bash
ollama run deepseek-r1
```

- Test it interactively (`ollama run` downloads the model on first use, then drops into a chat prompt):

```bash
ollama run deepseek-r1
```
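Before adding the FastAPI layer, it is worth confirming that the local Ollama HTTP API responds. A minimal sketch, assuming Ollama is listening on its default port 11434 and that the `deepseek-r1:7b` tag (the tag the server code below targets) has been pulled:

```python
import requests

# One-shot, non-streamed request against Ollama's native chat endpoint.
resp = requests.post(
    "http://localhost:11434/api/chat",
    json={
        "model": "deepseek-r1:7b",  # assumes this tag has been pulled
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "stream": False,
    },
    timeout=300,
)
resp.raise_for_status()
# The reply is in message.content; R1 may prepend a <think>...</think> block.
print(resp.json()["message"]["content"])
```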
## Server Implementation

### Core Features

- RESTful API interface
- Streaming response support
- File upload management
- API key verification
- CORS support
```python
import logging
import re
import time
import uuid
import requests
import json
import asyncio
from fastapi import FastAPI, HTTPException, Header, Depends, File, UploadFile, Request
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel
from typing import List, Optional
import base64
from io import BytesIO
import os
import shutil
from datetime import datetime, timedelta
from apscheduler.schedulers.background import BackgroundScheduler
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI()
# CORS middleware configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # allow all origins; restrict to specific domains in production
    allow_credentials=True,
    allow_methods=["*"],  # allow all HTTP methods
    allow_headers=["*"],  # allow all request headers
)
# Custom exception handler: wrap request-validation errors in the unified envelope
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    return JSONResponse(
        status_code=422,
        content={
            "code": 422,
            "message": "Request validation failed",
            "data": None
        }
    )
# Request models (only the necessary parameters are kept)
class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    system: Optional[str] = ""  # system prompt

# Response models (kept for compatibility with the OpenAI-style schema)
class ChatCompletionResponseChoice(BaseModel):
    message: ChatMessage
    finish_reason: str = "stop"

class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionResponseChoice]

# New: models for streaming response chunks
class StreamChoice(BaseModel):
    delta: ChatMessage
    finish_reason: Optional[str] = None
    index: int = 0

class StreamResponse(BaseModel):
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[StreamChoice]

# New: image message models
class ImageChatMessage(BaseModel):
    role: str
    content: str
    image_url: Optional[str] = None  # stores a base64-encoded image

class ImageChatRequest(BaseModel):
    model: str
    messages: List[ImageChatMessage]
    system: Optional[str] = ""

# Configuration
UPLOAD_DIR = "uploads"        # directory for uploaded files
CLEANUP_INTERVAL_HOURS = 24   # cleanup interval (hours)
FILE_MAX_AGE_HOURS = 48       # maximum file age (hours)
# Make sure the upload directory exists
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Delete files older than FILE_MAX_AGE_HOURS from the upload directory
def cleanup_old_files():
    try:
        current_time = datetime.now()
        for filename in os.listdir(UPLOAD_DIR):
            file_path = os.path.join(UPLOAD_DIR, filename)
            file_modified_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if current_time - file_modified_time > timedelta(hours=FILE_MAX_AGE_HOURS):
                os.remove(file_path)
                logging.info(f"Deleted expired file: {filename}")
    except Exception as e:
        logging.error(f"Error while cleaning up files: {str(e)}")

# Schedule the periodic cleanup job
scheduler = BackgroundScheduler()
scheduler.add_job(cleanup_old_files, 'interval', hours=CLEANUP_INTERVAL_HOURS)
scheduler.start()
# API key verification: accepts either the bare key or a "Bearer <key>" token
async def verify_api_key(
    authorization: Optional[str] = Header(
        None,
        alias="Authorization",  # name the header explicitly
        description="Bearer token",
        convert_underscores=False
    )
):
    logging.info(f"Received authorization header: {authorization}")
    VALID_API_KEY = "sk-123456789"
    if not authorization:
        error_detail = {
            "code": 401,
            "message": "Missing API key",
            "data": None
        }
        logging.error(f"Authorization header missing: {error_detail}")
        raise HTTPException(status_code=401, detail=error_detail)
    # Strip an optional "Bearer " prefix before comparing
    key = authorization[7:] if authorization.startswith('Bearer ') else authorization
    if key != VALID_API_KEY:
        error_detail = {
            "code": 401,
            "message": "Invalid API key",
            "data": None
        }
        logging.error(f"Invalid API key: {error_detail}")
        raise HTTPException(status_code=401, detail=error_detail)
    return True
@app.post("/chat")
async def chat_completion(
request: ChatCompletionRequest,
authorized: bool = Depends(verify_api_key)
):
# 参数验证
if not request.messages:
raise HTTPException(status_code=400, detail="No messages provided")
# 验证模型名称
allowed_models = ["deepseek-r1:7b"] # 改为 Ollama 模型名称
if request.model not in allowed_models:
raise HTTPException(
status_code=400,
detail=f"仅支持模型: {', '.join(allowed_models)}"
)
try:
# 构建消息列表,将 system 和 context 添加到消息中
messages = []
# 添加系统提示词
if hasattr(request, 'system') and request.system:
messages.append({
"role": "system",
"content": request.system
})
# 添加用户消息
messages.extend([{"role": msg.role, "content": msg.content} for msg in request.messages])
# 准备 Ollama API 请求
ollama_url = "http://localhost:11434/api/chat"
payload = {
"model": "deepseek-r1:7b",
"messages": messages,
"stream": False,
# "options": {
# "temperature": 0.7,
# "top_p": 0.9
# }
}
# 打印请求内容用于调试
logging.info(f"Sending request to Ollama: {payload}")
# 调用 Ollama API
response = requests.post(ollama_url, json=payload)
if response.status_code != 200:
logging.error(f"Ollama API error: {response.text}")
raise HTTPException(status_code=500, detail="Ollama API 调用失败")
# 修改响应解析逻辑
try:
response_data = response.json()
logging.info(f"Ollama response: {response_data}")
# 从 message.content 获取生成的文本
generated_text = response_data.get("message", {}).get("content", "")
if not generated_text:
logging.warning("Empty response from Ollama")
raise HTTPException(status_code=500, detail="模型返回空响应")
# 修改返回格式,包装在业务结构中
chat_response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4()}",
created=int(time.time()),
model=request.model,
choices=[ChatCompletionResponseChoice(
message=ChatMessage(
role="assistant",
content=generated_text
)
)]
)
return JSONResponse(
status_code=200,
content={
"code": 200,
"message": "success",
"data": chat_response.model_dump()
}
)
except ValueError as e:
logging.error(f"JSON 解析错误: {str(e)}")
logging.error(f"响应内容: {response.text}")
raise HTTPException(status_code=500, detail="响应解析失败")
except Exception as e:
logging.error(f"Error: {str(e)}")
return JSONResponse(
status_code=200,
content={
"code": 500,
"message": f"服务器错误: {str(e)}",
"data": None
}
)
@app.post("/chat/stream")
async def chat_completion_stream(
request: ChatCompletionRequest,
authorized: bool = Depends(verify_api_key)
):
# 参数验证
if not request.messages:
raise HTTPException(status_code=400, detail="No messages provided")
# 验证模型名称
allowed_models = ["deepseek-r1:7b"]
if request.model not in allowed_models:
raise HTTPException(
status_code=400,
detail=f"仅支持模型: {', '.join(allowed_models)}"
)
async def generate():
try:
messages = []
if hasattr(request, 'system') and request.system:
messages.append({
"role": "system",
"content": request.system
})
# 修改消息格式,确保正确传递历史消息
for msg in request.messages:
# 移除 think 标签内容
if msg.role == "assistant":
content = msg.content
content = content.replace("<think>\n", "").replace("</think>\n", "")
messages.append({
"role": msg.role,
"content": content
})
else:
messages.append({
"role": msg.role,
"content": msg.content
})
ollama_url = "http://localhost:11434/api/chat"
payload = {
"model": "deepseek-r1:7b",
"messages": messages,
"stream": True,
"context": None # 让 Ollama 自动管理上下文
}
logging.info(f"Sending messages to Ollama: {messages}") # 添加日志
response = requests.post(ollama_url, json=payload, stream=True)
response.raise_for_status()
stream_id = f"chatcmpl-{uuid.uuid4()}"
created_time = int(time.time())
for line in response.iter_lines():
if line:
try:
chunk = line.decode('utf-8')
data = json.loads(chunk)
if "message" in data:
content = data["message"].get("content", "")
# 直接发送任何非空内容
if content is not None: # 允许空字符串通过
stream_response = StreamResponse(
id=stream_id,
created=created_time,
model=request.model,
choices=[
StreamChoice(
delta=ChatMessage(
role="assistant",
content=content
)
)
]
)
wrapped_response = {
"code": 200,
"message": "success",
"data": stream_response.model_dump()
}
yield f"data: {json.dumps(wrapped_response, ensure_ascii=False)}\n\n"
await asyncio.sleep(0.01) # 添加较小的延迟
if data.get("done", False):
final_chunk = StreamResponse(
id=stream_id,
created=created_time,
model=request.model,
choices=[
StreamChoice(
delta=ChatMessage(role="assistant", content=""),
finish_reason="stop"
)
]
)
wrapped_final_response = {
"code": 200,
"message": "success",
"data": final_chunk.model_dump()
}
yield f"data: {json.dumps(wrapped_final_response, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
break
except json.JSONDecodeError as e:
logging.error(f"JSON 解析错误: {str(e)}")
continue
except Exception as e:
logging.error(f"流式生成错误: {str(e)}")
error_response = {
"code": 500,
"message": f"服务器错误: {str(e)}",
"data": None
}
yield f"data: {json.dumps(error_response)}\n\n"
return StreamingResponse(generate(), media_type="text/event-stream")
@app.post("/upload/image")
async def upload_image(
file: UploadFile = File(...),
authorized: bool = Depends(verify_api_key)
):
try:
# 验证文件类型
allowed_types = ["image/jpeg", "image/png", "image/gif"]
if file.content_type not in allowed_types:
return JSONResponse(
status_code=200,
content={
"code": 400,
"message": "不支持的文件类型",
"data": None
}
)
# 生成唯一文件名
file_extension = file.filename.split('.')[-1]
unique_filename = f"{uuid.uuid4()}.{file_extension}"
file_path = os.path.join(UPLOAD_DIR, unique_filename)
# 保存文件
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# 生成访问URL
file_url = f"/uploads/{unique_filename}"
return JSONResponse(
status_code=200,
content={
"code": 200,
"message": "success",
"data": {
"image_url": file_url
}
}
)
except Exception as e:
logging.error(f"Error uploading image: {str(e)}")
return JSONResponse(
status_code=200,
content={
"code": 500,
"message": f"图片上传失败: {str(e)}",
"data": None
}
)
# Serve uploaded files as static assets
from fastapi.staticfiles import StaticFiles
app.mount("/uploads", StaticFiles(directory=UPLOAD_DIR), name="uploads")

# Stop the scheduler when the app shuts down
@app.on_event("shutdown")
async def shutdown_event():
    scheduler.shutdown()

# Configure logging on startup
@app.on_event("startup")
async def startup_event():
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
## Client Implementation

### Features

- Real-time message exchange
- Markdown rendering
- Streaming response handling
- Message history management
- Adaptive layout
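Note: the component relies on a `stream_qa` helper imported from `@/api/home`, which the original post does not show. Judging from its call site below, it is assumed to POST the request body to the server's `/chat/stream` endpoint and to invoke its three callbacks with each raw SSE line (`data: …`), with any transport error, and on stream completion, respectively.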
```vue
<template>
<div class="chat-container">
<!-- Top navigation bar -->
<div class="nav-bar">
<van-icon name="arrow-left" badge="1" size="26" />
<div style="width: 30px;"></div>
<div class="nav-bar-content">
<div class="title">
<van-icon name="friends" size="26" />
DeepSeek-R1
</div>
<div class="right-icons">
<van-icon name="phone-o" size="26" />
<van-icon name="volume" size="26" color="#0166FF" />
<van-icon name="ellipsis" size="26" />
</div>
</div>
<div style="width: 50px;"></div>
</div>
<div class="message-list">
<div v-for="(msg, index) in messages" :key="index" class="message-item"
:class="msg.role === 'user' ? 'right' : 'left'">
<div class="message-content">
<template v-if="msg.role === 'assistant'">
<div v-if="getThinkContent(msg.content)" class="think-content">
<div class="think-title">思考过程:</div>
{{ getThinkContent(msg.content) }}
</div>
<div class="answer-content" v-html="formatMarkdown(getAnswerContent(msg.content))">
</div>
</template>
<template v-else>
{{ msg.content }}
</template>
</div>
</div>
</div>
<!-- Bottom toolbar -->
<div class="bottom-bar">
<div class="content">
<div class="input">
<van-field v-model="message" type="textarea" placeholder="请输入消息" rows="1" autosize
@keypress.enter.prevent="sendMessage" />
</div>
<div class="action-icons">
<van-icon name="volume" />
<van-icon name="add-o" />
<van-icon name="back-top" @click="sendMessage" />
</div>
</div>
</div>
</div>
</template>
<script>
import { Toast } from 'vant';
import { stream_qa } from '@/api/home';
import { marked } from 'marked';
export default {
name: 'Home',
data() {
return {
messages: [],
message: '',
loading: false
}
},
methods: {
async sendMessage() {
if (!this.message.trim() || this.loading) return
// Append the user's message
const userMessage = {
role: 'user',
content: this.message.trim()
}
const sendData = {
"model": "deepseek-r1:7b",
"messages": JSON.parse(JSON.stringify([...this.messages, userMessage]))
}
this.messages.push(userMessage)
this.message = ''
this.loading = true
// Append a placeholder assistant message, filled in as chunks arrive
const aiMessage = {
role: 'assistant',
content: ''
}
this.messages.push(aiMessage)
stream_qa(sendData, (data) => {
if (data === "data: [DONE]" || data === "data: DONE") {
return
}
// Strip everything before the first "{" (e.g. the leading "data: " SSE prefix)
const index = data.indexOf("{");
if (index !== -1) {
  data = data.substring(index);
}
let jsonData = {}
try {
jsonData = JSON.parse(data);
} catch (error) {
console.error('JSON parse error:', error);
}
console.log(jsonData);
if (jsonData && jsonData.code === 200) {
jsonData = jsonData.data
if (jsonData.error && jsonData.error.message) {
Toast(jsonData.error.message);
this.messages.pop();
this.messages.pop();
this.loading = false
return
}
const value = jsonData?.choices?.[0]?.delta?.content || "";
if (value !== "") {
aiMessage.content += value
// Scroll to the bottom
this.$nextTick(() => {
const messageList = document.querySelector('.message-list')
messageList.scrollTop = messageList.scrollHeight
})
}
} else {
  Toast(jsonData.message || 'Request failed');
this.loading = false
this.messages.pop();
this.messages.pop();
return
}
}, (error) => {
console.error('Stream error:', error);
setTimeout(() => {
this.messages.pop();
if (this.messages[this.messages.length - 1].role === "user") {
this.messages.pop();
}
console.log(this.messages);
this.loading = false
}, 500);
}, () => {
console.log('Stream completed');
this.loading = false
});
},
getThinkContent(content) {
const thinkMatch = content.match(/<think>(.*?)<\/think>/s);
return thinkMatch ? thinkMatch[1].trim() : '';
},
formatMarkdown(content) {
return marked(content);
},
getAnswerContent(content) {
return content.replace(/<think>.*?<\/think>/s, '').trim();
},
},
}
</script>
<style lang="scss" scoped>
.chat-container {
background: #FFFFFF;
height: calc(100vh - 120px);
display: flex;
flex-direction: column;
padding-top: 70px;
padding-bottom: 50px;
.nav-bar {
position: fixed;
top: 0;
left: 0;
right: 0;
padding: 10px 15px;
display: flex;
align-items: center;
background-color: #fff;
border-bottom: 1px solid #fff;
.nav-bar-content {
padding: 10px 0;
flex: 1;
display: flex;
align-items: center;
justify-content: space-between;
border-bottom: 2px solid #0166FF;
border-radius: 6px;
.title {
margin-left: 4px;
font-size: 16px;
font-weight: 500;
display: flex;
align-items: center;
}
.right-icons {
display: flex;
gap: 15px;
}
}
}
.message-list {
flex: 1;
padding: 15px;
overflow-y: auto;
}
.message-item {
display: flex;
gap: 10px;
margin-bottom: 20px;
&.left {
.avatar {
width: 40px;
height: 40px;
border-radius: 3px;
background: #fff;
}
.message-content {
background: #F5F5F5;
padding: 12px;
max-width: 80%;
font-size: 15px;
line-height: 1.6;
border-radius: 10px;
border-bottom-left-radius: 0px;
.detail-text {
margin-top: 8px;
color: #666;
}
.think-content {
background: #f0f9ff;
padding: 8px;
margin-bottom: 10px;
border-radius: 6px;
font-size: 14px;
color: #666;
.think-title {
color: #0166FF;
font-weight: 500;
margin-bottom: 4px;
}
}
.answer-content {
margin-top: 8px;
}
:deep(.answer-content) {
h1, h2, h3, h4, h5, h6 {
margin: 16px 0 8px;
font-weight: 600;
}
p {
margin: 8px 0;
}
code {
background: #f1f1f1;
padding: 2px 4px;
border-radius: 4px;
font-family: monospace;
}
pre {
background: #f5f5f5;
padding: 12px;
border-radius: 4px;
overflow-x: auto;
code {
background: none;
padding: 0;
}
}
}
}
}
&.right {
flex-direction: row-reverse;
.message-content {
background: #0166FF;
color: #fff;
padding: 8px 12px;
max-width: 80%;
font-size: 15px;
line-height: 1.6;
border-radius: 10px;
border-bottom-right-radius: 0px;
}
}
}
.bottom-bar {
position: fixed;
bottom: 0;
left: 0;
right: 0;
padding: 10px 30px;
.content {
padding: 10px 15px;
background: #FFFFFF;
border: 1px solid #e5e5e5;
border-radius: 10px;
// shadow cast above the bar
box-shadow: 0px -2px 10px 0px rgba(0, 0, 0, 0.1);
display: flex;
align-items: center;
justify-content: space-between;
.action-icons {
display: flex;
justify-content: center;
gap: 20px;
font-size: 20px;
}
.input {
flex: 1;
margin-right: 10px;
:deep(.van-field) {
padding: 0;
background: transparent;
.van-field__control {
min-height: 20px;
font-size: 14px;
}
}
}
}
}
}
</style>
```
## Demo
## Deployment

### Starting the Service

```bash
uvicorn main:app --host 0.0.0.0 --port 8000
```
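Once the service is running, both chat endpoints can be exercised from any HTTP client. A minimal sketch using `requests`, assuming the defaults from the code above (port 8000, the `sk-123456789` key, and the `{code, message, data}` envelope):

```python
import json
import requests

BASE = "http://localhost:8000"
HEADERS = {"Authorization": "Bearer sk-123456789"}  # the bare key works too
BODY = {
    "model": "deepseek-r1:7b",
    "messages": [{"role": "user", "content": "Hello!"}],
}

# Non-streaming chat: the reply arrives wrapped in the business envelope.
resp = requests.post(f"{BASE}/chat", json=BODY, headers=HEADERS, timeout=300)
envelope = resp.json()
if envelope["code"] == 200:
    print(envelope["data"]["choices"][0]["message"]["content"])

# Streaming chat: each SSE event is a "data: <json>" line, ending with "data: [DONE]".
with requests.post(f"{BASE}/chat/stream", json=BODY, headers=HEADERS,
                   stream=True, timeout=300) as r:
    for line in r.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        chunk = json.loads(payload)
        if chunk.get("code") == 200:
            print(chunk["data"]["choices"][0]["delta"]["content"], end="", flush=True)
```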
### Requirements

| Component | Minimum Version |
| --- | --- |
| Python | ≥ 3.8 |
| FastAPI | ≥ 0.68.0 |
| Ollama | ≥ 0.1.0 |
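Beyond the versions above, the server imports a few PyPI packages; a hedged install line matching those imports: `pip install fastapi uvicorn requests apscheduler "pydantic>=2" python-multipart`. (`python-multipart` is required by FastAPI for the `UploadFile` form field, and `model_dump()` assumes Pydantic v2.)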
### Notes

- Make sure the Ollama service is running on port `11434`
- The API key defaults to `sk-123456789`
- Uploaded files are stored under `./uploads` (see the sketch below)
- Configure HTTPS for production deployments
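The image upload endpoint can be exercised the same way. A sketch assuming a local `example.png` (a hypothetical file) and the same defaults:

```python
import requests

# POST a local PNG as multipart form data; the form field name must be "file".
with open("example.png", "rb") as f:  # hypothetical local file
    resp = requests.post(
        "http://localhost:8000/upload/image",
        headers={"Authorization": "Bearer sk-123456789"},
        files={"file": ("example.png", f, "image/png")},
    )
# Expected envelope: {"code": 200, "message": "success",
#                     "data": {"image_url": "/uploads/<uuid>.png"}}
print(resp.json())
```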
## Next Chapter

Using it together with an agent