代码
可直接调用
import asyncio
from langchain.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
class WelcomeAgent:
    """Chat agent that streams replies from a local Ollama model.

    Conversation history is kept in memory per session id. Each reply is
    streamed one character at a time: first the model's reasoning (everything
    up to the closing ``</think>`` tag), then a divider line, then the final
    answer with line breaks removed.
    """

    # Tag that terminates the model's reasoning block.
    THINK_END = "</think>"
    # Divider emitted between the reasoning and the final answer
    # (Unicode box-drawing characters).
    SEPARATOR = "\n━━━━━━━━━━━━━━━━━━━━\n\n"
    # Pause between streamed characters, in seconds (typewriter effect).
    CHAR_DELAY = 0.01

    def __init__(self):
        """Create the model client, the prompt template and the chain."""
        self.llm = ChatOllama(
            model="deepseek-r1:8b",
            temperature=0.7,
            repeat_penalty=1.2,  # discourage repeated content
            top_k=50,  # restrict the sampling pool
            top_p=0.9,
            base_url="http://localhost:11434",
        )
        # Prompt layout: system instruction, prior turns, newest user input.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system",
             "你是一个友好的对话AI机器人。根据用户输入和对话历史生成聪明得体的回复。请用1-10句话回复,保持自然友好的语气,直接给出最终回答,不要包含任何思考过程,回答要简洁直接。"),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{input}"),
        ])
        self.chain = self.prompt | self.llm | StrOutputParser()
        # In-memory store, {session_id: [{"role": ..., "content": ...}]}.
        # A real deployment should persist this in a database.
        self.conversations = {}

    def _get_or_create_history(self, session_id: str) -> list[dict[str, str]]:
        """Return the message list for ``session_id``, creating it if absent."""
        return self.conversations.setdefault(session_id, [])

    @classmethod
    def _strip_reasoning(cls, content: str) -> str:
        """Return ``content`` without its leading reasoning block.

        Everything up to and including the first closing think tag is
        dropped; text without that tag is returned unchanged.
        """
        _, closing_tag, answer = content.partition(cls.THINK_END)
        return answer.lstrip() if closing_tag else content

    async def _typewrite(self, text: str, drop_newlines: bool = False):
        """Yield ``text`` one character at a time, pausing after each one.

        With ``drop_newlines`` set, CR and LF characters are skipped entirely
        (no yield and no pause).
        """
        for char in text:
            if drop_newlines and char in ("\n", "\r"):
                continue
            yield char
            await asyncio.sleep(self.CHAR_DELAY)

    def generate_welcome_message_stream(self, input_text: str = "", session_id: str = "default"):
        """Return an async generator streaming the reply to ``input_text``.

        Args:
            input_text: The newest user message.
            session_id: Key selecting which conversation history to use.

        Returns:
            An async generator of strings: the reasoning characters, the
            divider, then the answer characters (line breaks removed). If the
            stream ends without a closing think tag, the buffered text is
            emitted as the answer with no divider.

        Side effects:
            The user message is appended to the session history when the
            generator starts running; the raw model output (reasoning
            included, divider not included) is appended as the assistant turn
            once the stream finishes with a non-empty response.
        """
        chat_history = self._get_or_create_history(session_id)

        async def generate_chunks():
            pending = ""  # text received but not yet emitted
            received = []  # every raw chunk, joined for the history record
            think_closed = False

            # Replay earlier turns to the model. Stored assistant turns hold
            # the raw output, so the reasoning is stripped here: sending it
            # back wastes context and works against the system prompt.
            langchain_messages = [
                HumanMessage(content=msg["content"])
                if msg["role"] == "user"
                else AIMessage(content=self._strip_reasoning(msg["content"]))
                for msg in chat_history
            ]
            chat_history.append({"role": "user", "content": input_text})

            async for chunk in self.chain.astream({
                "input": input_text,
                "chat_history": langchain_messages,
            }):
                received.append(chunk)
                pending += chunk

                if not think_closed:
                    # Keep buffering until the closing tag has fully arrived;
                    # it may be split across several chunks.
                    if self.THINK_END not in pending:
                        continue
                    think_closed = True
                    think_part, answer_part = pending.split(self.THINK_END, 1)
                    async for char in self._typewrite(think_part):
                        yield char
                    yield self.SEPARATOR
                    pending = answer_part.lstrip()

                async for char in self._typewrite(pending, drop_newlines=True):
                    yield char
                pending = ""

            if not think_closed:
                # The closing tag never appeared, so nothing has been emitted
                # yet. Flush the buffer as the answer rather than dropping it.
                async for char in self._typewrite(pending, drop_newlines=True):
                    yield char

            full_response = "".join(received)
            if full_response:
                chat_history.append({
                    "role": "assistant",
                    "content": full_response,
                })

        return generate_chunks()