代码如下:
/**
 * Ollama connection settings, bound from application properties
 * ({@code ollama.*}) with local-development defaults.
 *
 * <p>NOTE(review): as originally written the class exposed no accessors, yet
 * {@code ChatController} calls {@code getBaseUrl()} / {@code getDefaultModelName()} —
 * explicit getters are added here. Presumably this class is registered as a
 * Spring bean elsewhere (e.g. {@code @Configuration}/{@code @Component});
 * confirm, otherwise the {@code @Value} injection never runs.
 */
public class OllamaConfig {
    /** Base URL of the local Ollama server. */
    @Value("${ollama.base-url:http://localhost:11434}")
    private String baseUrl;

    /** Model used when the client does not request one explicitly. */
    @Value("${ollama.model-name:deepseek-r1:8b}")
    private String defaultModelName;

    /** Sampling temperature forwarded to the model. */
    @Value("${ollama.temperature:0.7}")
    private Double temperature;

    /** Per-request timeout (seconds) for model calls. */
    @Value("${ollama.timeout-seconds:300}")
    private int timeoutSeconds;

    public String getBaseUrl() { return baseUrl; }

    public String getDefaultModelName() { return defaultModelName; }

    public Double getTemperature() { return temperature; }

    public int getTimeoutSeconds() { return timeoutSeconds; }
}
页面:
html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI 聊天助手</title>
<!-- Markdown 渲染 -->
<script src="https://cdn.jsdelivr.net/npm/marked@12.0.0/marked.min.js"></script>
<!-- 代码高亮 -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/styles/github-dark.min.css">
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/core.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/java.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/javascript.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/python.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/bash.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/sql.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/highlight.js@11.9.0/lib/languages/json.min.js"></script>
<style>
/* ── Reset & Variables ─────────────────────────────── */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg-primary: #0d1117;
--bg-secondary: #161b22;
--bg-panel: #21262d;
--bg-input: #30363d;
--border: #30363d;
--border-hover: #484f58;
--text-primary: #e6edf3;
--text-secondary: #8b949e;
--text-muted: #6e7681;
--accent: #2ea043;
--accent-hover: #3fb950;
--user-bubble: #1f6feb;
--think-bg: #0d1117;
--think-border: #21262d;
--think-text: #7d8590;
--think-accent: #388bfd;
--code-copy-bg: rgba(255,255,255,0.08);
--radius-sm: 6px;
--radius-md: 12px;
--radius-lg: 18px;
--font-sans: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans SC", sans-serif;
--font-mono: "Cascadia Code", "Fira Code", "JetBrains Mono", "SF Mono", monospace;
}
body {
font-family: var(--font-sans);
background: var(--bg-primary);
color: var(--text-primary);
height: 100dvh;
display: flex;
flex-direction: column;
overflow: hidden;
}
/* ── Header ─────────────────────────────────────────── */
.header {
background: var(--bg-secondary);
border-bottom: 1px solid var(--border);
padding: 0 20px;
height: 56px;
display: flex;
align-items: center;
justify-content: space-between;
flex-shrink: 0;
user-select: none;
}
.header-left { display: flex; align-items: center; gap: 10px; }
.logo-icon {
width: 32px; height: 32px;
background: linear-gradient(135deg, #2ea043, #1f6feb);
border-radius: var(--radius-sm);
display: flex; align-items: center; justify-content: center;
font-size: 16px;
}
.logo-text { font-size: 15px; font-weight: 600; }
.model-badge {
display: flex; align-items: center; gap: 5px;
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: 20px;
padding: 3px 10px;
font-size: 11px; color: var(--text-secondary);
}
.model-badge .dot {
width: 6px; height: 6px;
background: var(--accent); border-radius: 50%;
animation: pulse 2s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.4; }
}
/* ── 模型选择器 ──────────────────────────────────────── */
.model-selector-wrap {
position: relative;
display: flex;
align-items: center;
}
.model-select {
appearance: none;
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text-primary);
font-size: 12px;
font-family: var(--font-sans);
padding: 5px 28px 5px 10px;
cursor: pointer;
outline: none;
transition: border-color 0.15s;
max-width: 180px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.model-select:hover,
.model-select:focus { border-color: var(--border-hover); }
.model-select option {
background: var(--bg-panel);
color: var(--text-primary);
}
.model-select-chevron {
position: absolute;
right: 8px;
pointer-events: none;
color: var(--text-muted);
font-size: 10px;
}
.model-loading {
font-size: 11px;
color: var(--text-muted);
padding: 0 6px;
}
.new-chat-btn {
display: flex; align-items: center; gap: 6px;
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text-secondary);
font-size: 13px; padding: 6px 12px;
cursor: pointer; transition: all 0.15s;
}
.new-chat-btn:hover {
background: var(--bg-input);
border-color: var(--border-hover);
color: var(--text-primary);
}
/* ── Chat Area ───────────────────────────────────────── */
.chat-container {
flex: 1; overflow-y: auto;
padding: 20px 0; scroll-behavior: smooth;
}
.chat-container::-webkit-scrollbar { width: 6px; }
.chat-container::-webkit-scrollbar-track { background: transparent; }
.chat-container::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
.messages {
max-width: 780px; margin: 0 auto;
padding: 0 16px;
display: flex; flex-direction: column; gap: 28px;
}
/* ── Welcome ─────────────────────────────────────────── */
.welcome {
display: flex; flex-direction: column;
align-items: center; justify-content: center;
padding: 60px 20px; text-align: center; gap: 16px;
}
.welcome-icon { font-size: 52px; line-height: 1; }
.welcome h1 { font-size: 24px; font-weight: 600; }
.welcome p { font-size: 14px; color: var(--text-secondary); max-width: 400px; line-height: 1.6; }
.welcome-hints {
display: grid; grid-template-columns: 1fr 1fr;
gap: 10px; margin-top: 8px;
width: 100%; max-width: 480px;
}
.hint-card {
background: var(--bg-secondary); border: 1px solid var(--border);
border-radius: var(--radius-md); padding: 12px 14px;
font-size: 12px; color: var(--text-secondary);
cursor: pointer; transition: all 0.15s; text-align: left;
}
.hint-card:hover {
background: var(--bg-panel); border-color: var(--border-hover);
color: var(--text-primary);
}
/* ── Messages ────────────────────────────────────────── */
.message { display: flex; gap: 12px; animation: fadeUp 0.2s ease; }
@keyframes fadeUp {
from { opacity: 0; transform: translateY(8px); }
to { opacity: 1; transform: translateY(0); }
}
.message.user { flex-direction: row-reverse; }
.avatar {
width: 32px; height: 32px; border-radius: 50%;
display: flex; align-items: center; justify-content: center;
font-size: 14px; flex-shrink: 0; margin-top: 4px;
}
.message.user .avatar { background: var(--user-bubble); }
.message.ai .avatar { background: linear-gradient(135deg, #2ea043, #1f6feb); }
.bubble {
max-width: 78%; border-radius: var(--radius-lg);
padding: 12px 16px; font-size: 14px; line-height: 1.75;
}
.message.user .bubble {
background: var(--user-bubble); color: #fff;
border-bottom-right-radius: var(--radius-sm);
}
.message.ai .bubble {
background: var(--bg-secondary); border: 1px solid var(--border);
color: var(--text-primary); border-bottom-left-radius: var(--radius-sm);
}
/* ── Think block ─────────────────────────────────────── */
.think-block {
background: var(--think-bg);
border: 1px solid var(--think-border);
border-radius: var(--radius-sm);
margin-bottom: 12px; overflow: hidden;
}
.think-header {
display: flex; align-items: center; gap: 7px;
padding: 7px 12px;
cursor: pointer; user-select: none;
font-size: 12px; color: var(--think-text);
transition: background 0.12s;
}
.think-header:hover { background: var(--bg-panel); }
.think-icon { font-size: 13px; }
.think-label { flex: 1; }
.think-chevron {
font-size: 9px; transition: transform 0.2s;
color: var(--text-muted);
}
.think-header.open .think-chevron { transform: rotate(90deg); }
.think-count {
font-size: 10px; color: var(--text-muted);
background: var(--bg-panel);
border-radius: 10px; padding: 1px 7px;
}
.think-content {
display: none;
padding: 10px 14px 12px;
font-size: 12.5px; color: var(--think-text);
line-height: 1.65; font-style: italic;
border-top: 1px solid var(--think-border);
white-space: pre-wrap; word-break: break-word;
}
.think-content.visible { display: block; }
/* ── Streaming indicator ─────────────────────────────── */
.streaming-raw {
font-size: 13.5px; line-height: 1.7;
color: var(--text-secondary);
white-space: pre-wrap; word-break: break-word;
}
.thinking-hint {
color: var(--text-muted); font-size: 12px; font-style: italic;
}
/* ── Divider between think & answer ─────────────────── */
.answer-divider {
height: 1px; background: var(--border);
margin: 10px 0 12px; border: none;
}
/* ── 记忆状态徽章 ────────────────────────────────────── */
.memory-badge {
display: inline-flex; align-items: center; gap: 4px;
margin-top: 8px; padding: 2px 8px;
background: var(--bg-panel); border: 1px solid var(--border);
border-radius: 10px; font-size: 10px; color: var(--text-muted);
user-select: none;
}
.memory-badge .mem-dot {
width: 5px; height: 5px; border-radius: 50%;
background: var(--accent);
}
/* ── Markdown inside bubble ──────────────────────────── */
.bubble .md-body p { margin-bottom: 10px; }
.bubble .md-body p:last-child { margin-bottom: 0; }
.bubble .md-body h1,
.bubble .md-body h2,
.bubble .md-body h3 { margin: 16px 0 6px; font-weight: 600; line-height: 1.3; }
.bubble .md-body h1 { font-size: 18px; }
.bubble .md-body h2 { font-size: 16px; }
.bubble .md-body h3 { font-size: 14px; }
.bubble .md-body ul,
.bubble .md-body ol { margin: 6px 0 6px 20px; }
.bubble .md-body li { margin-bottom: 4px; }
.bubble .md-body strong { color: #e2c08d; }
.bubble .md-body em { color: var(--text-secondary); }
.bubble .md-body code {
font-family: var(--font-mono); font-size: 12.5px;
background: var(--bg-panel); padding: 2px 6px;
border-radius: 4px; color: #79c0ff;
}
.message.user .bubble .md-body code { background: rgba(255,255,255,0.2); color: #fff; }
.bubble .md-body pre {
margin: 10px 0; border-radius: var(--radius-sm);
overflow: auto; position: relative;
}
.bubble .md-body pre code {
background: transparent; padding: 0; color: inherit;
}
/* 代码块复制按钮 */
.code-block-wrapper { position: relative; }
.copy-btn {
position: absolute; top: 8px; right: 8px;
background: var(--code-copy-bg); border: none;
border-radius: 4px; padding: 3px 8px;
font-size: 11px; color: var(--text-secondary);
cursor: pointer; opacity: 0; transition: opacity 0.15s;
}
.code-block-wrapper:hover .copy-btn { opacity: 1; }
.copy-btn.copied { color: var(--accent); }
.bubble .md-body blockquote {
border-left: 3px solid var(--border-hover);
padding-left: 12px; margin: 8px 0;
color: var(--text-secondary);
}
.bubble .md-body table {
width: 100%; border-collapse: collapse;
margin: 10px 0; font-size: 13px;
}
.bubble .md-body th,
.bubble .md-body td { border: 1px solid var(--border); padding: 6px 10px; }
.bubble .md-body th { background: var(--bg-panel); font-weight: 600; }
.bubble .md-body a { color: #58a6ff; text-decoration: none; }
.bubble .md-body a:hover { text-decoration: underline; }
.bubble .md-body hr { border: none; border-top: 1px solid var(--border); margin: 12px 0; }
/* ── Cursor ──────────────────────────────────────────── */
.cursor {
display: inline-block; width: 2px; height: 1em;
background: var(--text-secondary); margin-left: 1px;
vertical-align: text-bottom;
animation: blink 0.8s step-end infinite;
}
@keyframes blink {
0%, 100% { opacity: 1; }
50% { opacity: 0; }
}
/* ── Typing indicator ────────────────────────────────── */
.typing-dots { display: flex; gap: 4px; align-items: center; padding: 4px 0; }
.typing-dots span {
width: 6px; height: 6px;
background: var(--text-muted); border-radius: 50%;
animation: bounce 1.2s infinite;
}
.typing-dots span:nth-child(2) { animation-delay: 0.2s; }
.typing-dots span:nth-child(3) { animation-delay: 0.4s; }
@keyframes bounce {
0%, 80%, 100% { transform: translateY(0); }
40% { transform: translateY(-6px); }
}
/* ── Input Area ──────────────────────────────────────── */
.input-area {
border-top: 1px solid var(--border);
background: var(--bg-secondary);
padding: 14px 20px 18px; flex-shrink: 0;
}
.input-wrapper {
max-width: 780px; margin: 0 auto;
display: flex; align-items: flex-end; gap: 10px;
background: var(--bg-input); border: 1px solid var(--border);
border-radius: var(--radius-md); padding: 10px 14px;
transition: border-color 0.15s;
}
.input-wrapper:focus-within { border-color: var(--border-hover); }
#message-input {
flex: 1; background: transparent; border: none; outline: none;
color: var(--text-primary); font-family: var(--font-sans);
font-size: 14px; line-height: 1.6;
resize: none; max-height: 200px; overflow-y: auto;
}
#message-input::placeholder { color: var(--text-muted); }
#message-input::-webkit-scrollbar { width: 4px; }
#message-input::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
.send-btn {
width: 34px; height: 34px; border-radius: var(--radius-sm);
border: none; background: var(--accent); color: #fff;
cursor: pointer; display: flex; align-items: center; justify-content: center;
transition: all 0.15s; flex-shrink: 0;
}
.send-btn:hover:not(:disabled) { background: var(--accent-hover); }
.send-btn:disabled { background: var(--bg-panel); color: var(--text-muted); cursor: not-allowed; }
.send-btn svg { width: 16px; height: 16px; }
.input-footer {
max-width: 780px; margin: 6px auto 0;
display: flex; justify-content: space-between; align-items: center;
}
.input-hint { font-size: 11px; color: var(--text-muted); }
/* ── 模板栏 ──────────────────────────────────────────── */
.template-bar {
max-width: 780px; margin: 0 auto 10px;
display: flex; gap: 6px; flex-wrap: wrap;
}
.tpl-btn {
display: flex; align-items: center; gap: 5px;
background: var(--bg-panel); border: 1px solid var(--border);
border-radius: 20px; padding: 4px 12px;
font-size: 12px; color: var(--text-secondary);
cursor: pointer; transition: all 0.15s; white-space: nowrap;
user-select: none;
}
.tpl-btn:hover {
border-color: var(--border-hover); color: var(--text-primary);
background: var(--bg-input);
}
.tpl-btn.active {
border-color: var(--accent);
color: var(--accent);
background: rgba(46,160,67,0.1);
}
.tpl-btn .tpl-icon { font-size: 13px; }
/* ── Toast ───────────────────────────────────────────── */
.toast {
position: fixed; bottom: 80px; left: 50%;
transform: translateX(-50%) translateY(20px);
background: var(--bg-panel); border: 1px solid var(--border);
color: var(--text-secondary); padding: 8px 16px;
border-radius: 20px; font-size: 12px;
opacity: 0; transition: all 0.2s;
pointer-events: none; z-index: 100;
}
.toast.show { opacity: 1; transform: translateX(-50%) translateY(0); }
/* ── Responsive ──────────────────────────────────────── */
@media (max-width: 600px) {
.model-badge { display: none; }
.bubble { max-width: 90%; }
.messages { padding: 0 10px; }
}
</style>
</head>
<body>
<!-- ── Header ── -->
<header class="header">
<div class="header-left">
<div class="logo-icon">🤖</div>
<span class="logo-text">AI 聊天助手</span>
<div class="model-badge">
<div class="dot"></div>
本地运行
</div>
</div>
<div style="display:flex;align-items:center;gap:10px">
<!-- 模型选择器 -->
<div class="model-selector-wrap">
<span class="model-loading" id="model-loading">加载中...</span>
<select class="model-select" id="model-select" style="display:none" title="选择模型"></select>
<span class="model-select-chevron" id="model-chevron" style="display:none">▼</span>
</div>
<button class="new-chat-btn" id="new-chat-btn">
<svg viewBox="0 0 16 16" fill="currentColor" width="13" height="13">
<path d="M8 0a1 1 0 0 1 1 1v6h6a1 1 0 1 1 0 2H9v6a1 1 0 1 1-2 0V9H1a1 1 0 0 1 0-2h6V1a1 1 0 0 1 1-1z"/>
</svg>
新对话
</button>
</div>
</header>
<!-- ── Chat ── -->
<div class="chat-container" id="chat-container">
<div class="messages" id="messages">
<div class="welcome" id="welcome-screen">
<div class="welcome-icon">✨</div>
<h1>你好,我是 AI 助手</h1>
<p>由本地模型驱动,完全在你的本地设备上运行,无需联网。</p>
<div class="welcome-hints">
<div class="hint-card" data-text="用 Java 实现一个快速排序算法">
💻 用 Java 实现一个快速排序算法
</div>
<div class="hint-card" data-text="解释一下什么是 Spring Boot,以及它的核心特性">
📚 解释 Spring Boot 的核心特性
</div>
<div class="hint-card" data-text="写一个 SQL 查询,找出订单金额最高的前 10 名用户">
🗄️ 写一个查询最高订单的 SQL
</div>
<div class="hint-card" data-text="帮我检查下面这段代码有什么问题,并给出优化建议">
🔍 帮我审查并优化一段代码
</div>
</div>
</div>
</div>
</div>
<!-- ── Input ── -->
<div class="input-area">
<!-- 模板快捷栏 -->
<div class="template-bar" id="template-bar"></div>
<div class="input-wrapper">
<textarea
id="message-input"
placeholder="输入消息,Enter 发送,Shift+Enter 换行..."
rows="1"
></textarea>
<button class="send-btn" id="send-btn" disabled>
<svg viewBox="0 0 16 16" fill="currentColor">
<path d="M.989 8 0 2.68l15.417 5.315-15.417 5.32.989-5.316zm.827-4.853L2.36 7.203l8.43.795-9.144-4.851zm8.43 4.851-8.43.795.544 2.923 7.886-3.718z"/>
</svg>
</button>
</div>
<div class="input-footer">
<span class="input-hint">Enter 发送 · Shift+Enter 换行 · 支持 Markdown</span>
</div>
</div>
<div class="toast" id="toast"></div>
<script>
// ── Global state ───────────────────────────────────────────
let sessionId = null; // current session id, assigned by the server's "session" SSE event
let isStreaming = false; // true while an answer is streaming; blocks concurrent sends
let selectedModel = null; // name of the currently selected model
let selectedTplId = 'general'; // id of the currently selected prompt template
// ── DOM references ─────────────────────────────────────────
const messagesEl = document.getElementById('messages');
const inputEl = document.getElementById('message-input');
const sendBtn = document.getElementById('send-btn');
const newChatBtn = document.getElementById('new-chat-btn');
const chatContainer = document.getElementById('chat-container');
const toastEl = document.getElementById('toast');
const modelSelect = document.getElementById('model-select');
const modelLoading = document.getElementById('model-loading');
const modelChevron = document.getElementById('model-chevron');
const templateBar = document.getElementById('template-bar');
// ── Load model list ────────────────────────────────────────
// Fetches the locally installed Ollama models from /api/models and fills the
// header <select>. Changing the model deletes the current server-side
// session, since conversation context should not carry across models.
async function loadModels() {
try {
const resp = await fetch('/api/models');
const data = await resp.json();
const models = data.models || [];
const defaultModel = data.default || '';
modelLoading.style.display = 'none';
if (models.length === 0) {
// nothing installed — keep the placeholder visible with an explanation
modelLoading.textContent = '无可用模型';
modelLoading.style.display = 'inline';
return;
}
models.forEach(name => {
const opt = document.createElement('option');
opt.value = name;
opt.textContent = name;
if (name === defaultModel) opt.selected = true;
modelSelect.appendChild(opt);
});
selectedModel = modelSelect.value;
modelSelect.style.display = 'inline-block';
modelChevron.style.display = 'inline';
modelSelect.addEventListener('change', () => {
selectedModel = modelSelect.value;
// switching models clears the current session (context must reset per model);
// deletion is best-effort — errors are deliberately ignored
if (sessionId) {
fetch(`/api/chat/session/${sessionId}`, { method: 'DELETE' }).catch(() => {});
sessionId = null;
}
showToast(`已切换到 ${selectedModel}`);
});
if (data.error) showToast(data.error);
} catch (e) {
modelLoading.textContent = '模型加载失败';
console.error('加载模型列表失败:', e);
}
}
loadModels();
// ── Load prompt template list ──────────────────────────────
// Fetches the built-in prompt templates from /api/templates and renders one
// pill button per template in the bar above the input box.
// NOTE(review): tpl.icon / tpl.name are inserted via innerHTML — assumed to
// be trusted server-defined strings; confirm they never carry user content.
async function loadTemplates() {
try {
const resp = await fetch('/api/templates');
const data = await resp.json();
const templates = data.templates || [];
const defaultId = data.default || 'general';
selectedTplId = defaultId;
templateBar.innerHTML = '';
templates.forEach(tpl => {
const btn = document.createElement('button');
btn.className = 'tpl-btn' + (tpl.id === defaultId ? ' active' : '');
btn.dataset.id = tpl.id;
btn.title = tpl.description;
btn.innerHTML = `<span class="tpl-icon">${tpl.icon}</span>${tpl.name}`;
btn.addEventListener('click', () => selectTemplate(tpl.id, tpl.name, tpl.icon));
templateBar.appendChild(btn);
});
} catch (e) {
console.error('加载模板失败:', e);
}
}
// Activates template `id`; no-op when it is already selected.
function selectTemplate(id, name, icon) {
if (selectedTplId === id) return;
selectedTplId = id;
// move the highlight to the newly selected button
templateBar.querySelectorAll('.tpl-btn').forEach(b => {
b.classList.toggle('active', b.dataset.id === id);
});
// switching templates clears the session (the system prompt has changed)
if (sessionId) {
fetch(`/api/chat/session/${sessionId}`, { method: 'DELETE' }).catch(() => {});
sessionId = null;
}
showToast(`已切换到 ${icon} ${name}`);
}
loadTemplates();
// ── marked configuration ───────────────────────────────────
// breaks: single newlines become <br>; gfm: GitHub-flavored Markdown.
// NOTE(review): marked v5+ removed the `highlight` option (it moved to the
// marked-highlight plugin), so with the marked@12 CDN build this option is
// likely ignored. Harmless here because hljs.highlightElement() is run on
// every rendered code block afterwards — but confirm and consider removing.
marked.setOptions({
breaks: true,
gfm: true,
highlight(code, lang) {
if (lang && hljs.getLanguage(lang)) {
return hljs.highlight(code, { language: lang }).value;
}
return hljs.highlightAuto(code).value;
}
});
// ── Auto-grow the input box ────────────────────────────────
inputEl.addEventListener('input', () => {
inputEl.style.height = 'auto';
// cap the textarea at 200px (matches the CSS max-height)
inputEl.style.height = Math.min(inputEl.scrollHeight, 200) + 'px';
sendBtn.disabled = !inputEl.value.trim() || isStreaming;
});
// Enter sends; Shift+Enter inserts a newline
inputEl.addEventListener('keydown', e => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
if (!sendBtn.disabled) sendMessage();
}
});
sendBtn.addEventListener('click', sendMessage);
newChatBtn.addEventListener('click', newChat);
// Clicking a welcome hint card pre-fills the input with its data-text
document.querySelectorAll('.hint-card').forEach(card => {
card.addEventListener('click', () => {
inputEl.value = card.dataset.text;
inputEl.dispatchEvent(new Event('input'));
inputEl.focus();
});
});
// ── Send a message ─────────────────────────────────────────
// Posts the user's text to /api/chat/stream and hands the SSE response body
// to processStream. Guards against empty input and concurrent sends.
async function sendMessage() {
const text = inputEl.value.trim();
if (!text || isStreaming) return;
document.getElementById('welcome-screen')?.remove();
appendUserMessage(text);
inputEl.value = '';
inputEl.style.height = 'auto';
sendBtn.disabled = true;
isStreaming = true;
// placeholder AI bubble with a typing indicator; the stream renders into it
const aiMsgEl = appendAiTyping();
try {
const response = await fetch('/api/chat/stream', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ sessionId, message: text, modelName: selectedModel, templateId: selectedTplId })
});
if (!response.ok) throw new Error(`HTTP ${response.status}`);
await processStream(response.body, aiMsgEl);
} catch (err) {
console.error('发送失败:', err);
const bubble = aiMsgEl.querySelector('.bubble');
bubble.innerHTML = `<span style="color:#f85149">⚠️ 请求失败:${escapeHtml(err.message)}</span>`;
showToast('连接失败,请确认 Ollama 服务已启动');
} finally {
isStreaming = false;
sendBtn.disabled = !inputEl.value.trim();
}
}
// ── SSE stream processing (core) ──────────────────────────
//
// Spring SseEmitter wire format:
//   event:session\ndata:uuid\n\n
//   event:token\ndata:hello\n\n
//   event:formatted\ndata:{...json...}\n\n
//   event:done\ndata:[DONE]\n\n
//
// Parsing strategy: split the decoded text on blank lines (\n\n) into
// complete SSE message blocks, then extract event name and data per block.
// Per the SSE spec, a payload containing newlines arrives as several
// consecutive "data:" lines, which must be re-joined with '\n' — the
// previous version concatenated them and silently dropped line breaks.
// We deliberately do NOT strip a leading space after "data:": SseEmitter
// writes no separator space, so a space there is real token text.
async function processStream(body, aiMsgEl) {
const reader = body.getReader();
const decoder = new TextDecoder('utf-8');
const bubble = aiMsgEl.querySelector('.bubble');
bubble.innerHTML = '<span class="cursor"></span>';
const cursor = bubble.querySelector('.cursor');
// accumulated streaming state
let rawBuffer = ''; // decoded text not yet parsed into complete blocks
let rawContent = ''; // all token text received so far
while (true) {
const { done, value } = await reader.read();
if (done) break;
rawBuffer += decoder.decode(value, { stream: true });
// split into complete SSE message blocks on blank lines
const blocks = rawBuffer.split('\n\n');
rawBuffer = blocks.pop(); // last piece may be incomplete — keep for next read
for (const block of blocks) {
if (!block.trim()) continue;
// extract event name and data lines from this block
let eventName = 'token';
const dataLines = [];
for (const line of block.split('\n')) {
if (line.startsWith('event:')) {
eventName = line.slice(6).trim();
} else if (line.startsWith('data:')) {
dataLines.push(line.slice(5)); // no trim — leading spaces are token text
}
}
// SSE spec: multiple data lines of one event join with '\n'
const dataValue = dataLines.join('\n');
// ── dispatch by event type ────────────────────────────
if (eventName === 'session') {
if (!sessionId) sessionId = dataValue.trim();
continue;
}
if (eventName === 'token') {
rawContent += dataValue;
renderStreaming(bubble, cursor, rawContent);
scrollToBottom();
continue;
}
if (eventName === 'formatted') {
// final payload from the backend output parser — render it
try {
const parsed = JSON.parse(dataValue.trim());
renderFormatted(bubble, cursor, parsed);
scrollToBottom();
} catch (e) {
console.error('formatted 事件 JSON 解析失败:', e);
}
continue;
}
if (eventName === 'done') {
// safety net: if no formatted event arrived, fall back to raw content
if (cursor.parentNode) {
cursor.remove();
if (!bubble.querySelector('.md-body')) {
renderFallback(bubble, rawContent);
}
}
scrollToBottom();
return;
}
if (eventName === 'error') {
cursor.remove();
bubble.innerHTML = `<span style="color:#f85149">⚠️ ${escapeHtml(dataValue.trim())}</span>`;
scrollToBottom();
return;
}
}
}
// stream ended without a done event — finalize whatever we have
if (cursor.parentNode) {
cursor.remove();
if (!bubble.querySelector('.md-body')) {
renderFallback(bubble, rawContent);
}
}
scrollToBottom();
}
// ── Streaming-phase rendering ─────────────────────────────
// Re-renders the bubble from the raw accumulated text on every token,
// splitting out <think> reasoning (shown expanded while thinking) from the
// answer text. All model text is HTML-escaped; Markdown comes later via
// renderFormatted/renderFallback.
function renderStreaming(bubble, cursor, content) {
const parsed = splitThink(content);
let html = '';
if (parsed.thinking) {
const lines = parsed.thinking.trim().split('\n').length;
html += `
<div class="think-block">
<div class="think-header open" onclick="toggleThink(this)">
<span class="think-icon">💭</span>
<span class="think-label">正在思考...</span>
<span class="think-count">${lines} 行</span>
<span class="think-chevron">▶</span>
</div>
<div class="think-content visible">${escapeHtml(parsed.thinking.trim())}</div>
</div>`;
}
if (parsed.answer) {
html += `<div class="streaming-raw">${escapeHtml(parsed.answer)}</div>`;
} else if (parsed.inThink) {
html += `<span class="thinking-hint">(思考中,请稍候...)</span>`;
}
bubble.innerHTML = html;
// keep the blinking cursor at the end of the bubble
bubble.appendChild(cursor);
}
// ── Final formatted rendering (backend ChatOutputParser result) ──
// Renders the parsed payload: collapsed think block, Markdown answer,
// and an optional memory badge; then applies code highlighting.
// NOTE(review): marked.parse output is inserted without sanitization —
// acceptable only if model output is trusted; consider DOMPurify otherwise.
function renderFormatted(bubble, cursor, parsed) {
cursor.remove();
let html = '';
if (parsed.hasThinking && parsed.thinkingContent) {
const lines = parsed.thinkingContent.trim().split('\n').length;
const wordCount = parsed.thinkingContent.trim().split(/\s+/).length;
html += `
<div class="think-block">
<div class="think-header" onclick="toggleThink(this)">
<span class="think-icon">💭</span>
<span class="think-label">思考过程</span>
<span class="think-count">${wordCount} 词 · ${lines} 行</span>
<span class="think-chevron">▶</span>
</div>
<div class="think-content">${escapeHtml(parsed.thinkingContent.trim())}</div>
</div>`;
}
if (parsed.responseContent && parsed.responseContent.trim()) {
const renderedMd = marked.parse(parsed.responseContent.trim());
html += `<div class="md-body">${renderedMd}</div>`;
} else if (!parsed.hasThinking) {
html += `<em style="color:var(--text-muted)">(无回复内容)</em>`;
}
// memory badge: shows how many messages are in the context window
if (parsed.memorySize != null) {
html += `<div class="memory-badge">
<span class="mem-dot"></span>
记忆 ${parsed.memorySize} 条消息
</div>`;
}
bubble.innerHTML = html;
// code highlighting + copy buttons
bubble.querySelectorAll('pre code').forEach(block => {
hljs.highlightElement(block);
addCopyButton(block.closest('pre'));
});
}
// ── Fallback rendering (no formatted event arrived) ───────
// Parses the raw accumulated stream text directly: think block collapsed,
// remainder rendered as Markdown. Used when the backend parser result was
// never received (error paths, truncated streams).
function renderFallback(bubble, rawContent) {
const parsed = splitThink(rawContent);
let html = '';
if (parsed.thinking) {
html += `
<div class="think-block">
<div class="think-header" onclick="toggleThink(this)">
<span class="think-icon">💭</span>
<span class="think-label">思考过程</span>
<span class="think-chevron">▶</span>
</div>
<div class="think-content">${escapeHtml(parsed.thinking.trim())}</div>
</div>`;
}
html += `<div class="md-body">${marked.parse(parsed.answer || rawContent)}</div>`;
bubble.innerHTML = html;
bubble.querySelectorAll('pre code').forEach(block => {
hljs.highlightElement(block);
addCopyButton(block.closest('pre'));
});
}
// ── Split <think> reasoning from the answer text ──────────
// Returns { thinking, answer, inThink }; inThink is true while the closing
// </think> tag has not yet arrived in the stream.
function splitThink(content) {
    const OPEN = '<think>';
    const CLOSE = '</think>';
    const openAt = content.indexOf(OPEN);
    if (openAt === -1) {
        // no think block at all — everything is answer
        return { thinking: '', answer: content, inThink: false };
    }
    const bodyStart = openAt + OPEN.length;
    const closeAt = content.indexOf(CLOSE);
    if (closeAt === -1) {
        // still inside the think block
        return { thinking: content.slice(bodyStart), answer: '', inThink: true };
    }
    return {
        thinking: content.slice(bodyStart, closeAt),
        answer: content.slice(closeAt + CLOSE.length).trim(),
        inThink: false
    };
}
// ── Collapse / expand a think block ───────────────────────
// `header` is the .think-header element; the content panel is its
// immediate next sibling. Called from inline onclick handlers.
function toggleThink(header) {
    const panel = header.nextElementSibling;
    header.classList.toggle('open');
    panel.classList.toggle('visible');
}
// ── Code copy button ───────────────────────────────────────
// Wraps a <pre> in a hover container and attaches a clipboard copy button.
// Idempotent: skips null targets and <pre> elements that already have one.
function addCopyButton(pre) {
if (!pre || pre.querySelector('.copy-btn')) return;
const wrapper = document.createElement('div');
wrapper.className = 'code-block-wrapper';
pre.parentNode.insertBefore(wrapper, pre);
wrapper.appendChild(pre);
const btn = document.createElement('button');
btn.className = 'copy-btn';
btn.textContent = '复制';
btn.addEventListener('click', async () => {
const code = pre.querySelector('code').innerText;
// best effort — clipboard write may fail outside secure contexts
await navigator.clipboard.writeText(code).catch(() => {});
btn.textContent = '已复制 ✓';
btn.classList.add('copied');
setTimeout(() => { btn.textContent = '复制'; btn.classList.remove('copied'); }, 2000);
});
wrapper.appendChild(btn);
}
// ── DOM helpers ───────────────────────────────────────────
// Appends the user's message bubble (text is HTML-escaped before insertion).
function appendUserMessage(text) {
const el = document.createElement('div');
el.className = 'message user';
el.innerHTML = `
<div class="avatar">👤</div>
<div class="bubble"><div class="md-body">${escapeHtml(text)}</div></div>`;
messagesEl.appendChild(el);
scrollToBottom();
}
// Appends an AI bubble with a typing indicator and returns the element so
// processStream can render the incoming stream into it.
function appendAiTyping() {
const modelTag = selectedModel
? `<span style="font-size:10px;color:var(--text-muted);margin-bottom:6px;display:block">${selectedModel}</span>`
: '';
const el = document.createElement('div');
el.className = 'message ai';
el.innerHTML = `
<div class="avatar">🤖</div>
<div class="bubble">
${modelTag}
<div class="typing-dots"><span></span><span></span><span></span></div>
</div>`;
messagesEl.appendChild(el);
scrollToBottom();
return el;
}
// ── New conversation ───────────────────────────────────────
// Deletes the server-side session (best effort), restores the welcome
// screen markup, and re-binds the hint-card click handlers on it.
function newChat() {
if (sessionId) {
fetch(`/api/chat/session/${sessionId}`, { method: 'DELETE' }).catch(() => {});
}
sessionId = null;
messagesEl.innerHTML = `
<div class="welcome" id="welcome-screen">
<div class="welcome-icon">✨</div>
<h1>你好,我是 AI 助手</h1>
<p>由本地模型驱动,完全在你的本地设备上运行,无需联网。</p>
<div class="welcome-hints">
<div class="hint-card" data-text="用 Java 实现一个快速排序算法">💻 用 Java 实现一个快速排序算法</div>
<div class="hint-card" data-text="解释一下什么是 Spring Boot,以及它的核心特性">📚 解释 Spring Boot 的核心特性</div>
<div class="hint-card" data-text="写一个 SQL 查询,找出订单金额最高的前 10 名用户">🗄️ 写一个查询最高订单的 SQL</div>
<div class="hint-card" data-text="帮我检查下面这段代码有什么问题,并给出优化建议">🔍 帮我审查并优化一段代码</div>
</div>
</div>`;
// hint cards were recreated above, so they need fresh listeners
document.querySelectorAll('.hint-card').forEach(card => {
card.addEventListener('click', () => {
inputEl.value = card.dataset.text;
inputEl.dispatchEvent(new Event('input'));
inputEl.focus();
});
});
inputEl.value = '';
inputEl.style.height = 'auto';
sendBtn.disabled = true;
showToast('已开启新对话');
}
// ── Utilities ─────────────────────────────────────────────
// Pins the chat viewport to its newest content.
function scrollToBottom() {
    const container = chatContainer;
    container.scrollTop = container.scrollHeight;
}
// Escapes the five HTML-special characters so untrusted text (user input,
// model output, server error messages) can be safely placed into innerHTML.
// BUG FIX: the previous version replaced each character with itself (the
// entity names had been lost), making the function a no-op and leaving every
// innerHTML insertion open to HTML/script injection.
// '&' must be replaced first so the other entities are not double-escaped.
function escapeHtml(str) {
    return String(str)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
// Shows a transient toast notification for 2.5 seconds.
function showToast(msg) {
toastEl.textContent = msg;
toastEl.classList.add('show');
setTimeout(() => toastEl.classList.remove('show'), 2500);
}
</script>
</body>
</html>
请求入口:
java
@Slf4j
@RestController
@RequestMapping("/api")
@CrossOrigin(origins = "*")
@RequiredArgsConstructor
public class ChatController {
// Collaborators injected via the Lombok @RequiredArgsConstructor above.
private final ChatService chatService;
private final OllamaConfig ollamaConfig;
private final ObjectMapper objectMapper;
private final PromptTemplateService promptTemplateService;
// Dedicated JDK HttpClient for talking directly to the Ollama HTTP API
// (used by /api/models); 5s connect timeout so a down Ollama fails fast.
private final HttpClient httpClient = HttpClient.newBuilder()
.connectTimeout(Duration.ofSeconds(5))
.build();
// ── Streaming chat ────────────────────────────────────────
/**
 * Streaming chat endpoint returning {@code Flux<ServerSentEvent<String>>}.
 *
 * <p>Emits a {@code session} event carrying the session id first, then the
 * model inference stream, concatenated into a single SSE response via
 * {@link Flux#concat}. Compared with the previous {@code SseEmitter}
 * approach there is no manual timeout or thread-pool management.
 *
 * @param request chat payload; {@code sessionId} is generated when absent
 * @return SSE stream: a session event followed by token/formatted/done/error events
 */
@PostMapping(value = "/chat/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
public Flux<ServerSentEvent<String>> streamChat(@RequestBody ChatRequest request) {
    // Guard: a null/blank message would otherwise NPE on getMessage().length()
    // below; surface it as an SSE "error" event, which the frontend renders.
    if (request == null || !StringUtils.hasText(request.getMessage())) {
        return Flux.just(ServerSentEvent.<String>builder()
                .event("error")
                .data("消息内容不能为空")
                .build());
    }
    if (!StringUtils.hasText(request.getSessionId())) {
        request.setSessionId(UUID.randomUUID().toString());
    }
    log.info("流式对话 - 会话: {}, 模型: {}, 消息长度: {}",
            request.getSessionId(),
            StringUtils.hasText(request.getModelName()) ? request.getModelName() : "默认",
            request.getMessage().length());
    // The session event is the first element of the stream; the frontend
    // stores the id from it for follow-up requests and session deletion.
    Flux<ServerSentEvent<String>> sessionEvent = Flux.just(
            ServerSentEvent.<String>builder()
                    .event("session")
                    .data(request.getSessionId())
                    .build()
    );
    Flux<ServerSentEvent<String>> chatStream = chatService.streamChatFlux(
            request.getSessionId(),
            request.getMessage(),
            request.getModelName(),
            request.getTemplateId(),
            Collections.emptyMap()
    );
    // session event first, then the inference stream
    return Flux.concat(sessionEvent, chatStream);
}
/**
* 清除指定会话
*/
@DeleteMapping("/chat/session/{sessionId}")
public ResponseEntity<Map<String, String>> clearSession(@PathVariable String sessionId) {
chatService.clearSession(sessionId);
return ResponseEntity.ok(Map.of("status", "success", "message", "会话 " + sessionId + " 已清除"));
}
/**
* 查询活跃会话列表
*/
@GetMapping("/chat/sessions")
public ResponseEntity<Map<String, Object>> getSessions() {
Set<String> sessions = chatService.getActiveSessions();
Map<String, Object> result = new HashMap<>();
result.put("count", sessions.size());
result.put("sessions", sessions);
return ResponseEntity.ok(result);
}
// ── 提示词模板列表 ────────────────────────────────────────
/**
* 返回所有内置提示词模板(不含 systemPrompt 原文)
*/
@GetMapping("/templates")
public ResponseEntity<Map<String, Object>> listTemplates() {
Map<String, Object> result = new LinkedHashMap<>();
result.put("templates", promptTemplateService.listForClient());
result.put("default", PromptTemplateService.DEFAULT_TEMPLATE_ID);
return ResponseEntity.ok(result);
}
// ── 模型列表 ──────────────────────────────────────────────
/**
* 从 Ollama 获取本地已下载的模型列表(调用 GET /api/tags)
*/
@GetMapping("/models")
public ResponseEntity<Map<String, Object>> listModels() {
try {
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create(ollamaConfig.getBaseUrl() + "/api/tags"))
.timeout(Duration.ofSeconds(5))
.GET()
.build();
HttpResponse<String> resp = httpClient.send(req, HttpResponse.BodyHandlers.ofString());
List<String> modelNames = new ArrayList<>();
if (resp.statusCode() == 200) {
JsonNode root = objectMapper.readTree(resp.body());
JsonNode models = root.path("models");
if (models.isArray()) {
for (JsonNode m : models) {
String name = m.path("name").asText();
if (StringUtils.hasText(name)) modelNames.add(name);
}
}
}
Map<String, Object> result = new LinkedHashMap<>();
result.put("models", modelNames);
result.put("default", ollamaConfig.getDefaultModelName());
return ResponseEntity.ok(result);
} catch (Exception e) {
log.error("获取 Ollama 模型列表失败: {}", e.getMessage());
Map<String, Object> fallback = new LinkedHashMap<>();
fallback.put("models", List.of(ollamaConfig.getDefaultModelName()));
fallback.put("default", ollamaConfig.getDefaultModelName());
fallback.put("error", "无法连接 Ollama,显示默认模型");
return ResponseEntity.ok(fallback);
}
}
}
服务类:
java
package cn.llm.service.service;
import cn.llm.service.config.OllamaConfig;
import cn.llm.service.model.ParsedChatResponse;
import cn.llm.service.parser.ChatOutputParser;
import com.fasterxml.jackson.databind.ObjectMapper;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.memory.ChatMemory;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.ollama.OllamaStreamingChatModel;
import dev.langchain4j.model.output.Response;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.codec.ServerSentEvent;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import reactor.core.publisher.Flux;
import reactor.core.scheduler.Schedulers;
import java.time.Duration;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.TextStyle;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
@Slf4j
@Service
@RequiredArgsConstructor
public class ChatService {
    /** Upper bound on messages retained per session by the sliding-window memory. */
    private static final int MAX_MESSAGES_PER_SESSION = 40;
    private final OllamaConfig ollamaConfig;
    private final ChatOutputParser outputParser;
    private final ObjectMapper objectMapper;
    private final PromptTemplateService promptTemplateService;
    /**
     * Per-session conversation memory, key = sessionId.
     * NOTE(review): the map itself is thread-safe, but individual ChatMemory instances
     * are not synchronized — concurrent requests on the SAME session could interleave;
     * confirm callers serialize per-session access.
     */
    private final Map<String, ChatMemory> memorySessions = new ConcurrentHashMap<>();
    /**
     * Model instance cache; the same model name is only instantiated once.
     */
    private final Map<String, OllamaStreamingChatModel> modelCache = new ConcurrentHashMap<>();
    // ── Core streaming API ────────────────────────────────────
    /**
     * Handles a streaming conversation reactively, returning {@code Flux<ServerSentEvent<String>>}.
     *
     * <p>Uses {@link Flux#create} to bridge LangChain4j's callback-style API, combined with
     * {@code subscribeOn(Schedulers.boundedElastic())} to move the potentially blocking
     * Ollama HTTP call onto a dedicated elastic pool, keeping Servlet/event-loop threads free.
     *
     * <p>SSE event sequence:
     * <ol>
     *   <li>{@code token} --- raw per-token content (includes &lt;think&gt; tags)</li>
     *   <li>{@code formatted} --- parsed JSON structure of the full reply (includes memorySize)</li>
     *   <li>{@code done} --- end-of-stream marker</li>
     *   <li>{@code error} --- error message (only on failure)</li>
     * </ol>
     *
     * @param sessionId   conversation id used as the memory key
     * @param userMessage the user's message text
     * @param modelName   optional model override; blank falls back to the configured default
     * @param templateId  prompt template id; only applied when the session is first created
     * @param variables   template variables; null is treated as an empty map
     */
    public Flux<ServerSentEvent<String>> streamChatFlux(
            String sessionId,
            String userMessage,
            String modelName,
            String templateId,
            Map<String, Object> variables) {
        return Flux.<ServerSentEvent<String>>create(emitter -> {
            try {
                // Initialize session memory (renders the system prompt only for new sessions).
                ChatMemory memory = memorySessions.computeIfAbsent(sessionId, id -> {
                    String templatePrompt = promptTemplateService.renderSystemPrompt(
                            templateId,
                            variables != null ? variables : Collections.emptyMap()
                    );
                    String fullPrompt = buildSystemPrompt(templatePrompt);
                    log.info("会话[{}] 使用模板[{}] 创建,提示词长度: {}", id, templateId, fullPrompt.length());
                    ChatMemory m = MessageWindowChatMemory.withMaxMessages(MAX_MESSAGES_PER_SESSION);
                    m.add(SystemMessage.from(fullPrompt));
                    return m;
                });
                memory.add(UserMessage.from(userMessage));
                List<ChatMessage> messages = memory.messages();
                String resolvedModel = StringUtils.hasText(modelName)
                        ? modelName : ollamaConfig.getDefaultModelName();
                log.info("会话[{}] 开始推理 - 模型: {}, 历史消息数: {}",
                        sessionId, resolvedModel, messages.size());
                OllamaStreamingChatModel model = getOrCreateModel(resolvedModel);
                // Bridge LangChain4j callbacks → FluxSink.
                model.generate(messages, new StreamingResponseHandler<AiMessage>() {
                    @Override
                    public void onNext(String token) {
                        emitter.next(sse("token", token));
                    }
                    @Override
                    public void onComplete(Response<AiMessage> response) {
                        memory.add(response.content());
                        // Exclude the system message from the reported memory size.
                        int memorySize = Math.max(0, memory.messages().size() - 1);
                        log.info("会话[{}] 推理完成,记忆消息数: {}", sessionId, memorySize);
                        try {
                            ParsedChatResponse parsed = outputParser.parse(response.content().text());
                            Map<String, Object> payload = new LinkedHashMap<>();
                            payload.put("hasThinking", parsed.isHasThinking());
                            payload.put("thinkingContent", parsed.getThinkingContent());
                            payload.put("responseContent", parsed.getResponseContent());
                            payload.put("memorySize", memorySize);
                            emitter.next(sse("formatted", objectMapper.writeValueAsString(payload)));
                            emitter.next(sse("done", "[DONE]"));
                            emitter.complete();
                        } catch (Exception e) {
                            emitter.error(e);
                        }
                    }
                    @Override
                    public void onError(Throwable error) {
                        log.error("会话[{}] 推理异常: {}", sessionId, error.getMessage(), error);
                        // NOTE(review): the error SSE event may be dropped because
                        // emitter.error terminates the stream — verify client-side handling.
                        emitter.next(sse("error", "推理发生错误: " + error.getMessage()));
                        emitter.error(error);
                    }
                });
            } catch (Exception e) {
                log.error("会话[{}] streamChatFlux 初始化异常: {}", sessionId, e.getMessage(), e);
                emitter.error(e);
            }
        })
        // Move the blocking Ollama HTTP call onto the elastic pool
        // (equivalent to the old @Async pool, without manual configuration).
        .subscribeOn(Schedulers.boundedElastic());
    }
    // ── Helpers ───────────────────────────────────────────────
    /**
     * Shorthand for building an SSE event with the given event name and payload.
     */
    private ServerSentEvent<String> sse(String event, String data) {
        return ServerSentEvent.<String>builder()
                .event(event)
                .data(data)
                .build();
    }
    /**
     * Prepends the current date and multi-turn memory rules to the template prompt.
     */
    private String buildSystemPrompt(String templatePrompt) {
        LocalDate today = LocalDate.now();
        String dateStr = today.format(DateTimeFormatter.ofPattern("yyyy年MM月dd日"));
        String dayOfWeek = today.getDayOfWeek().getDisplayName(TextStyle.FULL, java.util.Locale.CHINESE);
        return "【当前日期】今天是 " + dateStr + "," + dayOfWeek + "。\n" +
                "【对话记忆】你拥有完整的历史对话记录。每次回答前,必须先回顾之前的消息," +
                "确保回答与上下文保持连贯。如果用户询问之前说过的内容,请直接从历史记录中提取作答。\n\n" +
                templatePrompt;
    }
    /**
     * Returns the cached streaming model for the given name, creating it on first use.
     * Blank names fall back to the configured default model.
     */
    private OllamaStreamingChatModel getOrCreateModel(String modelName) {
        String name = StringUtils.hasText(modelName)
                ? modelName : ollamaConfig.getDefaultModelName();
        return modelCache.computeIfAbsent(name, m -> {
            log.info("初始化模型实例: {}", m);
            return OllamaStreamingChatModel.builder()
                    .baseUrl(ollamaConfig.getBaseUrl())
                    .modelName(m)
                    .temperature(ollamaConfig.getTemperature())
                    .timeout(Duration.ofSeconds(ollamaConfig.getTimeoutSeconds()))
                    .build();
        });
    }
    /** Removes the context memory of the given session. */
    public void clearSession(String sessionId) {
        memorySessions.remove(sessionId);
        log.info("会话[{}] 上下文已清除", sessionId);
    }
    /** Returns a snapshot of all active session ids. */
    public Set<String> getActiveSessions() {
        // Fix: Set.copyOf prevents callers from mutating — or observing live mutation of —
        // the internal ConcurrentHashMap key set.
        return Set.copyOf(memorySessions.keySet());
    }
}
java
package cn.llm.service.service;
import cn.llm.service.model.PromptTemplateDTO;
import dev.langchain4j.model.input.PromptTemplate;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.*;
/**
 * Prompt template service.
 *
 * <p>Ships several scenario-specific built-in templates; each template's systemPrompt
 * uses LangChain4j {@link PromptTemplate} syntax ({@code {{variableName}}}).
 * Call {@link #renderSystemPrompt(String, Map)} with variable values to render the
 * final prompt; templates without variables accept an empty Map.
 */
@Slf4j
@Service
public class PromptTemplateService {
public static final String DEFAULT_TEMPLATE_ID = "general";
/** Templates kept in registration order (LinkedHashMap preserves insertion order). */
private final Map<String, PromptTemplateDTO> templateMap = new LinkedHashMap<>();
// Registers all built-in templates. The long Chinese string literals below ARE the
// runtime prompts sent to the model — do not edit them casually.
public PromptTemplateService() {
register(new PromptTemplateDTO(
"general", "🤖", "通用助手",
"全能型助手,适合日常问答与闲聊",
"你是一个专业、友好的 AI 助手。\n" +
"回答问题时,如需推理或分析,请将思考过程写在 <think>...</think> 标签内," +
"然后在标签之后输出最终回答。\n" +
"最终回答请使用 Markdown 格式(代码用代码块、要点用列表、关键词加粗),以提高可读性。"
));
register(new PromptTemplateDTO(
"coder", "💻", "代码专家",
"擅长编程开发、代码审查与调试",
"你是一名资深软件工程师,精通 Java、Python、JavaScript 等主流语言。\n" +
"回答时请遵循以下规范:\n" +
"1. 代码示例必须完整可运行,并附上必要的注释;\n" +
"2. 如果有多种实现方案,列出各方案的优劣;\n" +
"3. 主动指出代码中潜在的 Bug、性能瓶颈或安全隐患;\n" +
"4. 使用 Markdown 格式,代码块注明编程语言。\n" +
"如需推理,将思考过程放在 <think>...</think> 内,之后输出正式回答。"
));
register(new PromptTemplateDTO(
"translator", "🌐", "翻译助手",
"精准互译,支持中英文及多语种",
// This template contains a {{targetLanguage}} placeholder; rendering without the
// variable falls back to the raw text (see renderSystemPrompt).
"你是一名专业翻译,精通中文、英文及多种语言的互译。\n" +
"翻译规则:\n" +
"1. 保持原文语义、语气和风格;\n" +
"2. 专业术语给出原文对照;\n" +
"3. 若原文存在歧义,翻译后附上说明;\n" +
"4. 目标语言:{{targetLanguage}}。\n" +
"请直接输出译文,无需解释过程。"
));
register(new PromptTemplateDTO(
"writer", "✍️", "写作助手",
"帮助润色文案、撰写文章与创意写作",
"你是一名资深文案编辑与写作教练。\n" +
"职责:\n" +
"1. 润色与优化:保留原意,使文字更流畅、有感染力;\n" +
"2. 结构建议:给出段落结构和逻辑优化建议;\n" +
"3. 风格适配:根据场景(正式/轻松/学术)调整语气;\n" +
"4. 创意写作:提供多个版本或角度供参考。\n" +
"输出时先展示修改后的版本,再简要说明改动理由。"
));
register(new PromptTemplateDTO(
"analyst", "📊", "数据分析",
"数据解读、SQL 查询与报表分析",
"你是一名数据分析师,擅长数据解读、SQL 编写和商业洞察。\n" +
"工作方式:\n" +
"1. 理解业务背景,再给出分析方案;\n" +
"2. SQL 示例使用标准 SQL,并附上查询逻辑说明;\n" +
"3. 数据结论需要数字支撑,避免模糊表述;\n" +
"4. 给出可视化建议(适合使用哪种图表)。\n" +
"如需推理,将思考过程放在 <think>...</think> 内,之后输出正式回答。"
));
register(new PromptTemplateDTO(
"tutor", "📚", "学习导师",
"深入浅出地解释概念,引导式教学",
"你是一名耐心的学习导师,擅长将复杂概念用简单语言解释清楚。\n" +
"教学原则:\n" +
"1. 先了解学习者的基础,再调整讲解深度;\n" +
"2. 多用类比和生活化例子;\n" +
"3. 循序渐进,每次只引入必要的新概念;\n" +
"4. 讲解后提出 1~2 个思考问题,引导深入学习;\n" +
"5. 鼓励提问,营造无压力的学习氛围。\n" +
"如需推理,将思考过程放在 <think>...</think> 内,之后输出正式回答。"
));
}
// Stores one template keyed by its id; later registrations would overwrite earlier ones.
private void register(PromptTemplateDTO template) {
templateMap.put(template.getId(), template);
}
/** Returns all templates for the client (only id/icon/name/description — never systemPrompt). */
public List<Map<String, String>> listForClient() {
List<Map<String, String>> result = new ArrayList<>();
for (PromptTemplateDTO t : templateMap.values()) {
Map<String, String> item = new LinkedHashMap<>();
item.put("id", t.getId());
item.put("icon", t.getIcon());
item.put("name", t.getName());
item.put("description", t.getDescription());
result.add(item);
}
return result;
}
/**
 * Renders the system prompt of the given template.
 *
 * <p>Uses LangChain4j {@link PromptTemplate} to substitute {@code {{variableName}}}
 * placeholders. Pass an empty Map for templates without variables.
 *
 * @param templateId template id; falls back to {@link #DEFAULT_TEMPLATE_ID} when null or unknown
 * @param variables  variable map, key = placeholder name, value = replacement
 * @return the rendered system prompt text
 */
public String renderSystemPrompt(String templateId, Map<String, Object> variables) {
String id = (templateId != null && templateMap.containsKey(templateId))
? templateId : DEFAULT_TEMPLATE_ID;
PromptTemplateDTO dto = templateMap.get(id);
try {
PromptTemplate pt = PromptTemplate.from(dto.getSystemPrompt());
return pt.apply(variables).text();
} catch (Exception e) {
// Deliberate degradation: when variables are missing (e.g. template contains
// {{targetLanguage}} but none was supplied), fall back to the raw prompt text.
log.warn("模板[{}]渲染失败(变量缺失?),降级使用原始提示词: {}", id, e.getMessage());
return dto.getSystemPrompt();
}
}
/** Convenience overload: render with no variables. */
public String renderSystemPrompt(String templateId) {
return renderSystemPrompt(templateId, Collections.emptyMap());
}
}
实体类:
java
package cn.llm.service.model;
import lombok.Data;
/** Inbound request body for the streaming chat endpoint. */
@Data
public class ChatRequest {
/**
 * Session id used to maintain multi-turn conversation context.
 * The client sends null on its first request; the server returns a generated id.
 */
private String sessionId;
/**
 * The user's message content.
 */
private String message;
/**
 * Model name to use (e.g. deepseek-r1:8b, gemma3:1b).
 * When blank, the default model from configuration is used.
 */
private String modelName;
/**
 * Prompt template id (e.g. general, coder, translator...).
 * Only takes effect when a new session is created; an existing session's
 * template is never overwritten.
 * When blank, the general (general-assistant) template is used.
 */
private String templateId;
}
java
package cn.llm.service.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Parsed model output: splits a raw reply into "thinking" and "final answer" parts.
 * DeepSeek R1-family models emit their reasoning chain inside <think>...</think> tags;
 * this DTO separates the two parts so the frontend can render them in distinct areas.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ParsedChatResponse {
/** Whether the reply contained any thinking content. */
private boolean hasThinking;
/** The model's reasoning/thinking content (from <think> tags, tags themselves removed). */
private String thinkingContent;
/** The final answer (text after the <think> block, raw Markdown). */
private String responseContent;
}
java
package cn.llm.service.model;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Prompt template descriptor.
 * systemPrompt supports LangChain4j PromptTemplate {{variableName}} placeholder syntax
 * and is rendered to final text by PromptTemplateService at session initialization.
 */
@Data
@AllArgsConstructor
public class PromptTemplateDTO {
/** Unique identifier, used as an API parameter. */
private String id;
/** Display icon (emoji). */
private String icon;
/** Template display name. */
private String name;
/** Short description shown on the selector card. */
private String description;
/** Raw system prompt text (may contain {{variable}} placeholders). */
private String systemPrompt;
}
java
package cn.llm.service.parser;
import cn.llm.service.model.ParsedChatResponse;
import org.springframework.stereotype.Component;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 模型输出解析器,负责将模型原始输出解析为结构化的 {@link ParsedChatResponse}:
* <ol>
* <li>提取所有 {@code <think>...</think>} 块作为推理内容</li>
* <li>移除 think 块后,剩余文本作为正式回答</li>
* <li>对正式回答做基本的格式化清理(去除首尾多余空白行)</li>
* </ol>
*/
@Component
public class ChatOutputParser {
    /**
     * Matches a complete {@code <think>...</think>} block (multi-line, repeatable,
     * case-insensitive). Compiled once and reused — Pattern is thread-safe.
     */
    private static final Pattern THINK_PATTERN =
            Pattern.compile("<think>(.*?)</think>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
    /**
     * Matches a dangling opening tag left behind when the model's output stream
     * is cut off before the closing {@code </think>} arrives.
     */
    private static final Pattern OPEN_THINK_PATTERN =
            Pattern.compile("<think>", Pattern.CASE_INSENSITIVE);
    /**
     * Parses the model's raw output, splitting thinking content from the final answer.
     *
     * <p>Robustness fix: an unterminated {@code <think>} tag (truncated output) is no
     * longer leaked verbatim into the answer — everything after the dangling tag is
     * treated as thinking content instead.
     *
     * @param rawText raw text emitted by the model (may be null or empty)
     * @return structured {@link ParsedChatResponse}; never null
     */
    public ParsedChatResponse parse(String rawText) {
        if (rawText == null || rawText.isEmpty()) {
            return new ParsedChatResponse(false, "", "");
        }
        Matcher matcher = THINK_PATTERN.matcher(rawText);
        StringBuilder thinkingBuilder = new StringBuilder();
        // Collect the contents of every closed <think> block, joined by blank lines.
        while (matcher.find()) {
            String block = matcher.group(1).trim();
            if (!block.isEmpty()) {
                if (thinkingBuilder.length() > 0) {
                    thinkingBuilder.append("\n\n");
                }
                thinkingBuilder.append(block);
            }
        }
        // Strip all closed <think>...</think> blocks; what remains is the candidate answer.
        String remainder = THINK_PATTERN.matcher(rawText).replaceAll("");
        // Handle a truncated stream: a lone <think> with no closing tag means everything
        // after it is reasoning, not answer.
        Matcher open = OPEN_THINK_PATTERN.matcher(remainder);
        if (open.find()) {
            String tail = remainder.substring(open.end()).trim();
            if (!tail.isEmpty()) {
                if (thinkingBuilder.length() > 0) {
                    thinkingBuilder.append("\n\n");
                }
                thinkingBuilder.append(tail);
            }
            remainder = remainder.substring(0, open.start());
        }
        String responseContent = remainder.trim();
        String thinkingContent = thinkingBuilder.toString();
        boolean hasThinking = !thinkingContent.isEmpty();
        return new ParsedChatResponse(hasThinking, thinkingContent, responseContent);
    }
    /**
     * Describes this parser's format contract; can be embedded in a system prompt
     * to steer the model's output shape.
     */
    public String formatInstructions() {
        return "如需进行推理或思考,请将过程包含在 <think>...</think> 标签内,"
                + "标签之后输出最终回答。最终回答应使用 Markdown 格式。";
    }
}
yaml配置:
yaml
server:
port: 8080
spring:
application:
name: llm-service
# 放宽 MVC 异步请求超时(毫秒),配合 Flux<ServerSentEvent> 的 SSE 流式长连接
mvc:
async:
request-timeout: 330000
ollama:
# Ollama 服务地址(默认本地)
base-url: http://localhost:11434
# 使用的模型名称
model-name: gemma3:1b
# 生成温度(0.0 ~ 1.0,越高越有创意)
temperature: 0.7
# 推理超时时间(秒)
timeout-seconds: 600
logging:
level:
cn.llm.service: INFO
dev.langchain4j: WARN
pom依赖:
XML
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.2.5</version>
<relativePath/>
</parent>
<groupId>cn.llm</groupId>
<artifactId>llm-service</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<name>llm-service</name>
<description>基于 LangChain4j + Ollama 的本地 AI 聊天机器人</description>
<properties>
<java.version>17</java.version>
<langchain4j.version>0.36.2</langchain4j.version>
</properties>
<dependencies>
<!-- Spring Boot Web(Servlet 容器,与 WebFlux 共存时 MVC 优先) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- WebFlux:引入 Reactor(Flux/Mono/Sinks)及 ServerSentEvent 等响应式类型 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<!-- LangChain4j Core -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<!-- LangChain4j Ollama 集成 -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-ollama</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- 测试 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project>