一、环境准备
1. 前置条件
- 本地安装 Ollama,启动服务(默认地址:http://localhost:11434)
- 拉取任意模型(示例用 qwen3.5:4b,轻量快速;以下命令假定 Ollama 运行在名为 ollama 的 Docker 容器中):
  docker exec -it ollama ollama run qwen3.5:4b
- SpringBoot 4.x 项目
2. Maven 依赖 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Aggregator POM for spring-ai-demo (packaging "pom", one module: quick-start).
  It inherits from spring-boot-starter-parent, declares the dependencies listed
  below, imports the Spring AI BOM, and configures Lombok for the build.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Spring Boot parent POM -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>4.1.0</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.ybw</groupId>
<artifactId>spring-ai-demo</artifactId>
<version>1.0.0</version>
<name>spring-ai-demo</name>
<description>spring-ai-demo</description>
<!-- Child modules built by this aggregator -->
<modules>
<module>quick-start</module>
</modules>
<packaging>pom</packaging>
<properties>
<java.version>21</java.version>
<!-- Referenced below as ${spring-ai.version} by the spring-ai-bom import -->
<spring-ai.version>2.0.0</spring-ai.version>
</properties>
<!-- Declared in <dependencies> (not only <dependencyManagement>), so child modules inherit them -->
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webmvc</artifactId>
</dependency>
<!-- No version here; presumably supplied by the spring-ai-bom imported in dependencyManagement -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-ollama</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webmvc-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- Imports the Spring AI bill of materials at ${spring-ai.version} -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>${spring-ai.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<!-- Lombok is listed under the Spring Boot plugin's excludes -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
<!-- Registers Lombok on the annotation processor path for both main and test compilation -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<!-- Main sources -->
<execution>
<id>default-compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</path>
</annotationProcessorPaths>
</configuration>
</execution>
<!-- Test sources -->
<execution>
<id>default-testCompile</id>
<phase>test-compile</phase>
<goals>
<goal>testCompile</goal>
</goals>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</path>
</annotationProcessorPaths>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
二、配置文件 application.yml
# Application name plus the Spring AI settings for the Ollama chat model.
spring:
application:
name: spring-ai-demo
ai:
ollama:
# Address of the Ollama server
base-url: http://127.0.0.1:11434
chat:
model: qwen3.5:4b # Model to use
temperature: 0.7 # Temperature: higher values make the output more random, lower values make it more consistent. Range 0-1
top-p: 0.7 # Top-p ("probability" in the original note); presumably the nucleus-sampling cutoff - TODO confirm against the Ollama docs
think: true # Whether the model should think
温度
temperature 取值范围:0 ~ 1。核心逻辑:数值越大,模型输出的随机性、创造性越强;数值越小,输出越确定、严谨,多次生成的结果也越一致。
| 温度区间 | 核心风格 | 推荐业务场景 |
|---|---|---|
| 0 | 严谨、统一、结果最稳定(但不能完全消除幻觉) | 代码、SQL、结构化输出、数学、标准问答 |
| 0.1~0.3 | 专业、精准、轻微灵活 | 技术 / 法律 / 医疗专业问答、文档解析 |
| 0.4~0.7 | 均衡自然(默认) | 日常聊天、科普、工作总结、普通文案 |
| 0.8~0.9 | 高创意、发散 | 写诗、故事、策划、起名、脑洞创作 |
| 1.0 | 极致随机 | 纯艺术自由创作,极少业务使用 |
三、配置类 + 测试 Demo
配置 ChatClient
package com.ybw.config;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that publishes a {@link ChatClient} bean.
 *
 * @author ybw
 * @version V1.0
 */
@Configuration
public class ChatClientConfig {

    /**
     * Creates the {@link ChatClient} bean from the injected builder without
     * applying any further customisation.
     *
     * @param builder builder injected by the container; per the original note,
     *                Spring AI auto-configures it with the Ollama chat model
     * @return the client produced by {@code builder.build()}
     */
    @Bean
    ChatClient chatClient(ChatClient.Builder builder) {
        final ChatClient client = builder.build();
        return client;
    }
}
单元测试
无思考过程
package com.ybw;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Tests that send prompts to Ollama through the injected {@link ChatClient}
 * and print the replies, once with a blocking call and once as a stream.
 *
 * @author ybw
 * @version V1.0
 */
@SpringBootTest
@Slf4j
public class TestOllama {

    @Resource
    private ChatClient chatClient;

    /**
     * Issues two independent blocking requests and logs each reply: a plain
     * user question, then a user question combined with a system message.
     */
    @Test
    public void testOllama() {
        // Request 1: a single user question.
        final String question = "请写一首描述清晨的诗";
        log.info("===== 用户提问:{} =====", question);
        final String poem = askBlocking(question);

        log.info("===== Ollama 返回结果 =====");
        log.info(poem);
        log.info("===========================");

        // Request 2: the banner below says "multi-round", but this is one
        // request carrying a user message and a system message; no earlier
        // turns are supplied here.
        log.info("===== 多轮对话测试 =====");
        final var constrainedRequest = chatClient.prompt()
                .user("什么是Java?")
                .system("你是简洁的编程讲师,回答不超过两句话");
        final String shortAnswer = constrainedRequest.call().content();
        log.info(shortAnswer);
    }

    /**
     * Streams a reply and prints every received piece to standard output as it
     * arrives, blocking until the stream has finished.
     */
    @Test
    public void testOllamaStream() {
        final var pieces = chatClient.prompt()
                .user("写一段100字的春日短文")
                .stream()
                .content();
        pieces.doOnNext(piece -> System.out.print(piece))
                .blockLast();
    }

    /**
     * Sends one user message with a blocking call and returns the reply text.
     *
     * @param userText text of the user message
     * @return the content of the model's reply
     */
    private String askBlocking(String userText) {
        return chatClient.prompt()
                .user(userText)
                .call()
                .content();
    }
}
- 流式输出(逐段实时打印,类似打字机效果)
有思考过程
package com.ybw;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.boot.test.context.SpringBootTest;
import reactor.core.publisher.Flux;
import java.util.Map;
/**
 * Tests that call Ollama through the injected {@link ChatClient} and print the
 * model's thinking text separately from its final answer.
 *
 * <p>The thinking text is read from the assistant message metadata under the
 * {@code "thinking"} key. It is treated as optional everywhere, because a
 * response (or an individual streamed chunk) may not carry that entry.
 *
 * @author ybw
 * @version V1.0
 */
@SpringBootTest
@Slf4j
public class TestThinkOllama {

    /** Metadata key under which the thinking text is looked up. */
    private static final String THINKING_KEY = "thinking";

    /** ANSI escape that switches the console to grey, used for thinking text. */
    private static final String ANSI_GREY = "\u001B[90m";

    /** ANSI escape that restores the default console colour. */
    private static final String ANSI_RESET = "\u001B[0m";

    @Resource
    private ChatClient chatClient;

    /**
     * Sends one blocking request and logs the thinking text (when present)
     * followed by the final answer.
     *
     * <p>A response without any generation fails the test with an explicit
     * message. A response without thinking text is still logged instead of
     * failing with a {@link NullPointerException}.
     */
    @Test
    public void testThinkOllama() {
        // 1. Build the prompt
        String prompt = "java是什么?";
        log.info("===== 用户提问:{} =====", prompt);

        // 2. Call Ollama and keep the full response, not just the text
        ChatResponse chatResponse = chatClient.prompt()
                .user(prompt)
                .call()
                .chatResponse();

        // 3. Extract the assistant message; fail clearly when there is none
        AssistantMessage assistantMessage = outputOf(chatResponse);
        if (assistantMessage == null) {
            throw new AssertionError("Ollama 未返回任何结果,提问:" + prompt);
        }

        // 3.1 Thinking text is optional
        String thinking = thinkingOf(assistantMessage);
        log.info("===== 思考过程 =====");
        if (thinking == null) {
            log.info("(本次响应未包含思考过程)");
        } else {
            log.info("{}", thinking);
        }

        // 3.2 Final answer
        log.info("===== 最终答案 =====");
        log.info("{}", assistantMessage.getText());
    }

    /**
     * Streams a reply, printing thinking text in grey and answer text in the
     * default colour as chunks arrive, then logs both accumulated texts once
     * the stream has completed.
     */
    @Test
    public void testThinkOllamaStream() {
        // 1. Accumulators for the complete thinking text and the complete answer
        StringBuilder finishThinkingStr = new StringBuilder();
        StringBuilder finishAnswerStr = new StringBuilder();

        // 2. Start the streaming request
        Flux<ChatResponse> chatResponseFlux = chatClient.prompt()
                .user("你好")
                .stream()
                .chatResponse();

        // 3. Print every chunk as it arrives and wait for the stream to end.
        //    blockLast() rethrows a stream error, so the summary below is only
        //    reached after normal completion.
        chatResponseFlux
                .doOnNext(chunk -> printChunk(chunk, finishThinkingStr, finishAnswerStr))
                .blockLast();

        // 4. Summary
        System.out.println();
        log.info("思考:{}", finishThinkingStr);
        log.info("回复:{}", finishAnswerStr);
    }

    /**
     * Prints one streamed chunk and appends its parts to the accumulators.
     *
     * <p>Thinking and answer text are handled independently, so a chunk that
     * carries both, or one with an empty thinking entry, does not lose its
     * answer text. Null or empty parts are skipped, so nothing is printed or
     * accumulated as the literal {@code "null"}.
     *
     * @param chunk         streamed response chunk; may lack a generation
     * @param thinkingSoFar accumulator for the thinking text
     * @param answerSoFar   accumulator for the answer text
     */
    private static void printChunk(ChatResponse chunk, StringBuilder thinkingSoFar, StringBuilder answerSoFar) {
        AssistantMessage message = outputOf(chunk);
        if (message == null) {
            return;
        }

        String thinking = thinkingOf(message);
        if (thinking != null && !thinking.isEmpty()) {
            // Grey output keeps the thinking text visually apart from the answer
            System.out.print(ANSI_GREY + thinking + ANSI_RESET);
            thinkingSoFar.append(thinking);
        }

        String answer = message.getText();
        if (answer != null && !answer.isEmpty()) {
            // Break the line exactly once, right before the first answer text
            if (answerSoFar.isEmpty()) {
                System.out.println();
            }
            System.out.print(answer);
            answerSoFar.append(answer);
        }
    }

    /**
     * Returns the assistant message of a response.
     *
     * @param response response to inspect; may be {@code null}
     * @return the output message, or {@code null} when the response or its
     *         result is missing
     */
    private static AssistantMessage outputOf(ChatResponse response) {
        if (response == null || response.getResult() == null) {
            return null;
        }
        return response.getResult().getOutput();
    }

    /**
     * Reads the thinking text from a message's metadata.
     *
     * @param message assistant message to inspect
     * @return the thinking text, or {@code null} when the metadata has no
     *         {@code "thinking"} entry
     */
    private static String thinkingOf(AssistantMessage message) {
        Map<String, Object> metadata = message.getMetadata();
        Object thinking = metadata == null ? null : metadata.get(THINKING_KEY);
        return thinking == null ? null : thinking.toString();
    }
}