Spring AI + Ollama简单使用

一、环境准备

1. 前置条件

  1. 本地安装 Ollama,启动服务(默认地址:http://localhost:11434)

  2. 拉取任意模型(示例用 qwen3.5:4b,轻量快速)

    复制代码
    docker exec -it ollama ollama run qwen3.5:4b
  3. SpringBoot 4.x 项目

2. Maven 依赖 pom.xml

复制代码
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Parent/aggregator POM for the spring-ai-demo project.
  Packaging is "pom" and the build aggregates the quick-start module; the dependencies
  and plugin configuration declared here are shared with that module.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherit from the Spring Boot starter parent (version 4.1.0). -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>4.1.0</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.ybw</groupId>
    <artifactId>spring-ai-demo</artifactId>
    <version>1.0.0</version>
    <name>spring-ai-demo</name>
    <description>spring-ai-demo</description>
    <!-- Child modules built together with this POM. -->
    <modules>
        <module>quick-start</module>
    </modules>
    <packaging>pom</packaging>

    <properties>
        <java.version>21</java.version>
        <!-- Referenced below as ${spring-ai.version} when importing the Spring AI BOM. -->
        <spring-ai.version>2.0.0</spring-ai.version>
    </properties>
    <!-- No dependency below declares a version; versions are expected to come from the
         Spring Boot parent and from the Spring AI BOM imported in dependencyManagement. -->
    <dependencies>
        <!-- Spring MVC web starter. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-webmvc</artifactId>
        </dependency>
        <!-- Spring AI starter for the Ollama model integration. -->
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-starter-model-ollama</artifactId>
        </dependency>

        <!-- Lombok (used for @Slf4j in the tests); optional, so it is not passed on transitively. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- Test-scoped starter for Spring MVC tests. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-webmvc-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <!-- Import the Spring AI BOM so Spring AI artifacts share the version set in properties. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.ai</groupId>
                <artifactId>spring-ai-bom</artifactId>
                <version>${spring-ai.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <!-- Spring Boot plugin, configured to exclude Lombok from the packaged application. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
            <!-- Register Lombok as an annotation processor for both the main and the test
                 compilation by configuring the compiler plugin's two default executions. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <!-- Main sources. -->
                    <execution>
                        <id>default-compile</id>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <annotationProcessorPaths>
                                <path>
                                    <groupId>org.projectlombok</groupId>
                                    <artifactId>lombok</artifactId>
                                </path>
                            </annotationProcessorPaths>
                        </configuration>
                    </execution>
                    <!-- Test sources. -->
                    <execution>
                        <id>default-testCompile</id>
                        <phase>test-compile</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <annotationProcessorPaths>
                                <path>
                                    <groupId>org.projectlombok</groupId>
                                    <artifactId>lombok</artifactId>
                                </path>
                            </annotationProcessorPaths>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>

二、配置文件 application.yml

复制代码
# Application settings for the Spring AI + Ollama demo.
spring:
  application:
    name: spring-ai-demo
  ai:
    ollama:
      # Address of the Ollama server the application talks to.
      base-url: http://127.0.0.1:11434
      # NOTE(review): Spring AI 1.x documents temperature/top-p (and similar options) under
      # spring.ai.ollama.chat.options.* - confirm that the flat keys below bind in 2.0.0.
      chat:
        model: qwen3.5:4b # Ollama model tag used for chat requests
        temperature: 0.7 # sampling temperature: higher = more random output, lower = more consistent output (this article works with values in 0-1)
        top-p: 0.7 # top-p sampling threshold: cumulative probability cut-off for candidate tokens
        think: true # whether to request the model's thinking (reasoning) output

温度

temperature 常用取值范围:0 ~ 1,核心逻辑:数值越大,模型随机性、创造性越强;数值越小,输出越确定、严谨、一致性越高(多次提问的结果更接近)。

温度区间 核心风格 推荐业务场景
0 严谨、统一、输出最稳定(幻觉相对更少,但无法完全避免) 代码、SQL、结构化输出、数学、标准问答
0.1~0.3 专业、精准、轻微灵活 技术 / 法律 / 医疗专业问答、文档解析
0.4~0.7 均衡自然(默认) 日常聊天、科普、工作总结、普通文案
0.8~0.9 高创意、发散 写诗、故事、策划、起名、脑洞创作
1.0 极致随机 纯艺术自由创作,极少业务使用

三、配置类 + 测试 Demo

配置 ChatClient

复制代码
package com.ybw.config;

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes a {@link ChatClient} bean for the application.
 *
 * @author ybw
 * @version V1.0
 * @className ChatClientConfig
 * @date 2026/6/29
 **/
@Configuration
public class ChatClientConfig {

    /**
     * Builds the application's {@link ChatClient} from the injected builder.
     *
     * <p>The builder is supplied by the Spring container; according to the original author's
     * note, Spring AI auto-configures it on top of the Ollama chat model. No defaults are
     * customised here, the builder is used as provided.
     *
     * @param builder the {@link ChatClient.Builder} injected by Spring
     * @return the {@link ChatClient} produced by {@code builder.build()}
     **/
    @Bean
    ChatClient chatClient(ChatClient.Builder builder) {
        ChatClient client = builder.build();
        return client;
    }
}

单元测试

无思考过程

复制代码
package com.ybw;

import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.boot.test.context.SpringBootTest;

/**
 * Manual smoke tests for the injected {@link ChatClient}.
 *
 * <p>The tests send real prompts through the client and only log or print the replies; they
 * contain no assertions, so they need the configured chat model backend (Ollama in this
 * article) to be reachable.
 *
 * @author ybw
 * @version V1.0
 * @className TestOllama
 * @date 2026/6/29
 **/
@SpringBootTest
@Slf4j
public class TestOllama {
    @Resource
    private ChatClient chatClient;

    /**
     * Sends two independent blocking requests and logs each reply: a plain user question, and
     * a user question combined with a system prompt.
     *
     * <p>Neither request carries conversation history, so this is not a multi-turn
     * conversation; the second request only demonstrates steering the answer with a system
     * prompt.
     */
    @Test
    public void testOllama() {
        // 1. Single plain question.
        String prompt = "请写一首描述清晨的诗";
        log.info("===== 用户提问:{} =====", prompt);
        // Blocking call: returns once the complete reply text is available.
        String response = chatClient.prompt()
                .user(prompt)
                .call()
                .content();
        log.info("===== Ollama 返回结果 =====");
        // Pass the reply as an argument so model output is never used as the log format string.
        log.info("{}", response);
        log.info("===========================");

        // 2. Single question constrained by a system prompt (no previous turns are sent).
        log.info("===== 系统提示词测试 =====");
        String systemPromptRes = chatClient.prompt()
                .system("你是简洁的编程讲师,回答不超过两句话")
                .user("什么是Java?")
                .call()
                .content();
        log.info("{}", systemPromptRes);
    }

    /**
     * Streams a reply and prints each received chunk to standard output as it arrives.
     */
    @Test
    public void testOllamaStream() {
        chatClient.prompt()
                .user("写一段100字的春日短文")
                .stream()
                .content()
                // Print every chunk immediately, without a line break between chunks.
                .doOnNext(System.out::print)
                // Block the test thread until the stream has completed.
                .blockLast();
    }
}
  • 流式输出(逐段实时打印,类似打字机效果)

有思考过程

复制代码
package com.ybw;

import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.boot.test.context.SpringBootTest;
import reactor.core.publisher.Flux;

import java.util.Map;

/**
 * Manual smoke tests that show the model's thinking text next to its final answer.
 *
 * <p>Both tests read the thinking text from the {@code "thinking"} entry of the assistant
 * message metadata and treat a missing entry as "no thinking returned" instead of failing.
 * They send real prompts through the injected {@link ChatClient} and contain no assertions.
 *
 * @author ybw
 * @version V1.0
 * @className TestThinkOllama
 * @date 2026/6/29
 **/
@SpringBootTest
@Slf4j
public class TestThinkOllama {

    /** Metadata key under which the thinking text is looked up. */
    private static final String THINKING_KEY = "thinking";

    /** ANSI escape sequence that switches the console to gray text. */
    private static final String ANSI_GRAY = "\u001B[90m";

    /** ANSI escape sequence that resets the console colour. */
    private static final String ANSI_RESET = "\u001B[0m";

    @Resource
    private ChatClient chatClient;

    /**
     * Sends one blocking request and logs the thinking text followed by the final answer.
     *
     * @throws IllegalStateException if the call yields no response or no result
     */
    @Test
    public void testThinkOllama() {
        // 1. Build the prompt.
        String prompt = "java是什么?";
        log.info("===== 用户提问:{} =====", prompt);

        // 2. Blocking call that returns the full response object rather than only the text.
        ChatResponse chatResponse = chatClient.prompt()
                .user(prompt)
                .call()
                .chatResponse();
        if (chatResponse == null || chatResponse.getResult() == null) {
            throw new IllegalStateException("No chat result returned for prompt: " + prompt);
        }

        // 3. Read thinking text and final answer from the assistant message.
        AssistantMessage assistantMessage = chatResponse.getResult().getOutput();

        // 3.1 Thinking text; may be absent, in which case a placeholder is logged.
        String thinking = extractThinking(assistantMessage);
        log.info("===== 思考过程 =====");
        log.info("{}", thinking.isEmpty() ? "(模型未返回思考过程)" : thinking);

        // 3.2 Final answer.
        String finalAnswer = assistantMessage.getText();
        log.info("===== 最终答案 =====");
        log.info("{}", finalAnswer);
    }

    /**
     * Streams a reply, printing thinking text in gray and answer text in the default colour as
     * chunks arrive, then logs the accumulated thinking text and answer once the stream ends.
     */
    @Test
    public void testThinkOllamaStream() {
        // 1. Accumulators for the complete thinking text and the complete answer.
        StringBuilder finishThinkingStr = new StringBuilder();
        StringBuilder finishAnswerStr = new StringBuilder();

        // 2. Start the streaming request.
        Flux<ChatResponse> chatResponseFlux = chatClient.prompt()
                .user("你好")
                .stream()
                .chatResponse();

        // 3. Print every chunk as it arrives.
        chatResponseFlux.doOnNext(chatResponse -> {
                    if (chatResponse.getResult() == null) {
                        // Chunk without a generation: nothing to print.
                        return;
                    }
                    AssistantMessage assistantMessage = chatResponse.getResult().getOutput();

                    // 3.1 Thinking part of this chunk, if any, shown in gray.
                    String thinkingStr = extractThinking(assistantMessage);
                    if (!thinkingStr.isEmpty()) {
                        System.out.print(ANSI_GRAY + thinkingStr + ANSI_RESET);
                        finishThinkingStr.append(thinkingStr);
                    }

                    // 3.2 Answer part of this chunk, if any. It is checked independently of the
                    // thinking part so answer text is never dropped, and null/empty text is
                    // skipped so the literal "null" is never printed or accumulated.
                    String finalAnswer = assistantMessage.getText();
                    if (finalAnswer != null && !finalAnswer.isEmpty()) {
                        // Break the line once, right before the first piece of answer text.
                        if (finishAnswerStr.isEmpty()) {
                            System.out.println();
                        }
                        System.out.print(finalAnswer);
                        finishAnswerStr.append(finalAnswer);
                    }
                }).doOnComplete(() -> {
                    System.out.println();
                    log.info("思考:{}", finishThinkingStr);
                    log.info("回复:{}", finishAnswerStr);
                })
                // Block the test thread until the stream has completed.
                .blockLast();
    }

    /**
     * Returns the thinking text stored in the message metadata.
     *
     * @param assistantMessage the message whose metadata is inspected
     * @return the string form of the {@code "thinking"} metadata entry, or an empty string when
     *         the metadata map or the entry is missing
     */
    private static String extractThinking(AssistantMessage assistantMessage) {
        Map<String, Object> metadata = assistantMessage.getMetadata();
        if (metadata == null) {
            return "";
        }
        Object thinking = metadata.get(THINKING_KEY);
        return thinking == null ? "" : thinking.toString();
    }

}