SpringBoot 构建大模型后端文本对话服务

SpringBoot 构建大模型后端文本对话服务

这里以阿里大模型Qwen为例

首先打开 Qwen API 的官网介绍页面:https://bailian.console.aliyun.com/?tab=model#/model-market/detail/qwen3?modelGroup=qwen3

一、创建并保存API KEY

先打开 API Key 的管理页面 https://bailian.console.aliyun.com/?tab=model#/api-key ,创建一个 API Key,然后复制并保存下来。一般格式都是`sk-xxxxxxxxxxxxxxxxxxxxx`这样。

二、查看官网大模型调用SDK示例方法

到官网 https://bailian.console.aliyun.com/?tab=api#/api/?type=model&url=https%3A%2F%2Fhelp.aliyun.com%2Fdocument_detail%2F2712193.html&renderType=iframe 查看调用说明,并安装对应的SDK。

XML 复制代码
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>dashscope-sdk-java</artifactId>
    <!-- Replace 'the-latest-version' with the latest version number: https://mvnrepository.com/artifact/com.alibaba/dashscope-sdk-java -->
    <version>the-latest-version</version>
</dependency>

这里我选择的是2.20.8版本,导入的依赖是

xml 复制代码
<!-- https://mvnrepository.com/artifact/com.alibaba/dashscope-sdk-java -->
<!-- DashScope Java SDK, pinned to 2.20.8 (the version used in this walkthrough). -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>dashscope-sdk-java</artifactId>
    <version>2.20.8</version>
</dependency>

然后查看对应API的调用代码示例,这里以文本生成-流式输出为例:

https://help.aliyun.com/zh/model-studio/stream?spm=a2c4g.11186623.help-menu-2400256.d_0_1_2.5dae4823ODN1oP#b38f4f4698iyb

Java 复制代码
import java.util.Arrays;
import java.lang.System;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;

public class Main {
    private static final Logger logger = LoggerFactory.getLogger(Main.class);
    private static StringBuilder fullContent = new StringBuilder();
    private static void handleGenerationResult(GenerationResult message) {
        String content = message.getOutput().getChoices().get(0).getMessage().getContent();
        fullContent.append(content);
        System.out.println(content);
    }
    public static void streamCallWithMessage(Generation gen, Message userMsg)
            throws NoApiKeyException, ApiException, InputRequiredException {
        GenerationParam param = buildGenerationParam(userMsg);
        System.out.println("流式输出内容为:");
        Flowable<GenerationResult> result = gen.streamCall(param);
        result.blockingForEach(message -> handleGenerationResult(message));
        System.out.println("完整内容为: " + fullContent.toString());
    }
    private static GenerationParam buildGenerationParam(Message userMsg) {
        return GenerationParam.builder()
                // 若没有配置环境变量,请用阿里云百炼API Key将下行替换为:.apiKey("sk-xxx")
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model("qwen-plus")   // 此处以qwen-plus为例,您可按需更换模型名称。模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models
                .messages(Arrays.asList(userMsg))
                .resultFormat(GenerationParam.ResultFormat.MESSAGE)
                // Qwen3模型通过enable_thinking参数控制思考过程(开源版默认True,商业版默认False)
                // 使用Qwen3开源版模型时,若未启用流式输出,请将下行取消注释,否则会报错
                //.enableThinking(false)
                .incrementalOutput(true)
                .build();
    }
    public static void main(String[] args) {
        try {
            Generation gen = new Generation();
            Message userMsg = Message.builder().role(Role.USER.getValue()).content("你是谁?").build();
            streamCallWithMessage(gen, userMsg);
        } catch (ApiException | NoApiKeyException | InputRequiredException  e) {
            logger.error("An exception occurred: {}", e.getMessage());
        }
        System.exit(0);
    }
}

三、写Controller和对应的Service

对应的Service类

复制代码
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.baidu.streamapi.config.SpringContextHolder;
import com.baidu.streamapi.utils.ProxySetting;
import org.springframework.http.*;
import org.springframework.http.client.SimpleClientHttpRequestFactory;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RequestCallback;
import org.springframework.web.client.ResponseExtractor;
import org.springframework.web.client.RestTemplate;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.function.Consumer;
import com.alibaba.fastjson2.JSONArray;
import org.springframework.web.util.DefaultUriBuilderFactory;

/**
 * Calls the DashScope text-generation HTTP endpoint and relays its server-sent-event
 * response to the caller line by line.
 *
 * <p>The API key is read from the {@code DASHSCOPE_API_KEY} environment variable so that
 * no secret has to be stored in source code.
 */
@Service
public class QwenService {

    // Environment variable that must hold the DashScope API key (format: sk-...).
    private static final String QWEN_API_KEY_ENV = "DASHSCOPE_API_KEY";
    private static final String QWEN_API_KEY = System.getenv(QWEN_API_KEY_ENV);
    private static final String QWEN_API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation";
    // Field name that introduces a payload line in a server-sent-event stream.
    private static final String SSE_DATA_PREFIX = "data:";

    private final RestTemplate restTemplate = new RestTemplate();

    /**
     * Sends {@code prompt} to the model and forwards every {@code data:} line of the
     * response to {@code onChunk}. This method blocks until the response stream has been
     * read to the end.
     *
     * <p>Failures of the HTTP exchange or of reading the stream are reported to the client
     * as a final chunk of the form {@code "error: <message>"}, after which
     * {@code onComplete} still runs. {@code onComplete} is invoked from exactly one place,
     * after the exchange has finished.
     *
     * @param prompt     the user's question
     * @param onChunk    receives the payload of each {@code data:} line (and error chunks)
     * @param onComplete invoked once after the exchange has ended
     * @param onError    invoked when the request cannot be prepared (for example the API key
     *                   is missing) or when one of the callbacks itself throws
     */
    public void streamChatCompletions(String prompt, Consumer<String> onChunk, Runnable onComplete, Consumer<Exception> onError) {
        try {
            System.out.println("准备请求通义千问,prompt=" + prompt);
            if (QWEN_API_KEY == null || QWEN_API_KEY.trim().isEmpty()) {
                // Fail fast instead of sending "Bearer null" to the API.
                onError.accept(new IllegalStateException(
                        "Environment variable " + QWEN_API_KEY_ENV + " is not set"));
                return;
            }

            HttpEntity<String> requestEntity = new HttpEntity<>(buildRequestBody(prompt), buildRequestHeaders());
            RequestCallback requestCallback = restTemplate.httpEntityCallback(requestEntity, String.class);

            ResponseExtractor<Void> responseExtractor = response -> {
                System.out.println("已收到通义千问响应,开始读取流...");
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(response.getBody(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println("收到一行: " + line);
                        String jsonStr = extractDataPayload(line);
                        if (jsonStr != null) {
                            System.out.println("===>data is: " + jsonStr);
                            onChunk.accept(jsonStr);
                        }
                    }
                    System.out.println("流读取完毕,调用onComplete");
                } catch (Exception e) {
                    System.out.println("流处理异常: " + e.getMessage());
                    onChunk.accept("error: " + e.getMessage());
                }
                return null;
            };

            try {
                System.out.println("即将发起HTTP请求到通义千问...");
                restTemplate.execute(QWEN_API_URL, HttpMethod.POST, requestCallback, responseExtractor);
                System.out.println("HTTP请求已发起");
            } catch (Exception e) {
                System.out.println("HTTP请求异常: " + e.getMessage());
                onChunk.accept("error: " + e.getMessage());
            }
            // Single completion point: reached after success and after a reported failure alike.
            onComplete.run();
        } catch (Exception e) {
            System.out.println("主流程异常: " + e.getMessage());
            onError.accept(e);
        }
    }

    /** Builds the JSON request body carrying the model name, the prompt and the result format. */
    private static String buildRequestBody(String prompt) {
        JSONObject body = new JSONObject();
        body.put("model", "qwen-turbo"); // model name; other models may be used
        body.put("input", new JSONObject().fluentPut("prompt", prompt));
        body.put("parameters", new JSONObject().fluentPut("result_format", "message"));
        body.put("stream", true);
        return body.toJSONString();
    }

    /** Builds the request headers: JSON body, bearer authorization, event-stream response. */
    private static HttpHeaders buildRequestHeaders() {
        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_JSON);
        headers.set("Authorization", "Bearer " + QWEN_API_KEY);
        headers.setAccept(Collections.singletonList(MediaType.valueOf("text/event-stream")));
        return headers;
    }

    /**
     * Returns the payload of a server-sent-event {@code data:} line, or {@code null} when
     * the line is not a data line. A single space directly after the colon is dropped,
     * since the event-stream format allows one optional space there.
     */
    private static String extractDataPayload(String line) {
        if (!line.startsWith(SSE_DATA_PREFIX)) {
            return null;
        }
        String payload = line.substring(SSE_DATA_PREFIX.length());
        return payload.startsWith(" ") ? payload.substring(1) : payload;
    }
}

对应的Controller类

Java 复制代码
package com.baidu.streamapi.demos.web;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;

import javax.servlet.http.HttpServletResponse;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * REST endpoint that relays a Qwen streaming answer to the browser as server-sent events.
 */
@RestController
public class StreamController {

    // Worker pool for the blocking upstream calls. Daemon threads are used so the pool
    // never keeps the JVM alive on shutdown.
    private final ExecutorService streamExecutor = Executors.newCachedThreadPool(task -> {
        Thread worker = new Thread(task, "qwen-sse-worker");
        worker.setDaemon(true);
        return worker;
    });

    @Autowired
    private QwenService qwenService;

    /**
     * Starts a streaming completion for {@code prompt} and returns the emitter through
     * which the chunks are pushed to the client.
     *
     * <p>The service call blocks until the upstream response has been read completely,
     * so it runs on a worker thread. Invoking it on the request thread would delay
     * returning the emitter until the whole answer was available, and the client would
     * receive all events in one burst instead of as a stream.
     *
     * @param prompt   the user's question, taken from the {@code prompt} request parameter
     * @param response servlet response, used to force the event-stream content type
     * @return an emitter without timeout that is completed when the upstream stream ends
     */
    @GetMapping(value = "/api/qwen/stream", produces = "text/event-stream;charset=UTF-8")
    public SseEmitter qwenStream(@RequestParam String prompt, HttpServletResponse response) {
        System.out.println("prompt: " + prompt);
        // Set the response header directly; this takes priority over the produces
        // attribute of @GetMapping.
        response.setContentType("text/event-stream; charset=UTF-8");
        response.setCharacterEncoding("UTF-8");

        SseEmitter emitter = new SseEmitter(0L); // never times out
        // Set once a send fails (typically the client went away) so the remaining
        // upstream chunks are dropped instead of failing one by one.
        AtomicBoolean sendFailed = new AtomicBoolean(false);

        streamExecutor.execute(() -> {
            try {
                qwenService.streamChatCompletions(
                        prompt,
                        chunk -> {
                            if (sendFailed.get()) {
                                return;
                            }
                            try {
                                System.out.println("====>qwenStream send chunk start");
                                // Send as an explicit SSE event so each message is framed as data: ...\n\n
                                emitter.send(SseEmitter.event().data(chunk).reconnectTime(3000));
                                System.out.println("====>qwenStream send chunk end");
                            } catch (Exception e) {
                                sendFailed.set(true);
                                emitter.completeWithError(e);
                            }
                        },
                        () -> {
                            emitter.complete();
                            System.out.println("====> SSE complete");
                        },
                        emitter::completeWithError
                );
            } catch (RuntimeException e) {
                // Last line of defence: never leave the client hanging on an open emitter.
                emitter.completeWithError(e);
            }
        });
        return emitter;
    }
}

然后请求 /api/qwen/stream 这个接口(通过 prompt 参数传入问题,例如 /api/qwen/stream?prompt=你是谁),接口会通过SSE流式协议返回多条数据,每一条都是在前一条的基础上补充一些信息。