流式调用
模型创建
java
复制代码
/**
 * Shared streaming chat model used by the examples below.
 *
 * <p>Built with the OpenAI-compatible streaming builder, pointed at
 * {@code https://api.deepseek.com}. The API key is read from the
 * {@code DS_API_KEY} environment variable, and request/response logging is
 * switched on.
 *
 * <p>Declared {@code final} so the shared instance cannot be reassigned.
 */
public static final StreamingChatModel STREAMING_BASE_MODEL = OpenAiStreamingChatModel.builder()
        .baseUrl("https://api.deepseek.com")
        .apiKey(System.getenv("DS_API_KEY"))
        .modelName("deepseek-v4-flash")
        .logRequests(true)
        .logResponses(true)
        .build();
对话
java
复制代码
/**
 * Response streaming: sends one user message and prints every callback the
 * handler receives.
 *
 * <p>The handler callbacks are delivered while this method is still running,
 * so the test waits on a latch that is released by a terminal callback
 * ({@code onCompleteResponse} or {@code onError}) instead of sleeping for a
 * fixed, effectively endless amount of time.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting
 * @throws IllegalStateException if no terminal callback arrives within the timeout
 */
@Test
void test01() throws InterruptedException {
    String userMessage = "给我讲一个笑话";
    // Released once the stream has either completed or failed.
    java.util.concurrent.CountDownLatch finished = new java.util.concurrent.CountDownLatch(1);

    STREAMING_BASE_MODEL.chat(userMessage, new StreamingChatResponseHandler() {
        // Partial response text
        @Override
        public void onPartialResponse(String partialResponse) {
            System.out.println("onPartialResponse: " + partialResponse);
        }

        // Partial thinking (reasoning) content
        @Override
        public void onPartialThinking(PartialThinking partialThinking) {
            System.out.println("onPartialThinking: " + partialThinking);
        }

        // Partial tool call
        @Override
        public void onPartialToolCall(PartialToolCall partialToolCall) {
            System.out.println("onPartialToolCall: " + partialToolCall);
        }

        // Tool call fully received
        @Override
        public void onCompleteToolCall(CompleteToolCall completeToolCall) {
            System.out.println("onCompleteToolCall: " + completeToolCall);
        }

        // Whole response finished
        @Override
        public void onCompleteResponse(ChatResponse completeResponse) {
            try {
                System.out.println("onCompleteResponse: " + completeResponse);
            } finally {
                finished.countDown();
            }
        }

        // Failure
        @Override
        public void onError(Throwable error) {
            try {
                error.printStackTrace();
            } finally {
                finished.countDown();
            }
        }
    });

    // Bounded wait so a hung connection fails the test instead of blocking forever.
    if (!finished.await(60, java.util.concurrent.TimeUnit.SECONDS)) {
        throw new IllegalStateException("Streaming response did not finish within 60 seconds");
    }
}
输出结果如下:
json
复制代码
{
"aiMessage": {
"text": "当然!这是一个经典的笑话:有一天,一只海鸥飞到一家咖啡馆,点了一杯咖啡和一块蛋糕。服务员惊讶地说:\"您居然会说话?\"海鸥淡定地回答:\"这里的蛋糕,难道不会说话吗?\"",
"thinking": null,
"toolExecutionRequests": [],
"attributes": {}
},
"metadata": {
"id": "8cf726c0-6b4e-4d56-8d78-9bd78228cdea",
"modelName": "deepseek-v4-flash",
"tokenUsage": {
"inputTokenCount": 8,
"inputTokensDetails": {
"cachedTokens": 0
},
"outputTokenCount": 90,
"outputTokensDetails": {
"reasoningTokens": 44
},
"totalTokenCount": 98
},
"finishReason": "STOP",
"created": 1778565664,
"serviceTier": "null",
"systemFingerprint": "fp_8b330d02d0_prod0820_fp8_kvcache_20260402",
"rawHttpResponse": "dev.langchain4j.http.client.SuccessfulHttpResponse@662e8336",
"rawServerSentEvents": [{
"event": null,
"data": "{\"id\":\"8cf726c0-6b4e-4d56-8d78-9bd78228cdea\",\"object\":\"chat.completion.chunk\",\"created\":1778565664,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_8b330d02d0_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"reasoning_content\":\"\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null}"
}],
"logProbs": null
}
}
简化如下:
java
复制代码
/**
 * Compact streaming example built on {@code LambdaStreamingResponseHandler}.
 *
 * <p>Uses the blocking helper variants so each call waits for its stream to
 * finish. This replaces the original, effectively endless
 * {@code Thread.sleep} and keeps the output of the two requests from
 * interleaving.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting
 */
@Test
void test01() throws InterruptedException {
    String userMessage = "给我讲一个笑话";

    // Non-blocking form, for reference:
    //   STREAMING_BASE_MODEL.chat(userMessage, LambdaStreamingResponseHandler.onPartialResponse(System.out::println));
    // It returns immediately, so the caller has to keep the thread alive itself.

    // Print each partial response on its own line and wait for completion.
    LambdaStreamingResponseHandler.onPartialResponseBlocking(
            STREAMING_BASE_MODEL, userMessage, System.out::println);

    // Same, with an explicit error callback; partial responses are printed inline.
    LambdaStreamingResponseHandler.onPartialResponseAndErrorBlocking(
            STREAMING_BASE_MODEL, "Tell me a joke", System.out::print, Throwable::printStackTrace);
}
流的取消
java
复制代码
/**
 * Stream cancellation: cancels the stream from inside the partial-response
 * callback via the streaming handle.
 *
 * <p>The original method returned right after starting the request, so the
 * test could end before any callback ran. This version waits on a latch that
 * is released when the stream is cancelled, completes, or fails.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting
 * @throws IllegalStateException if the stream neither ends nor is cancelled within the timeout
 */
@Test
void test02() throws InterruptedException {
    String userMessage = "给我讲一个笑话";
    boolean shouldCancel = true;
    // Released on cancel, completion, or error, whichever happens first.
    java.util.concurrent.CountDownLatch finished = new java.util.concurrent.CountDownLatch(1);

    STREAMING_BASE_MODEL.chat(userMessage, new StreamingChatResponseHandler() {
        @Override
        public void onPartialResponse(PartialResponse partialResponse, PartialResponseContext context) {
            System.out.println("onPartialResponse: " + partialResponse);
            if (shouldCancel) {
                // Cancel the stream.
                context.streamingHandle().cancel();
                // NOTE(review): per the LangChain4j docs no further callbacks are
                // expected after cancel(), so the latch is released here - confirm
                // against the library version in use.
                finished.countDown();
            }
        }

        @Override
        public void onCompleteResponse(ChatResponse completeResponse) {
            try {
                System.out.println("onCompleteResponse: " + completeResponse);
            } finally {
                finished.countDown();
            }
        }

        @Override
        public void onError(Throwable error) {
            try {
                error.printStackTrace();
            } finally {
                finished.countDown();
            }
        }
    });

    // Bounded wait so a hung connection fails the test instead of blocking forever.
    if (!finished.await(60, java.util.concurrent.TimeUnit.SECONDS)) {
        throw new IllegalStateException("Stream was neither cancelled nor finished within 60 seconds");
    }
}