AI 大模型应用进阶系列(三):大模型流式输出

带思考能力模型的流式输出

返回数据

  • 存在 reasoning_content,说明正在思考
  • 不存在 reasoning_content 说明思考已经结束
  • 返回 `data: [DONE]` 这一行,说明流式输出已经完成
json 复制代码
 {
    "choices": [
        {
            "delta": {
                "content": "xxx",
                "reasoning_content": "xxx",
                "role": "assistant"
            },
            "index": 0
        }
    ],
    "created": xxx,
    "id": "xxx",
    "model": "xxx",
    "service_tier": "default",
    "object": "chat.completion.chunk",
    "usage": null
} 

代码逻辑

python 复制代码
import sys
import json
import requests

# Model configuration consumed by chat_stream(): model name, endpoint URL and
# API key. The "key" value below is a placeholder string, not a usable key.
_ai_config = dict(
    model="deepseek-reasoner",
    url="https://api.deepseek.com/chat/completions",
    key="you deepseek key",
)

# Thinking started
def _on_think_start():
    """Print a marker line announcing that the reasoning phase has begun."""
    marker = "think start"
    print(marker)

# Thinking finished
def _on_think_end():
    """Print a marker line announcing that the reasoning phase is over."""
    marker = "think end"
    print(marker)

# Thinking in progress
def _on_thinking(chunk_text):
    """Echo one reasoning fragment to stdout immediately.

    The fragment is written without a trailing newline and the stream is
    flushed right away so the text shows up as it arrives.
    """
    out = sys.stdout
    out.write(chunk_text)
    out.flush()

# Streaming reception of the answer
def _on_receiving(full_text, chunk_text):
    """Echo the newest answer fragment to stdout immediately.

    Only ``chunk_text`` is printed; ``full_text`` (the answer accumulated so
    far) is accepted but not used by this callback.
    """
    out = sys.stdout
    out.write(chunk_text)
    out.flush()

# Output finished
def _on_finish(full_text):
    """Print the complete answer on one line, prefixed with ``finish: ``."""
    summary = "finish: " + full_text
    print(summary)


# Streaming chat call
def _parse_sse_data(line):
    """Decode one server-sent-event line into its JSON payload.

    Returns ``None`` for lines that carry no chunk: blank lines, lines that
    are not ``data:`` fields, empty data fields and the ``[DONE]`` sentinel.
    Raises ``json.JSONDecodeError`` when a data field is not valid JSON.
    """
    if not line or not line.startswith("data:"):
        return None
    # The space after "data:" is optional, so strip instead of slicing a
    # fixed six characters.
    body = line[len("data:"):].strip()
    if not body or body == "[DONE]":
        return None
    return json.loads(body)


def chat_stream(
    histories,
    ai_config=None,
    on_receiving=None,
    on_finish=None,
    on_thinking=None,
    on_think_start=None,
    on_think_end=None,
    response_format="text",
):
    """Send a streaming chat-completion request and dispatch chunks to callbacks.

    Each chunk's ``choices[0].delta`` is inspected. A delta whose
    ``reasoning_content`` is present and not null is treated as reasoning
    output; any other delta is treated as answer output.

    Args:
        histories: Message list sent as the ``messages`` field of the request.
        ai_config: Mapping with ``"model"``, ``"url"`` and ``"key"`` entries.
            Required despite the ``None`` default.
        on_receiving: Optional ``f(full_text, chunk_text)`` called for every
            answer chunk with the text accumulated so far and the new piece.
        on_finish: Optional ``f(full_text)`` called once after the stream has
            been consumed without error.
        on_thinking: Optional ``f(chunk_text)`` called for every reasoning
            chunk.
        on_think_start: Optional ``f()`` called when reasoning output begins.
        on_think_end: Optional ``f()`` called when reasoning output stops,
            including when the stream ends while still reasoning.
        response_format: Value sent as ``response_format.type``.

    Returns:
        The concatenated answer text. If the request or parsing fails, the
        error is printed and whatever was accumulated so far is returned;
        ``on_finish`` is not called in that case.

    Raises:
        TypeError: If ``ai_config`` is ``None``.
    """
    if ai_config is None:
        # Fail with a message that names the argument instead of the opaque
        # "'NoneType' object is not subscriptable" raised by the lookup below.
        raise TypeError(
            "chat_stream() requires ai_config with 'model', 'url' and 'key' entries"
        )

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {ai_config['key']}",
    }

    payload = {
        "model": ai_config["model"],
        "messages": histories,
        "response_format": {"type": response_format},
        "stream": True,
    }

    full_content = ""
    is_thinking = False

    try:
        # stream=True makes requests hand the body over incrementally
        # instead of buffering the whole response.
        with requests.post(
            ai_config["url"], headers=headers, json=payload, stream=True, timeout=60
        ) as response:
            response.raise_for_status()

            # Force UTF-8 so non-ASCII text (e.g. Chinese) is decoded correctly.
            response.encoding = "utf-8"

            for line in response.iter_lines(decode_unicode=True):
                data = _parse_sse_data(line)
                if not isinstance(data, dict):
                    continue

                choices = data.get("choices")
                if not choices:
                    continue

                delta = choices[0].get("delta") or {}
                reasoning = delta.get("reasoning_content")
                # A null reasoning_content is the same as an absent one;
                # only a real string means the model is still reasoning.
                current_thinking = reasoning is not None

                # Fire the transition callbacks, then keep processing this
                # same chunk so its text is not dropped.
                if current_thinking and not is_thinking:
                    if on_think_start is not None:
                        on_think_start()
                elif is_thinking and not current_thinking:
                    if on_think_end is not None:
                        on_think_end()
                is_thinking = current_thinking

                if is_thinking:
                    if on_thinking is not None:
                        on_thinking(reasoning)
                    # Reasoning chunks never contribute to the answer text,
                    # whether or not an on_thinking callback was supplied.
                    continue

                # content may be absent or null; treat both as empty text.
                content = delta.get("content") or ""
                full_content += content
                if on_receiving is not None:
                    on_receiving(full_content, content)

        # Close an open reasoning phase so start/end callbacks stay paired.
        if is_thinking and on_think_end is not None:
            on_think_end()
        if on_finish is not None:
            on_finish(full_content)
    except requests.exceptions.RequestException as e:
        print(f"请求异常: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON解析错误: {e}")
    except Exception as e:
        # Deliberate best-effort boundary: report and return the partial text.
        print(f"发生未知错误: {e}")

    return full_content

# Call the model: send one user message ("你好") and wire every streaming
# callback to the console printers defined above.
chat_stream(
    histories=[{"role": "user", "content": "你好"}],
    ai_config=_ai_config,
    on_receiving=_on_receiving,
    on_finish=_on_finish,
    on_thinking=_on_thinking,
    on_think_start=_on_think_start,
    on_think_end=_on_think_end,
)
相关推荐
Hyyy8 小时前
SSE和WebSocket 是什么,AI 场景下如何选择
llm
冬奇Lab8 小时前
Workflow 系列(04):Multi-Agent 协调——编排器边界、并发控制与上下文隔离
人工智能·工作流引擎
冬奇Lab9 小时前
每日一个开源项目(第147篇):HyperGraphRAG - 用超图表示 N 元关系,RAG 的第三代范式
人工智能·开源·graphql
甲维斯9 小时前
Github + 阿里云oss实现类似codex的自动更新!
人工智能
阿里云大数据AI技术11 小时前
光轮智能 × 阿里云:共建 Physical AI 云上数据、评测与持续学习基础设施
人工智能·机器学习
机器之心11 小时前
实锤了:Claude Code偷查用户,时区、中国AI实验室全是关键词
人工智能·openai
网易云信11 小时前
Cursor点燃个人开发者,企业级AI为何频频受挫?Agent工厂从提效工具到AI员工的跃迁
人工智能·开源
网易云信11 小时前
解锁触手可及的温暖:网易智企 x Wander Puffs AI 云游泡芙
人工智能