AI 大模型应用进阶系列(三):大模型流式输出

带思考能力模型的流式输出

返回数据

  • 存在 reasoning_content,说明正在思考
  • 不存在 reasoning_content 说明思考已经结束
  • 返回 [DONE],说明已经完成
json 复制代码
 {
    "choices": [
        {
            "delta": {
                "content": "xxx",
                "reasoning_content": "xxx",
                "role": "assistant"
            },
            "index": 0
        }
    ],
    "created": xxx,
    "id": "xxx",
    "model": "xxx",
    "service_tier": "default",
    "object": "chat.completion.chunk",
    "usage": null
} 

代码逻辑

python 复制代码
import sys
import json
import requests

# 定义模型配置
_ai_config = {
    # Model identifier; chat_stream sends it as the payload's "model" field.
    "model": "deepseek-reasoner",
    # Endpoint that chat_stream POSTs the chat-completion request to.
    "url": "https://api.deepseek.com/chat/completions",
    # Sent as the Bearer token in the Authorization header.
    # NOTE(review): this looks like placeholder text — presumably it must be
    # replaced with a real API key before the script can succeed.
    "key": "you deepseek key",
}

# 开始思考
def _on_think_start():
    """Print a marker line when the model's reasoning phase begins."""
    marker = "think start"
    print(marker)

# 思考结束
def _on_think_end():
    """Print a marker line when the model's reasoning phase is over."""
    marker = "think end"
    print(marker)

# 正在思考
def _on_thinking(chunk_text):
    """Echo one reasoning chunk to stdout immediately, without a newline.

    Args:
        chunk_text: The reasoning text fragment to write.
    """
    out = sys.stdout
    out.write(chunk_text)
    # Flush so the fragment appears right away instead of sitting in the buffer.
    out.flush()

# 流式接收
def _on_receiving(full_text, chunk_text):
    """Echo one answer chunk to stdout immediately, without a newline.

    Args:
        full_text: The answer accumulated so far; not used by this callback.
        chunk_text: The newest answer fragment, which is what gets written.
    """
    out = sys.stdout
    out.write(chunk_text)
    # Flush so the fragment appears right away instead of sitting in the buffer.
    out.flush()

# 输出结束
def _on_finish(full_text):
    """Print the complete answer on one line, prefixed with ``finish: ``.

    Args:
        full_text: The full answer text; must be a ``str`` because it is
            concatenated onto the prefix.
    """
    summary = "finish: " + full_text
    print(summary)


# 流式调用
def chat_stream(
    histories,
    ai_config=None,
    on_receiving=None,
    on_finish=None,
    on_thinking=None,
    on_think_start=None,
    on_think_end=None,
    response_format="text",
):
    """Stream a chat completion, routing reasoning and answer chunks to callbacks.

    POSTs ``histories`` to the endpoint described by ``ai_config`` with
    ``"stream": True`` and consumes the response line by line as
    server-sent events (``data: {...}`` lines, terminated by ``data: [DONE]``).

    A chunk counts as "thinking" when its delta carries a non-null
    ``reasoning_content``; a missing key and an explicit ``null`` both mean
    the model is producing the answer.

    Args:
        histories: Message list sent as the request's ``messages`` field.
        ai_config: Mapping with ``"model"``, ``"url"`` and ``"key"`` entries.
            Required, even though the signature defaults it to ``None``.
        on_receiving: Optional ``f(full_text, chunk_text)`` called for every
            answer chunk with the text accumulated so far and the new piece.
        on_finish: Optional ``f(full_text)`` called once after the stream has
            been read without an error.
        on_thinking: Optional ``f(chunk_text)`` called for every reasoning
            chunk, including the first one.
        on_think_start: Optional ``f()`` called when reasoning begins.
        on_think_end: Optional ``f()`` called when reasoning stops, either
            because an answer chunk arrived or because the stream ended.
        response_format: Value sent as ``response_format.type``.

    Returns:
        The accumulated answer text. If an error interrupts the stream, the
        error is printed and whatever was accumulated so far is returned.

    Raises:
        ValueError: If ``ai_config`` is ``None``.
    """
    if ai_config is None:
        # Fail with a clear message instead of "'NoneType' is not subscriptable".
        raise ValueError(
            "ai_config is required: expected a mapping with 'model', 'url' and 'key'"
        )

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {ai_config['key']}",
    }

    payload = {
        "model": ai_config["model"],
        "messages": histories,
        "response_format": {"type": response_format},
        "stream": True,
    }

    full_content = ""

    try:
        # stream=True keeps the body unread so it can be consumed incrementally.
        with requests.post(
            ai_config["url"], headers=headers, json=payload, stream=True, timeout=60
        ) as response:
            response.raise_for_status()

            # Force UTF-8 so non-ASCII text is not garbled when lines are decoded.
            response.encoding = "utf-8"

            is_thinking = False

            for line in response.iter_lines(decode_unicode=True):
                # Skip keep-alive blanks and any non-data SSE fields.
                if not line or not line.startswith("data:"):
                    continue

                event = line[len("data:"):].strip()
                if not event:
                    continue
                if event == "[DONE]":
                    break

                data = json.loads(event)
                if "choices" not in data or not data["choices"]:
                    continue
                delta = data["choices"][0].get("delta") or {}

                # A null reasoning_content means the same as a missing one.
                reasoning = delta.get("reasoning_content")
                now_thinking = reasoning is not None

                # Fire the transition callbacks, then keep processing this
                # same chunk so its text is not dropped.
                if now_thinking and not is_thinking:
                    if on_think_start is not None:
                        on_think_start()
                elif is_thinking and not now_thinking:
                    if on_think_end is not None:
                        on_think_end()
                is_thinking = now_thinking

                if now_thinking:
                    if on_thinking is not None:
                        on_thinking(reasoning)
                    continue

                # content may be null (e.g. on role-only chunks); treat as empty.
                content = delta.get("content") or ""
                full_content += content
                if on_receiving is not None:
                    on_receiving(full_content, content)

            # The stream ended while still reasoning: balance on_think_start.
            if is_thinking and on_think_end is not None:
                on_think_end()

        if on_finish is not None:
            on_finish(full_content)
    except requests.exceptions.RequestException as e:
        print(f"请求异常: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON解析错误: {e}")
    except Exception as e:
        # Best-effort boundary: report and return what was received so far.
        print(f"发生未知错误: {e}")

    return full_content

# 调用大模型
chat_stream(
    # Single-turn conversation: one user message.
    histories=[{"role": "user", "content": "你好"}],
    ai_config=_ai_config,
    # Reasoning-phase callbacks.
    on_think_start=_on_think_start,
    on_thinking=_on_thinking,
    on_think_end=_on_think_end,
    # Answer-phase callbacks.
    on_receiving=_on_receiving,
    on_finish=_on_finish,
)
相关推荐
UMI赋能企业22 分钟前
AI数据仓库的核心优势解析
大数据·人工智能
Elastic 中国社区官方博客37 分钟前
Elasticsearch:如何使用 Qwen3 来做向量搜索
大数据·人工智能·elasticsearch·搜索引擎·全文检索
Black_Rock_br1 小时前
GraphRAG:AI理解复杂知识的未知领域,开启探索之旅
人工智能
失散136 小时前
深度学习——03 神经网络(2)-损失函数
人工智能·深度学习·神经网络·损失函数
商业讯6 小时前
大模型驱动的服务革命:2025智能客服机器人选型与落地路径
人工智能·机器人
mortimer8 小时前
Hugging Face 下载模型踩坑记:从符号链接到网络错误
人工智能·python·ai编程
一株月见草哇11 小时前
Matlab(4)
人工智能·算法·matlab
IMER SIMPLE11 小时前
人工智能-python-机器学习-线性回归与梯度下降:理论与实践
人工智能·python·机器学习
lxmyzzs11 小时前
【图像算法 - 12】OpenCV-Python 入门指南:图像视频处理与可视化(代码实战 + 视频教程 + 人脸识别项目讲解)
人工智能·opencv·计算机视觉
hans汉斯11 小时前
基于深度学习的苹果品质智能检测算法研究
人工智能·深度学习·算法