Advanced AI Large-Model Applications, Part 3: Streaming Output from Large Models

Streaming Output from Models with Reasoning Capability

Response Data

  • A non-empty reasoning_content in the chunk's delta means the model is still thinking
  • When reasoning_content is absent (or empty), thinking has finished and the answer content is streaming
  • A data: [DONE] line means the response is complete

An example chunk is shown below, followed by a minimal sketch for classifying a single chunk.
```json
{
    "choices": [
        {
            "delta": {
                "content": "xxx",
                "reasoning_content": "xxx",
                "role": "assistant"
            },
            "index": 0
        }
    ],
    "created": xxx,
    "id": "xxx",
    "model": "xxx",
    "service_tier": "default",
    "object": "chat.completion.chunk",
    "usage": null
}
```
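
Before walking through the full implementation, the per-chunk decision can be condensed into a small helper. This is a minimal, illustrative sketch assuming the OpenAI-style chunk schema shown above; classify_chunk and its return values are my own names, not part of any SDK:

```python
import json

def classify_chunk(raw_line):
    """Classify one SSE line: returns (kind, text), where kind is 'done', 'thinking', 'answer', or None."""
    if not raw_line.startswith("data: "):
        return None, ""
    payload = raw_line[len("data: "):]
    if payload.strip() == "[DONE]":
        return "done", ""
    choices = json.loads(payload).get("choices") or [{}]
    delta = choices[0].get("delta", {})
    reasoning = delta.get("reasoning_content")
    if reasoning:
        # A non-empty reasoning_content means the model is still thinking
        return "thinking", reasoning
    # Otherwise thinking has finished and the answer content is streaming
    return "answer", delta.get("content") or ""
```

The chat_stream implementation in the next section applies the same checks and additionally fires the think-start/think-end callbacks on the phase transitions.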

Code Logic

```python
import sys
import json
import requests

# Model configuration
_ai_config = {
    "model": "deepseek-reasoner",
    "url": "https://api.deepseek.com/chat/completions",
    "key": "you deepseek key",
}

# Called when thinking starts
def _on_think_start():
    print("think start")

# Called when thinking ends
def _on_think_end():
    print("think end")

# Called for each reasoning chunk while the model is thinking
def _on_thinking(chunk_text):
    sys.stdout.write(chunk_text)
    sys.stdout.flush()

# Called for each streamed answer chunk
def _on_receiving(full_text, chunk_text):
    sys.stdout.write(chunk_text)
    sys.stdout.flush()

# Called when the full answer has been received
def _on_finish(full_text):
    print("finish: " + full_text)


# Streaming call to the chat completions API
def chat_stream(
    histories,
    ai_config=None,
    on_receiving=None,
    on_finish=None,
    on_thinking=None,
    on_think_start=None,
    on_think_end=None,
    response_format="text",
):

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {ai_config['key']}",
    }

    payload = {
        "model": ai_config["model"],
        "messages": histories,
        "response_format": {"type": response_format},
        "stream": True,
    }

    full_content = ""

    try:
        # Send the POST request with stream=True to enable a streaming response
        with requests.post(
            ai_config["url"], headers=headers, json=payload, stream=True, timeout=60
        ) as response:
            # Raise for non-2xx status codes
            response.raise_for_status()

            # Explicitly set the response encoding to UTF-8 to avoid garbled non-ASCII text
            response.encoding = "utf-8"

            is_thinking = False
            full_content = ""

            # Process the SSE response line by line
            for line in response.iter_lines(decode_unicode=True):

                if line:
                    if line.startswith("data: ") and not line.startswith(
                        "data: [DONE]"
                    ):
                        data = json.loads(line[6:])
                        # Extract and handle the returned content (assumes an OpenAI-style chunk format)
                        if "choices" in data and len(data["choices"]) > 0:
                            delta = data["choices"][0].get("delta", {})
                            if delta.get("reasoning_content"):
                                # A non-empty reasoning_content means the model is still thinking
                                current_thinking = True
                            else:
                                # reasoning_content missing or empty: thinking has finished
                                current_thinking = False

                            # Fire the transition callbacks without dropping the current chunk
                            if current_thinking and not is_thinking:
                                if on_think_start is not None:
                                    on_think_start()
                            if not current_thinking and is_thinking:
                                if on_think_end is not None:
                                    on_think_end()

                            is_thinking = current_thinking

                            if is_thinking:
                                # Still thinking: forward the reasoning chunk and skip answer handling
                                if on_thinking is not None:
                                    on_thinking(delta.get("reasoning_content") or "")
                                continue

                            content = delta.get("content") or ""
                            full_content += content
                            if on_receiving is not None:
                                on_receiving(full_content, content)

        if on_finish is not None:
            on_finish(full_content)
    except requests.exceptions.RequestException as e:
        print(f"请求异常: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON解析错误: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")

    return full_content

# Call the model with streaming enabled
chat_stream(
    ai_config=_ai_config,
    on_think_start=_on_think_start,
    on_think_end=_on_think_end,
    on_thinking=_on_thinking,
    on_receiving=_on_receiving,
    on_finish=_on_finish,
    histories=[
        {
            "role": "user",
            "content": "你好",
        }
    ],
)
```
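
As a usage sketch (this wrapper is my own addition and assumes the chat_stream and _ai_config definitions above), the callbacks can also be used to collect the reasoning trace and the final answer into separate buffers instead of printing them:

```python
def collect_stream(histories, ai_config):
    """Run chat_stream without printing and return (reasoning_text, answer_text)."""
    reasoning_parts = []

    answer = chat_stream(
        histories=histories,
        ai_config=ai_config,
        # Accumulate reasoning chunks instead of printing them
        on_thinking=reasoning_parts.append,
        # chat_stream already returns the full answer, so no other callbacks are needed
    )
    return "".join(reasoning_parts), answer


reasoning, answer = collect_stream(
    histories=[{"role": "user", "content": "你好"}],
    ai_config=_ai_config,
)
print("reasoning:", reasoning)
print("answer:", answer)
```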