ModelScope下载模型+ vLLM调用+内存释放

bash 复制代码

source ~/vllm-env/bin/activate

# 基础依赖
pip install modelscope vllm

测试代码

python 复制代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LLM 服务测试脚本
"""

import sys
import os

# 添加项目根目录到 Python 路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from llm_server.llm_class import LLMService


def test_llm_basic():
    """Send one single-turn chat request through ``LLMService.create``.

    Prints the type of the response and the content of every choice that
    carries a ``message.content`` entry.

    Returns:
        bool: True when the request and the printing finish without raising,
        False when any exception is caught (its text is printed).
    """
    print("\n=== 测试基本 LLM 功能 ===")

    service = LLMService()
    chat = [{"role": "user", "content": "你好，你是谁？"}]

    print("发送消息到 LLM 服务...")

    try:
        # Non-streaming call.
        reply = service.create(chat)

        print("\n=== LLM 响应 ===")
        print(f"响应状态: 成功")
        print(f"响应类型: {type(reply)}")

        # Walk the choices (if any) and print the ones that hold content.
        choices = reply["choices"] if "choices" in reply else []
        for number, item in enumerate(choices, start=1):
            if "message" not in item or "content" not in item["message"]:
                continue
            print(f"\n生成内容 {number}:")
            print(item["message"]["content"])

        print("\n=== 测试完成 ===")
    except Exception as err:
        print(f"\n测试失败: {str(err)}")
        return False
    return True


def test_llm_stream():
    """Exercise the streaming API ``LLMService.create_stream``.

    A callback that echoes its argument to stdout (no newline, flushed) is
    handed to the service, and everything the returned iterable yields is
    collected so the number of chunks can be reported.

    Returns:
        bool: True when the stream is consumed without raising, False when
        any exception is caught (its text is printed).
    """
    print("\n=== 测试流式 LLM 功能 ===")

    service = LLMService()
    chat = [{"role": "user", "content": "请简要介绍一下人工智能的发展历史"}]

    print("发送流式请求到 LLM 服务...")

    def echo(piece):
        # Print each piece as it arrives, without buffering or line breaks.
        print(piece, end="", flush=True)

    try:
        print("\n=== 流式响应 ===")
        # Drain the stream; the callback does the printing.
        received = list(service.create_stream(chat, callback=echo))

        print("\n\n=== 流式测试完成 ===")
        print(f"共接收 {len(received)} 个响应块")
    except Exception as err:
        print(f"\n测试失败: {str(err)}")
        return False
    return True


def test_llm_multi_turn():
    """Send a three-message conversation through ``LLMService.create``.

    The history contains a user turn, an assistant turn and a follow-up user
    question; the content of every returned choice is printed.

    Returns:
        bool: True when the request and the printing finish without raising,
        False when any exception is caught (its text is printed).
    """
    print("\n=== 测试多轮对话功能 ===")

    service = LLMService()

    # user -> assistant -> user
    dialogue = [
        {"role": "user", "content": "你好，我想了解一下 Python 编程语言"},
        {
            "role": "assistant",
            "content": "Python 是一种高级编程语言，以其简洁的语法和强大的生态系统而闻名。它广泛应用于数据分析、机器学习、Web 开发等领域。",
        },
        {"role": "user", "content": "那 Python 和 JavaScript 有什么区别呢？"},
    ]

    print("发送多轮对话请求到 LLM 服务...")

    try:
        reply = service.create(dialogue)

        print("\n=== LLM 多轮对话响应 ===")

        # Walk the choices (if any) and print the ones that hold content.
        choices = reply["choices"] if "choices" in reply else []
        for number, item in enumerate(choices, start=1):
            if "message" not in item or "content" not in item["message"]:
                continue
            print(f"\n生成内容 {number}:")
            print(item["message"]["content"])

        print("\n=== 多轮对话测试完成 ===")
    except Exception as err:
        print(f"\n测试失败: {str(err)}")
        return False
    return True


if __name__ == "__main__":
    print("开始测试 LLM 服务...")

    # (label, test function) pairs; they run in exactly this order.
    suite = (
        ("基本功能测试", test_llm_basic),
        ("流式功能测试", test_llm_stream),
        ("多轮对话测试", test_llm_multi_turn),
    )

    test_results = []
    for label, run in suite:
        verdict = "通过" if run() else "失败"
        test_results.append(f"{label}: {verdict}")

    # Summary: one line per test.
    print("\n=== 测试结果摘要 ===")
    for line in test_results:
        print(line)

    print("\nLLM 测试完成！")

python 复制代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
VLM 服务测试脚本
"""

import sys
import os

# 添加项目根目录到 Python 路径
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from llm_server.llm_class import VLMService

def test_vlm_basic():
    """Send one question, optionally with an image, to ``VLMService``.

    If the hard-coded screenshot path exists (relative to the current working
    directory) it is passed as ``image_source``; otherwise ``None`` is passed
    and a warning is printed.

    Returns:
        bool: True when the request and the printing finish without raising,
        False when any exception is caught (its text is printed).
    """
    print("\n=== 测试基本 VLM 功能 ===")

    service = VLMService()
    chat = [{"role": "user", "content": "这张图片里有什么？"}]

    # Use the sample screenshot only when it is actually on disk.
    candidate = "yolo/findgate_data/Snipaste_2026-01-13_23-40-40.png"
    if os.path.exists(candidate):
        test_image = candidate
        print(f"使用测试图片: {test_image}")
    else:
        test_image = None
        print("警告: 未找到测试图片，将只测试文本功能")

    print("发送消息到 VLM 服务...")

    try:
        reply = service.create_with_image(chat, image_source=test_image)

        print("\n=== VLM 响应 ===")
        print(f"响应状态: 成功")
        print(f"响应类型: {type(reply)}")

        # Walk the choices (if any) and print the ones that hold content.
        choices = reply["choices"] if "choices" in reply else []
        for number, item in enumerate(choices, start=1):
            if "message" not in item or "content" not in item["message"]:
                continue
            print(f"\n生成内容 {number}:")
            print(item["message"]["content"])

        print("\n=== 测试完成 ===")
    except Exception as err:
        print(f"\n测试失败: {str(err)}")
        return False
    return True

def test_vlm_multiple_images():
    """Ask ``VLMService`` to compare two PNG images from the sample folder.

    The first two ``.png`` files of ``yolo/findgate_data`` (relative to the
    current working directory), taken in sorted name order, are passed as
    ``image_sources``. When fewer than two are available a warning is printed
    and an empty list is passed, so the request really is text-only as the
    warning states.

    Returns:
        bool: True when the request and the printing finish without raising,
        False when any exception is caught (its text is printed).
    """
    print("\n=== 测试 VLM 多图功能 ===")

    # Initialise the VLM service.
    vlm = VLMService()

    messages = [
        {
            "role": "user",
            "content": "比较这两张图片的不同之处"
        }
    ]

    # Collect up to two test images.
    test_images = []
    image_dir = "yolo/findgate_data"
    # isdir (not exists): a plain file at this path would make os.listdir
    # raise outside the try block below.
    if os.path.isdir(image_dir):
        # os.listdir returns names in arbitrary order; sort so the same two
        # images are picked on every run and on every platform.
        png_names = sorted(
            name for name in os.listdir(image_dir) if name.endswith(".png")
        )
        test_images = [os.path.join(image_dir, name) for name in png_names[:2]]

    if len(test_images) >= 2:
        print(f"使用测试图片: {test_images}")
    else:
        print("警告: 未找到足够的测试图片，将只测试文本功能")
        # Honour the warning: do not send a lone image with a
        # "compare the two images" prompt.
        test_images = []

    print("发送多图请求到 VLM 服务...")

    try:
        response = vlm.create_with_multiple_images(messages, image_sources=test_images)

        print("\n=== VLM 多图响应 ===")
        print(f"响应状态: 成功")
        print(f"响应类型: {type(response)}")

        # Print the content of every choice that carries one.
        if "choices" in response:
            for i, choice in enumerate(response["choices"]):
                if "message" in choice and "content" in choice["message"]:
                    print(f"\n生成内容 {i+1}:")
                    print(choice["message"]["content"])

        print("\n=== 多图测试完成 ===")
        return True

    except Exception as e:
        print(f"\n测试失败: {str(e)}")
        return False

def test_vlm_conversation():
    """Send a follow-up question with prior history to ``VLMService``.

    A two-message history (user greeting, assistant reply) and a new query
    are passed to ``create_multimodal_conversation``. The hard-coded
    screenshot is attached as ``image_source`` when it exists on disk
    (relative to the current working directory); otherwise ``None`` is passed.

    Returns:
        bool: True when the request and the printing finish without raising,
        False when any exception is caught (its text is printed).
    """
    print("\n=== 测试 VLM 多轮对话功能 ===")

    service = VLMService()

    # Earlier turns of the conversation.
    history = [
        {"role": "user", "content": "你好"},
        {
            "role": "assistant",
            "content": "你好！我是一个视觉语言模型，可以理解图片和文字。请问有什么可以帮助你的？",
        },
    ]

    # The new user query.
    question = "请描述一下这张图片"

    # Attach the sample screenshot only when it is present.
    candidate = "yolo/findgate_data/Snipaste_2026-01-13_23-40-40.png"
    test_image = candidate if os.path.exists(candidate) else None
    if test_image is not None:
        print(f"使用测试图片: {test_image}")

    print("发送多轮对话请求到 VLM 服务...")

    try:
        reply = service.create_multimodal_conversation(
            history, question, image_source=test_image
        )

        print("\n=== VLM 多轮对话响应 ===")
        print(f"响应状态: 成功")
        print(f"响应类型: {type(reply)}")

        # Walk the choices (if any) and print the ones that hold content.
        choices = reply["choices"] if "choices" in reply else []
        for number, item in enumerate(choices, start=1):
            if "message" not in item or "content" not in item["message"]:
                continue
            print(f"\n生成内容 {number}:")
            print(item["message"]["content"])

        print("\n=== 多轮对话测试完成 ===")
    except Exception as err:
        print(f"\n测试失败: {str(err)}")
        return False
    return True

if __name__ == "__main__":
    print("开始测试 VLM 服务...")

    # (label, test function) pairs; they run in exactly this order.
    suite = (
        ("基本功能测试", test_vlm_basic),
        ("多图功能测试", test_vlm_multiple_images),
        ("多轮对话测试", test_vlm_conversation),
    )

    test_results = []
    for label, run in suite:
        verdict = "通过" if run() else "失败"
        test_results.append(f"{label}: {verdict}")

    # Summary: one line per test.
    print("\n=== 测试结果摘要 ===")
    for line in test_results:
        print(line)

    print("\nVLM 测试完成！")

vllm Qwen2.5-0.5B输出乱码解决办法用-Instruct版本的-CSDN博客

复制代码

thon_server\micromambavenv\python.exe' 'c:\Users\njsgcs\.trae-cn\extensions\ms-python.debugpy-2025.18.0-win32-x64\bundled\libs\debugpy\launcher' '57503' '--' 'E:\code\my_python_server\test_vlm.py'
开始测试 VLM 服务...

=== 测试基本 VLM 功能 ===
VLM服务初始化完成，模型: /root/models/OpenBMB_MiniCPM-V-2_6-int4
使用测试图片: yolo/findgate_data/Snipaste_2026-01-13_23-40-40.png
发送消息到 VLM 服务...
[VLM调试] 成功收到响应

=== VLM 响应 ===
响应状态: 成功
响应类型: <class 'dict'>

生成内容 1:
这张图片描绘了一个数字渲染的场景，可能来自游戏或视频，其中包 含几个元素。中央人物是一个发型像古代地中海或中东服装的男性。 场景看起来是现代城市环境，结合了传统的建筑风格，如古典柱在建 筑物的背景中可见。画面中的色调温暖，主要是温暖的棕色和米色， 突显了建筑质感和傍晚的金黄色阳光。人物与门和车辆的互动可能表 明了一个故事情节或游戏机制，比如接近或互动。

=== 测试完成 ===

=== 测试 VLM 多图功能 ===
VLM服务初始化完成，模型: /root/models/OpenBMB_MiniCPM-V-2_6-int4
使用测试图片: ['yolo/findgate_data\\Snipaste_2026-01-13_23-40-40.png', 'yolo/findgate_data\\Snipaste_2026-01-13_23-41-19.png']
发送多图请求到 VLM 服务...
[VLM调试] 成功收到响应

=== VLM 多图响应 ===
响应状态: 成功
响应类型: <class 'dict'>

生成内容 1:
在这两张图片中，有以下不同之处：

1. 图片中的个人物：
   - **第一张图片**：人物站在前景，面向画面左侧，向围栏外望去。
   - **第二张图片**：人物转身，面向画面右侧，向前走去。      

2. 背景中的其他人物：
   - **第一张图片**：背景中没有其他人物。
   - **第二张图片**：背景中有两个新出现的人物，一个靠近中心，另一个靠近右侧。

3. 背景中的物体和环境：
   - **第一张图片**：背景环境较为简单，主要显示围栏和左侧部分建筑物。
   - **第二张图片**：背景中的元素有所变化，包括右侧出现了一个新的物体（可能是桶或其他容器）以及局部环境中的其他细节。     

这些不同之处表明两张图片是在不同时间或不同视角拍摄的。       

=== 多图测试完成 ===

=== 测试 VLM 多轮对话功能 ===
VLM服务初始化完成，模型: /root/models/OpenBMB_MiniCPM-V-2_6-int4
使用测试图片: yolo/findgate_data/Snipaste_2026-01-13_23-40-40.png
发送多轮对话请求到 VLM 服务...
[VLM调试] 成功收到响应

=== VLM 多轮对话响应 ===
响应状态: 成功
响应类型: <class 'dict'>

生成内容 1:
很抱歉，我无法帮助您描述图片中的内容。

服务创建代码

python 复制代码

import os
import subprocess
import signal
import time
from modelscope import snapshot_download

# ======================
# Model configuration
# ======================
# ModelScope model ids.
LLM_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
VLM_MODEL = "OpenBMB/MiniCPM-V-2_6-int4"  # or MiniCPM-V-2_6 (non-quantized)

# Local checkpoint directories: the model id with every "/" turned into "_".
LLM_DIR = "/root/models/" + "_".join(LLM_MODEL.split("/"))
VLM_DIR = "/root/models/" + "_".join(VLM_MODEL.split("/"))

# Popen handles of the service processes started by this script.
processes = list()

# ======================
# 下载模型（ModelScope）
# ======================
def download_models():
    """Fetch the LLM and VLM checkpoints from ModelScope when missing.

    A model counts as present when its target directory exists and is not
    empty; otherwise the directory is created and ``snapshot_download`` is
    called with it as ``local_dir``.
    """
    targets = ((LLM_MODEL, LLM_DIR), (VLM_MODEL, VLM_DIR))
    for model_id, target_dir in targets:
        # NOTE(review): only emptiness is checked, so a partially downloaded
        # directory is treated as present.
        already_there = os.path.exists(target_dir) and bool(os.listdir(target_dir))
        if already_there:
            print(f"✅ 模型已存在: {target_dir}")
            continue

        print(f"📥 下载模型: {model_id}")
        os.makedirs(target_dir, exist_ok=True)
        snapshot_download(model_id=model_id, local_dir=target_dir)

def start_process(cmd, description):
    """Launch *cmd* through the shell in its own session and keep track of it.

    Args:
        cmd: Shell command line, run with ``shell=True``.
        description: Human-readable name printed in the start-up message.

    Returns:
        subprocess.Popen: Handle of the started process. It is also appended
        to the module-level ``processes`` list.
    """
    print(f"🚀 启动 {description}...")
    # start_new_session=True makes the child call setsid(), giving it its own
    # process group so the whole group can be signalled later. It is the
    # supported replacement for preexec_fn=os.setsid, which the subprocess
    # documentation warns is unsafe when threads are present.
    process = subprocess.Popen(cmd, shell=True, start_new_session=True)
    processes.append(process)
    return process

def cleanup_processes():
    """Stop every tracked service by signalling its process group.

    For each entry in the module-level ``processes`` list, SIGTERM is sent to
    the child's process group and the child is given 5 seconds to exit. If it
    does not, the group is sent SIGKILL and the child is waited for again so
    it is reaped. Groups that no longer exist are skipped silently.

    Only ``OSError`` and ``subprocess.TimeoutExpired`` are handled, so
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.
    """
    print("\n🔄 清理进程...")
    for process in processes:
        try:
            pgid = os.getpgid(process.pid)
            os.killpg(pgid, signal.SIGTERM)
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long: force-kill the group, then
            # wait again so the child does not linger as a zombie.
            try:
                os.killpg(pgid, signal.SIGKILL)
                process.wait(timeout=5)
            except (OSError, subprocess.TimeoutExpired):
                pass
        except OSError:
            # e.g. ProcessLookupError: the process/group is already gone.
            pass
    print("✅ 所有进程已清理完毕")

def signal_handler(sig, frame):
    """Signal handler: stop the child services, then exit with status 0.

    Args:
        sig: Signal number passed in by the ``signal`` module (unused).
        frame: Current stack frame passed in by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0, after cleanup has run.
    """
    print("\n⚠️  收到终止信号，正在清理...")
    cleanup_processes()
    # Raise SystemExit directly instead of calling exit(): that helper is
    # installed by the site module for interactive use and is not guaranteed
    # to exist (for example under ``python -S``).
    raise SystemExit(0)

if __name__ == "__main__":
    # Route SIGINT (Ctrl+C) and SIGTERM through signal_handler.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    download_models()

    # Both services use vLLM's OpenAI-compatible API server entry point.
    api_server = "python -m vllm.entrypoints.openai.api_server "

    llm_cmd = (
        api_server
        + f"--model {LLM_DIR} "
        + "--host 0.0.0.0 --port 8000 "
        + "--dtype float16 "
        + "--gpu-memory-utilization 0.5"  # reduced GPU memory utilization
    )

    # NOTE(review): the original comment on the dtype flag said an RTX 4060 is
    # better suited to float16, yet bfloat16 is what is passed; confirm which
    # one is intended.
    vlm_cmd = (
        api_server
        + f"--model {VLM_DIR} "
        + "--host 0.0.0.0 --port 8001 "
        + "--dtype bfloat16 "
        + "--trust-remote-code "
        + "--max-model-len 1024 "  # context length shortened further
    )

    print("\n🌐 访问地址:")
    print(f"   - LLM: http://localhost:8000/v1")
    print(f"   - VLM: http://localhost:8001/v1")

    # Start the VLM first, pause a fixed 2 seconds (no readiness check is
    # made), then start the LLM.
    vlm_process = start_process(vlm_cmd, "VLM 服务 (端口 8001)")
    time.sleep(2)
    llm_process = start_process(llm_cmd, "LLM 服务 (端口 8000)")

    print("\n✅ 所有服务已启动")
    print("📝 按 Ctrl+C 停止所有服务...")

    # Block until every child process has exited.
    try:
        for child in processes:
            child.wait()
    except KeyboardInterrupt:
        cleanup_processes()