Atlas 800T A2部署qwen3-32b

目录

  • [一. 检查服务器、OS等配置](#一. 检查服务器、OS等配置)
  • [二. 驱动和固件下载与安装](#二. 驱动和固件下载与安装)
  • [三. 下载mindie镜像并打包](#三. 下载mindie镜像并打包)
  • [四. 下载权重文件](#四. 下载权重文件)
  • [五. 启动模型](#五. 启动模型)
    • [1. model_start.sh文件](#1. model_start.sh文件)
    • [2. conf.json文件](#2. conf.json文件)

一. 检查服务器、OS等配置

博主这里设备清单:

  1. 服务器型号:Atlas 800T A2
  2. 显卡类型:910B3,8张,单张显存64G
  3. 操作系统:openEuler-24.03(LTS-SP1)
  4. cpu指令架构:AArch64

二. 驱动和固件下载与安装

下载驱动链接:https://www.hiascend.com/hardware/firmware-drivers/community?product=4&model=26&cann=8.5.1&driver=Ascend+HDK+25.5.2

需要了解的点:

  • 910B3属于A2系列显卡;910C属于A3系列显卡。
  • 固件与驱动我们直接使用最新版本(Ascend HDK 25.5.2)
  • 910B3一般使用8.5.1版本的CANN。

其他选项直接参考下图来选。

具体驱动安装参考:https://blog.csdn.net/mizhiakk/article/details/147305068

三. 下载mindie镜像并打包

使用 Docker Desktop 下载 mindie 镜像的 2.3.1-800I-A2-py311-openeuler24.03-lts 版本并打包,具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

四. 下载权重文件

权重文件下载地址:https://modelscope.cn/models/Qwen/Qwen3-32B

具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

五. 启动模型

1. model_start.sh文件

这里博主使用4张显卡(davinci4~davinci7,对应 conf.json 中的 npuDeviceIds)来跑模型。

bash 复制代码
#!/usr/bin/sh
# Start a detached, privileged container named Qwen3-32b-new on the host network.
#
# Passed into the container:
#   - Ascend management devices: davinci_manager, hisi_hdc, devmm_svm
#   - NPU devices davinci4 .. davinci7 (four cards)
#   - Host Ascend driver directory and /usr/local/sbin, at the same paths
#   - The Qwen3-32B weight directory, at the same path as on the host; this
#     in-container path is what modelWeightPath in conf.json has to point to
#
# ba625eff6e40 is a local image ID (presumably the loaded mindie image --
# confirm with `docker images` and substitute your own ID).
docker run -it -d \
    --shm-size 200g \
    --net=host \
    --name Qwen3-32b-new \
    --privileged \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    --device=/dev/davinci4 \
    --device=/dev/davinci5 \
    --device=/dev/davinci6 \
    --device=/dev/davinci7 \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B:/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B \
    ba625eff6e40 /bin/bash

2. conf.json文件

json 复制代码
{
    "Version" : "1.0.0",

    "ServerConfig" :
    {
        "ipAddress" : "192.168.132.222",
        "managementIpAddress" : "127.0.0.2",
        "port" : 1025,
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : false,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrlPath" : "security/certs/",
        "tlsCrlFiles" : ["server_crl.pem"],
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath" : "security/management/certs/",
        "managementTlsCrlFiles" : ["server_crl.pem"],
        "metricsTlsCaFile" : ["metrics_ca.pem"],
        "metricsTlsCert" : "security/certs/metrics/server.pem",
        "metricsTlsPk" : "security/keys/metrics/server.key.pem",
        "metricsTlsPkPwd" : "security/pass/metrics/key_pwd.txt",
        "metricsTlsCrlPath" : "security/metrics/certs/",
        "metricsTlsCrlFiles" : ["server_crl.pem"],
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : true,
        "interCommPort" : 1121,
        "interCommTlsCaPath" : "security/grpc/ca/",
        "interCommTlsCaFiles" : ["ca.pem"],
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath" : "security/grpc/certs/",
        "interCommTlsCrlFiles" : ["server_crl.pem"],
        "openAiSupport" : "vllm",
        "tokenTimeout" : 600,
        "e2eTimeout" : 600,
        "distDPServerEnabled" : false
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[4,5,6,7]],
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : false,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : true,
        "interNodeTlsCaPath" : "security/grpc/ca/",
        "interNodeTlsCaFiles" : ["ca.pem"],
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath" : "security/grpc/certs/",
        "interNodeTlsCrlFiles" : ["server_crl.pem"],
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "kvPoolConfig" : {"backend":"", "configPath":""},
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 100000,
            "maxInputTokenLen" : 60000,
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "qwen3-32b",
                    "modelWeightPath" : "/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B/",
                    "worldSize" : 4,
                    "cpuMemSize" : 0,
                    "npuMemSize" : -1,
                    "backendType" : "atb",
                    "trustRemoteCode" : false,
                    "async_scheduler_wait_time": 120,
                    "kv_trans_timeout": 10,
                    "kv_link_timeout": 1080
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 60000,
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 50,
            "maxIterTimes" : 40000,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000,
            "maxFirstTokenWaitTime": 2500
        }
    },

    "LogConfig": {
        "dynamicLogLevel" : "",
        "dynamicLogLevelValidHours" : 2,
        "dynamicLogLevelValidTime" : ""
    },

    "EnableDynamicAdjustTimeoutConfig": false
}

加载镜像、启动容器、验证等相关操作,请参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

相关推荐
珂玥c1 小时前
新增硬盘有脏数据如何处理——ubuntu16.04
linux·数据库·ide
wpeng56411 小时前
LINUX-FSTAB参数/SSD挂载
linux·运维·服务器
拓朗工控1 小时前
工业视觉检测:从像素到决策的智能制造之眼
人工智能·视觉检测·制造·工控机·工业电脑
互联网江湖1 小时前
腾讯的船“漏水”了,可灵AI会成为“补丁”吗?
大数据·人工智能
源来猿往1 小时前
语音识别AI之----fbank和mfcc
人工智能·语音识别
金融小师妹1 小时前
基于全球能源供给扰动模型的原油市场重构:沙特产量跌至36年低位后的供需链式分析
大数据·人工智能·逻辑回归
玖日大大1 小时前
2026十大LLM研究突破:扩散语言模型挑战自回归、Unicode隐形注入、AI操纵性评估 — 大模型从狂飙走向可控
人工智能·语言模型·回归·llm·论文解读·ai agent·ai安全
汽车仪器仪表相关领域1 小时前
Debron OVM 1052 光学关门速度仪:汽车门盖检测的高精度便携工具 + 生产线适配 + 耐久性监测,整车制造与质量控制的黄金标准
人工智能·功能测试·单元测试·汽车·制造·可用性测试
网络工程小王1 小时前
【LangGraph 状态持久化(Checkpoint)详解】学习笔记
jvm·人工智能·笔记·langchain