Atlas 800T A2部署qwen3-32b

目录

  • [一. 检查服务器、OS等配置](#一. 检查服务器、OS等配置)
  • [二. 驱动和固件下载与安装](#二. 驱动和固件下载与安装)
  • [三. 下载mindie镜像并打包](#三. 下载mindie镜像并打包)
  • [四. 下载权重文件](#四. 下载权重文件)
  • [五. 启动模型](#五. 启动模型)
    • [1. model_start.sh文件](#1. model_start.sh文件)
    • [2. conf.json文件](#2. conf.json文件)

一. 检查服务器、OS等配置

博主这里设备清单:

  1. 服务器型号:Atlas 800T A2
  2. 显卡类型:910B3,8张,单张显存64G
  3. 操作系统:openEuler-24.03(LTS-SP1)
  4. cpu指令架构:AArch64

二. 驱动和固件下载与安装

下载驱动链接:https://www.hiascend.com/hardware/firmware-drivers/community?product=4&model=26&cann=8.5.1&driver=Ascend+HDK+25.5.2

需要了解的点:

  • 910B3属于A2系列显卡;910C属于A3系列显卡。
  • 固件与驱动我们直接使用最新版本(Ascend HDK 25.5.2)
  • 910B3一般使用8.5.1版本的CANN。

其他选项直接参考下图来选。

具体驱动安装参考:https://blog.csdn.net/mizhiakk/article/details/147305068

三. 下载mindie镜像并打包

使用 Docker Desktop 下载 MindIE 镜像的 2.3.1-800I-A2-py311-openeuler24.03-lts 版本并打包,具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

四. 下载权重文件

权重文件下载地址:

具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

五. 启动模型

1. model_start.sh文件

这里博主使用4张显卡(davinci4~davinci7,对应 conf.json 中 npuDeviceIds 的 4、5、6、7)来跑模型。

bash 复制代码
#!/usr/bin/sh
# Launch a detached, privileged container named "Qwen3-32b-new" on the host
# network with 200g of shared memory.
#   - Exposes the Ascend management devices plus NPU devices davinci4..davinci7.
#   - Bind-mounts the Ascend driver directory, /usr/local/sbin and the
#     Qwen3-32B weight directory at the same paths inside the container.
#   - ba625eff6e40 is the image reference (presumably the local ID of the
#     loaded MindIE image; replace it with your own), started with /bin/bash.
docker run -it -d \
    --shm-size 200g \
    --net=host \
    --name Qwen3-32b-new \
    --privileged \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    --device=/dev/davinci4 \
    --device=/dev/davinci5 \
    --device=/dev/davinci6 \
    --device=/dev/davinci7 \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B:/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B \
    ba625eff6e40 /bin/bash

2. conf.json文件

json 复制代码
{
    "Version" : "1.0.0",

    "ServerConfig" :
    {
        "ipAddress" : "192.168.132.222",
        "managementIpAddress" : "127.0.0.2",
        "port" : 1025,
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : false,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrlPath" : "security/certs/",
        "tlsCrlFiles" : ["server_crl.pem"],
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath" : "security/management/certs/",
        "managementTlsCrlFiles" : ["server_crl.pem"],
        "metricsTlsCaFile" : ["metrics_ca.pem"],
        "metricsTlsCert" : "security/certs/metrics/server.pem",
        "metricsTlsPk" : "security/keys/metrics/server.key.pem",
        "metricsTlsPkPwd" : "security/pass/metrics/key_pwd.txt",
        "metricsTlsCrlPath" : "security/metrics/certs/",
        "metricsTlsCrlFiles" : ["server_crl.pem"],
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : true,
        "interCommPort" : 1121,
        "interCommTlsCaPath" : "security/grpc/ca/",
        "interCommTlsCaFiles" : ["ca.pem"],
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath" : "security/grpc/certs/",
        "interCommTlsCrlFiles" : ["server_crl.pem"],
        "openAiSupport" : "vllm",
        "tokenTimeout" : 600,
        "e2eTimeout" : 600,
        "distDPServerEnabled":false
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[4,5,6,7]],
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : false,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : true,
        "interNodeTlsCaPath" : "security/grpc/ca/",
        "interNodeTlsCaFiles" : ["ca.pem"],
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath" : "security/grpc/certs/",
        "interNodeTlsCrlFiles" : ["server_crl.pem"],
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "kvPoolConfig" : {"backend":"", "configPath":""},
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 100000,
            "maxInputTokenLen" : 60000,
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "qwen3-32b",
                    "modelWeightPath" : "/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B/",
                    "worldSize" : 4,
                    "cpuMemSize" : 0,
                    "npuMemSize" : -1,
                    "backendType" : "atb",
                    "trustRemoteCode" : false,
                    "async_scheduler_wait_time": 120,
                    "kv_trans_timeout": 10,
                    "kv_link_timeout": 1080
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 60000,
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 50,
            "maxIterTimes" : 40000,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000,
            "maxFirstTokenWaitTime": 2500
        }
    },

    "LogConfig": {
        "dynamicLogLevel" : "",
        "dynamicLogLevelValidHours" : 2,
        "dynamicLogLevelValidTime" : ""
    },

    "EnableDynamicAdjustTimeoutConfig": false
}

加载镜像、启动容器、验证等相关操作,请参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

相关推荐
楚枫默寒6 小时前
Linux 编辑文件后自动添加修改日期
linux·运维·bash
马***4117 小时前
适配成人英语学习痛点,打造落地性强的学习辅助方式
人工智能·学习
夜焱辰7 小时前
浏览器端 Agent 的文件版本管理:不用 Git,基于 OPFS + SQLite 自己造了一个
前端·人工智能
Ricky05537 小时前
CTRL-WORLD:一种用于机器人操控的可控生成世界模型(中美2025年联合研究)
人工智能·机器人·世界模型
jeffer_liu8 小时前
Spring AI 生产级实战:工具调用
java·人工智能·后端·spring·ai编程
阿乔外贸日记8 小时前
2026尼日利亚五项清关政策更新,拉高能源装备进口综合成本
大数据·人工智能·搜索引擎·智能手机·云计算·能源
民乐团扒谱机8 小时前
【AI笔记】短时纯音时长对音高感知偏移效应研究综述
人工智能·笔记
侃谈科技圈8 小时前
破除数据中台落地困境:2026数据治理平台差异化能力与选型决策指南
大数据·人工智能
大象说8 小时前
Python多进程共享队列无报错僵死 120G Nginx访问日志清洗踩坑全记录
人工智能·自然语言处理
Cosolar8 小时前
AutoGen 精通教程:从零到企业级多 Agent 系统架构师
人工智能·后端·面试