目录
- [一. 检查服务器、OS等配置](#一. 检查服务器、OS等配置)
- [二. 驱动和固件下载与安装](#二. 驱动和固件下载与安装)
- [三. 下载mindie镜像并打包](#三. 下载mindie镜像并打包)
- [四. 下载权重文件](#四. 下载权重文件)
- [五. 启动模型](#五. 启动模型)
  - [1. model_start.sh文件](#1. model_start.sh文件)
  - [2. conf.json文件](#2. conf.json文件)
一. 检查服务器、OS等配置
博主这里设备清单:
- 服务器型号:Atlas 800T A2
- 显卡类型:910B3,8张,单张显存64G
- 操作系统:openEuler-24.03(LTS-SP1)
- cpu指令架构:AArch64
二. 驱动和固件下载与安装
需要了解的点:
- 910B3属于A2系列显卡;910C属于A3系列显卡。
- 固件与驱动我们直接使用最新版本(Ascend HDK 25.5.2)
- 910B3一般搭配8.5.1版本的CANN。
其他选项直接参考下图来选。

具体驱动安装参考:https://blog.csdn.net/mizhiakk/article/details/147305068
三. 下载mindie镜像并打包
使用 Docker Desktop下载 2.3.1-800I-A2-py311-openeuler24.03-lts 版本,具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501

四. 下载权重文件
权重文件下载地址(例如 ModelScope):https://modelscope.cn/models/Qwen/Qwen3-32B
具体操作参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501
五. 启动模型
1. model_start.sh文件
这里博主使用4张显卡(davinci4~davinci7)来跑模型,卡号与数量需要和conf.json中的npuDeviceIds、worldSize保持一致。
bash
#!/usr/bin/sh
# Start the MindIE container for Qwen3-32B in detached mode.
# - Passes through the Ascend management devices plus NPUs davinci4..davinci7.
# - Bind-mounts the host Ascend driver, /usr/local/sbin, and the model weights
#   directory at the same paths inside the container.
# - ba625eff6e40 is the image reference; /bin/bash is the container command.
docker run -it -d \
    --shm-size 200g \
    --net=host \
    --name Qwen3-32b-new \
    --privileged \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    --device=/dev/davinci4 \
    --device=/dev/davinci5 \
    --device=/dev/davinci6 \
    --device=/dev/davinci7 \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B:/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B \
    ba625eff6e40 \
    /bin/bash
2. conf.json文件
json
{
  "Version": "1.0.0",
  "ServerConfig": {
    "ipAddress": "192.168.132.222",
    "managementIpAddress": "127.0.0.2",
    "port": 1025,
    "managementPort": 1026,
    "metricsPort": 1027,
    "allowAllZeroIpListening": false,
    "maxLinkNum": 1000,
    "httpsEnabled": false,
    "fullTextEnabled": false,
    "tlsCaPath": "security/ca/",
    "tlsCaFile": ["ca.pem"],
    "tlsCert": "security/certs/server.pem",
    "tlsPk": "security/keys/server.key.pem",
    "tlsPkPwd": "security/pass/key_pwd.txt",
    "tlsCrlPath": "security/certs/",
    "tlsCrlFiles": ["server_crl.pem"],
    "managementTlsCaFile": ["management_ca.pem"],
    "managementTlsCert": "security/certs/management/server.pem",
    "managementTlsPk": "security/keys/management/server.key.pem",
    "managementTlsPkPwd": "security/pass/management/key_pwd.txt",
    "managementTlsCrlPath": "security/management/certs/",
    "managementTlsCrlFiles": ["server_crl.pem"],
    "metricsTlsCaFile": ["metrics_ca.pem"],
    "metricsTlsCert": "security/certs/metrics/server.pem",
    "metricsTlsPk": "security/keys/metrics/server.key.pem",
    "metricsTlsPkPwd": "security/pass/metrics/key_pwd.txt",
    "metricsTlsCrlPath": "security/metrics/certs/",
    "metricsTlsCrlFiles": ["server_crl.pem"],
    "kmcKsfMaster": "tools/pmt/master/ksfa",
    "kmcKsfStandby": "tools/pmt/standby/ksfb",
    "inferMode": "standard",
    "interCommTLSEnabled": true,
    "interCommPort": 1121,
    "interCommTlsCaPath": "security/grpc/ca/",
    "interCommTlsCaFiles": ["ca.pem"],
    "interCommTlsCert": "security/grpc/certs/server.pem",
    "interCommPk": "security/grpc/keys/server.key.pem",
    "interCommPkPwd": "security/grpc/pass/key_pwd.txt",
    "interCommTlsCrlPath": "security/grpc/certs/",
    "interCommTlsCrlFiles": ["server_crl.pem"],
    "openAiSupport": "vllm",
    "tokenTimeout": 600,
    "e2eTimeout": 600,
    "distDPServerEnabled": false
  },
  "BackendConfig": {
    "backendName": "mindieservice_llm_engine",
    "modelInstanceNumber": 1,
    "npuDeviceIds": [[4, 5, 6, 7]],
    "tokenizerProcessNumber": 8,
    "multiNodesInferEnabled": false,
    "multiNodesInferPort": 1120,
    "interNodeTLSEnabled": true,
    "interNodeTlsCaPath": "security/grpc/ca/",
    "interNodeTlsCaFiles": ["ca.pem"],
    "interNodeTlsCert": "security/grpc/certs/server.pem",
    "interNodeTlsPk": "security/grpc/keys/server.key.pem",
    "interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
    "interNodeTlsCrlPath": "security/grpc/certs/",
    "interNodeTlsCrlFiles": ["server_crl.pem"],
    "interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
    "interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
    "kvPoolConfig": {
      "backend": "",
      "configPath": ""
    },
    "ModelDeployConfig": {
      "maxSeqLen": 100000,
      "maxInputTokenLen": 60000,
      "truncation": false,
      "ModelConfig": [
        {
          "modelInstanceType": "Standard",
          "modelName": "qwen3-32b",
          "modelWeightPath": "/mnt/disk1/modules/qwen3-32b/Qwen3-32B/Qwen3-32B/",
          "worldSize": 4,
          "cpuMemSize": 0,
          "npuMemSize": -1,
          "backendType": "atb",
          "trustRemoteCode": false,
          "async_scheduler_wait_time": 120,
          "kv_trans_timeout": 10,
          "kv_link_timeout": 1080
        }
      ]
    },
    "ScheduleConfig": {
      "templateType": "Standard",
      "templateName": "Standard_LLM",
      "cacheBlockSize": 128,
      "maxPrefillBatchSize": 50,
      "maxPrefillTokens": 60000,
      "prefillTimeMsPerReq": 150,
      "prefillPolicyType": 0,
      "decodeTimeMsPerReq": 50,
      "decodePolicyType": 0,
      "maxBatchSize": 50,
      "maxIterTimes": 40000,
      "maxPreemptCount": 0,
      "supportSelectBatch": false,
      "maxQueueDelayMicroseconds": 5000,
      "maxFirstTokenWaitTime": 2500
    }
  },
  "LogConfig": {
    "dynamicLogLevel": "",
    "dynamicLogLevelValidHours": 2,
    "dynamicLogLevelValidTime": ""
  },
  "EnableDynamicAdjustTimeoutConfig": false
}
加载镜像、启动容器、验证等相关操作,请参考:https://blog.csdn.net/qq_39671159/article/details/157475648?spm=1001.2014.3001.5501