一、模型下载
下载地址:https://modelscope.cn/home
二、服务器驱动/固件安装
驱动/固件下载地址:
https://www.hiascend.com/hardware/firmware-drivers/community?product=1&model=23&cann=All&driver=1.0.21.alpha
驱动/固件安装地址:
https://www.hiascend.com/document/detail/zh/Atlas%20200I%20A2/24.1.0/ep/installationguide/Install_10.html
安装完成后执行npu-smi info验证

三、MindIE推理框架下载
下载地址:
https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f
四、本地部署
1、修改模型配置文件
修改模型config.json权限
进入到模型根目录
chmod 640 config.json
若不修改该文件权限,启动模型时会报错

2、修改模型config.json文件
修改模型config.json
"torch_dtype": "bfloat16" ------更改为 "torch_dtype": "float16"

3、启动镜像
docker run --name zml_mindie -it -d --net=host --shm-size=500g \
--privileged \
-w /home \
--entrypoint=bash \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device=/dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /root/xxx/mindformer_share/:/home/xxx_share \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:1.0.RC3-300I-Duo-arm64
进入容器,修改配置文件
修改MindIE Server配置文件
vi /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json
{
29. "Version" : "1.0.0",
30. "LogConfig" :
31. {
32. "logLevel" : "Info",
33. "logFileSize" : 20,
34. "logFileNum" : 20,
35. "logPath" : "logs/mindservice.log"
36. },
37.
38. "ServerConfig" :
39. {
40. "ipAddress" : "192.168.202.13", 中文解释:业务面接口绑定IP地址。
41. "managementIpAddress" : "127.0.0.2",
42. "port" : 25010, 中文解释:端口号
43. "managementPort" : 1026,
44. "metricsPort" : 1027,
45. "allowAllZeroIpListening" : false,
46. "maxLinkNum" : 1000,
47. "httpsEnabled" : false, 中文解释:是否开启HTTPS安全通信
48. "fullTextEnabled" : false,
49. "tlsCaPath" : "security/ca/",
50. "tlsCaFile" : ["ca.pem"],
51. "tlsCert" : "security/certs/server.pem",
52. "tlsPk" : "security/keys/server.key.pem",
53. "tlsPkPwd" : "security/pass/key_pwd.txt",
54. "tlsCrl" : "security/certs/server_crl.pem",
55. "managementTlsCaFile" : ["management_ca.pem"],
56. "managementTlsCert" : "security/certs/management/server.pem",
57. "managementTlsPk" : "security/keys/management/server.key.pem",
58. "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
59. "managementTlsCrl" : "security/certs/management/server_crl.pem",
60. "kmcKsfMaster" : "tools/pmt/master/ksfa",
61. "kmcKsfStandby" : "tools/pmt/standby/ksfb",
62. "inferMode" : "standard",
63. "interCommTLSEnabled" : false,
64. "interCommPort" : 1121,
65. "interCommTlsCaFile" : "security/grpc/ca/ca.pem",
66. "interCommTlsCert" : "security/grpc/certs/server.pem",
67. "interCommPk" : "security/grpc/keys/server.key.pem",
68. "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
69. "interCommTlsCrl" : "security/certs/server_crl.pem",
70. "openAiSupport" : "vllm"
71. },
72.
73. "BackendConfig" : {
74. "backendName" : "mindieservice_llm_engine",
75. "modelInstanceNumber" : 1,
76. "npuDeviceIds" : [[0,1,2,3]], 中文解释:启动哪几张卡
77. "tokenizerProcessNumber" : 8,
78. "multiNodesInferEnabled" : false,
79. "multiNodesInferPort" : 1120,
80. "interNodeTLSEnabled" : true,
81. "interNodeTlsCaFile" : "security/grpc/ca/ca.pem",
82. "interNodeTlsCert" : "security/grpc/certs/server.pem",
83. "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
84. "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
85. "interNodeTlsCrl" : "security/grpc/certs/server_crl.pem",
86. "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
87. "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
88. "ModelDeployConfig" :
89. {
90. "maxSeqLen" : 10000, 中文解释:模型支持的最大序列长度
91. "maxInputTokenLen" : 10000, 中文解释:输入token id最大长度
92. "truncation" : false,
93. "ModelConfig" : [
94. {
95. "modelInstanceType" : "Standard",
96. "modelName" : "TopASK", 中文解释:启动模型名称
97. "modelWeightPath" : "/home/xxx_share/DeepSeek-R1-Distill-Qwen-14B", 中文解释:模型路径
98. "worldSize" : 4, 中文解释:启动几张推理卡
99. "cpuMemSize" : 5,
100. "npuMemSize" : -1,
101. "backendType" : "atb"
102. }
103. ]
104. },
105.
106. "ScheduleConfig" :
107. {
108. "templateType" : "Standard",
109. "templateName" : "Standard_LLM",
110. "cacheBlockSize" : 128,
111.
112. "maxPrefillBatchSize" : 50,
113. "maxPrefillTokens" : 18192, 中文解释:模型推理过程中,prefill阶段能够处理的最大token数量。
114. "prefillTimeMsPerReq" : 150,
115. "prefillPolicyType" : 0,
116.
117. "decodeTimeMsPerReq" : 50,
118. "decodePolicyType" : 0,
119.
120. "maxBatchSize" : 200,
121. "maxIterTimes" : 512,
122. "maxPreemptCount" : 0,
123. "supportSelectBatch" : false,
124. "maxQueueDelayMicroseconds" : 5000
125. }
126. }
127. }
参数详情文档:https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0285.html
4、启动服务
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon