【大语言模型_2】mindie部署deepseek模型

一、模型下载

https://modelscope.cn/home

复制代码
下载地址:https://modelscope.cn/home

二、服务器驱动/固件安装

复制代码
驱动/固件下载地址:
https://www.hiascend.com/hardware/firmware-drivers/community?product=1&model=23&cann=All&driver=1.0.21.alpha

驱动/固件安装指南:
https://www.hiascend.com/document/detail/zh/Atlas%20200I%20A2/24.1.0/ep/installationguide/Install_10.html

安装完成后,执行 npu-smi info 命令验证驱动/固件是否安装成功。

三、mindie推理框架下载

复制代码
下载地址:
https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f

四、本地部署

1、修改模型配置文件
复制代码
修改模型config.json权限
进入到模型根目录
chmod 640 config.json

如果不修改config.json的权限,启动模型时会报错。

2、修改模型config.json文件

修改模型config.json

将 "torch_dtype": "bfloat16" 更改为 "torch_dtype": "float16"(Atlas 300I Duo推理卡不支持bfloat16)。

3、启动镜像
复制代码
docker run --name zml_mindie -it -d --net=host --shm-size=500g \
    --privileged \
    -w /home \
    --entrypoint=bash \
    --device=/dev/davinci_manager \
    --device=/dev/hisi_hdc \
    --device=/dev/devmm_svm \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/sbin:/usr/local/sbin \
    -v /root/xxx/mindformer_share/:/home/xxx_share \
    -v /etc/hccn.conf:/etc/hccn.conf \
    -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime \
swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:1.0.RC3-300I-Duo-arm64  

进入容器(例如执行 docker exec -it zml_mindie bash),修改配置文件

复制代码
修改MindIE Server配置文件
vi /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json



(注意:下面配置中的“中文解释:……”仅为说明文字,实际配置文件必须是标准JSON,不能包含这些说明。)
{
    "Version" : "1.0.0",
    "LogConfig" :
    {
        "logLevel" : "Info",
        "logFileSize" : 20,
        "logFileNum" : 20,
        "logPath" : "logs/mindservice.log"
    },

    "ServerConfig" :
    {
        "ipAddress" : "192.168.202.13",  中文解释:业务面接口绑定IP地址。
        "managementIpAddress" : "127.0.0.2",
        "port" : 25010,  中文解释:端口号
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : false,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,  中文解释:是否开启HTTPS安全通信
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrl" : "security/certs/server_crl.pem",
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrl" : "security/certs/management/server_crl.pem",
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : false,
        "interCommPort" : 1121,
        "interCommTlsCaFile" : "security/grpc/ca/ca.pem",
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrl" : "security/certs/server_crl.pem",
        "openAiSupport" : "vllm"
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[0,1,2,3]],  中文解释:启动哪几张卡
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : false,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : true,
        "interNodeTlsCaFile" : "security/grpc/ca/ca.pem",
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrl" : "security/grpc/certs/server_crl.pem",
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 10000,  中文解释:模型支持最大序列长度
            "maxInputTokenLen" : 10000,  中文解释:输入token id最大长度
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "TopASK",  中文解释:启动模型名称
                    "modelWeightPath" : "/home/xxx_share/DeepSeek-R1-Distill-Qwen-14B",  中文解释:模型路径
                    "worldSize" : 4,  中文解释:启动几张推理卡
                    "cpuMemSize" : 5,
                    "npuMemSize" : -1,
                    "backendType" : "atb"
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 18192,  中文解释:模型推理过程中,prefill阶段能够处理的最大token数量。
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 200,
            "maxIterTimes" : 512,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000
        }
    }
}


参数详情文档:https://www.hiascend.com/document/detail/zh/mindie/100/mindieservice/servicedev/mindie_service0285.html
4、启动服务
复制代码
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon
相关推荐
阿坡RPA11 小时前
手搓MCP客户端&服务端:从零到实战极速了解MCP是什么?
人工智能·aigc
用户277844910499311 小时前
借助DeepSeek智能生成测试用例:从提示词到Excel表格的全流程实践
人工智能·python
机器之心11 小时前
刚刚,DeepSeek公布推理时Scaling新论文,R2要来了?
人工智能
算AI13 小时前
人工智能+牙科:临床应用中的几个问题
人工智能·算法
凯子坚持 c14 小时前
基于飞桨框架3.0本地DeepSeek-R1蒸馏版部署实战
人工智能·paddlepaddle
你觉得20514 小时前
哈尔滨工业大学DeepSeek公开课:探索大模型原理、技术与应用从GPT到DeepSeek|附视频与讲义下载方法
大数据·人工智能·python·gpt·学习·机器学习·aigc
8K超高清15 小时前
中国8K摄像机:科技赋能文化传承新图景
大数据·人工智能·科技·物联网·智能硬件
hyshhhh15 小时前
【算法岗面试题】深度学习中如何防止过拟合?
网络·人工智能·深度学习·神经网络·算法·计算机视觉
薛定谔的猫-菜鸟程序员15 小时前
零基础玩转深度神经网络大模型:从Hello World到AI炼金术-详解版(含:Conda 全面使用指南)
人工智能·神经网络·dnn
币之互联万物15 小时前
2025 AI智能数字农业研讨会在苏州启幕,科技助农与数据兴业成焦点
人工智能·科技