Deploying the DeepSeek distilled model on Ascend 910B
-
Download the MindIE image
Log in, apply for access, then download from:
https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f
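Once access has been granted, pull the image. A minimal sketch, assuming the same tag used in the docker run command below; the tag of your approved version may differ:
docker pull swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.0.T3-800I-A2-py311-openeuler24.03-lts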
-
Start the MindIE container
The -v /data/DeepSeek-R1-Distill-Llama-70B:/models option mounts the model weight directory into the container; adjust the host path to where your weights are stored.
docker run -itd -u root \
  --ipc=host \
  --network=host \
  --device=/dev/davinci0 \
  --device=/dev/davinci1 \
  --device=/dev/davinci2 \
  --device=/dev/davinci3 \
  --device=/dev/davinci4 \
  --device=/dev/davinci5 \
  --device=/dev/davinci6 \
  --device=/dev/davinci7 \
  --device=/dev/davinci_manager \
  --device=/dev/devmm_svm \
  --device=/dev/hisi_hdc \
  -v /var/log/npu/:/usr/slog \
  -v /usr/local/dcmi:/usr/local/dcmi \
  -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
  -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
  -v /usr/bin/hccn_tool:/usr/bin/hccn_tool \
  -v /data/DeepSeek-R1-Distill-Llama-70B:/models \
  --name Deepseek-R1-70B \
  --privileged=true \
  --entrypoint=/bin/bash \
  swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.0.T3-800I-A2-py311-openeuler24.03-lts
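A quick check that the container came up (not part of the original steps):
docker ps --filter name=Deepseek-R1-70B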
-
Start the MindIE service
-
Enter the container (replace c3219fbca343 with your own container ID, or use the container name Deepseek-R1-70B)
docker exec -it c3219fbca343 bash
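Inside the container, a quick sanity check (assuming the driver mounts above are in place) that the NPUs are visible:
npu-smi info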
-
Switch to the mindie-service configuration directory
cd /usr/local/Ascend/mindie/latest/mindie-service
-
Edit the configuration file:
vim conf/config.json
Everything marked with a # comment in the config below can be customized. The # comments are annotations only; JSON does not support comments, so remove them from the actual config.json.
{ "Version" : "1.0.0", "LogConfig" : { "logLevel" : "Info", "logFileSize" : 20, "logFileNum" : 20, "logPath" : "logs/mindie-server.log" }, "ServerConfig" : { "ipAddress" : "10.0.0.10", # 允许外部访问,暴露本机ip "managementIpAddress" : "10.0.0.10", "port" : 1080, # 推理端口 "managementPort" : 1081, # 管理端口 "metricsPort" : 1082, # metrics指标断藕 "allowAllZeroIpListening" : false, "maxLinkNum" : 1000, "httpsEnabled" : false, # 禁用https "fullTextEnabled" : false, "tlsCaPath" : "security/ca/", "tlsCaFile" : ["ca.pem"], "tlsCert" : "security/certs/server.pem", "tlsPk" : "security/keys/server.key.pem", "tlsPkPwd" : "security/pass/key_pwd.txt", "tlsCrlPath" : "security/certs/", "tlsCrlFiles" : ["server_crl.pem"], "managementTlsCaFile" : ["management_ca.pem"], "managementTlsCert" : "security/certs/management/server.pem", "managementTlsPk" : "security/keys/management/server.key.pem", "managementTlsPkPwd" : "security/pass/management/key_pwd.txt", "managementTlsCrlPath" : "security/management/certs/", "managementTlsCrlFiles" : ["server_crl.pem"], "kmcKsfMaster" : "tools/pmt/master/ksfa", "kmcKsfStandby" : "tools/pmt/standby/ksfb", "inferMode" : "standard", "interCommTLSEnabled" : true, "interCommPort" : 1121, "interCommTlsCaPath" : "security/grpc/ca/", "interCommTlsCaFiles" : ["ca.pem"], "interCommTlsCert" : "security/grpc/certs/server.pem", "interCommPk" : "security/grpc/keys/server.key.pem", "interCommPkPwd" : "security/grpc/pass/key_pwd.txt", "interCommTlsCrlPath" : "security/grpc/certs/", "interCommTlsCrlFiles" : ["server_crl.pem"], "openAiSupport" : "vllm" }, "BackendConfig" : { "backendName" : "mindieservice_llm_engine", "modelInstanceNumber" : 1, "npuDeviceIds" : [[0,1,2,3]], # 指定使用哪些npu卡 "tokenizerProcessNumber" : 8, "multiNodesInferEnabled" : false, "multiNodesInferPort" : 1120, "interNodeTLSEnabled" : true, "interNodeTlsCaPath" : "security/grpc/ca/", "interNodeTlsCaFiles" : ["ca.pem"], "interNodeTlsCert" : "security/grpc/certs/server.pem", "interNodeTlsPk" : "security/grpc/keys/server.key.pem", "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt", "interNodeTlsCrlPath" : "security/grpc/certs/", "interNodeTlsCrlFiles" : ["server_crl.pem"], "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa", "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb", "ModelDeployConfig" : { "maxSeqLen" : 2560, # 输入输出总序列长度 "maxInputTokenLen" : 2048, # 输入token数 "truncation" : false, "ModelConfig" : [ { "modelInstanceType" : "Standard", "modelName" : "DeepSeek-R1-Distill-Llama-70B", # 模型名 "modelWeightPath" : "/models", # 容器内模型的挂载路径 "worldSize" : 4, # 使用npu卡数 "cpuMemSize" : 5, "npuMemSize" : -1, "backendType" : "atb", "trustRemoteCode" : false } ] }, "ScheduleConfig" : { "templateType" : "Standard", "templateName" : "Standard_LLM", "cacheBlockSize" : 128, "maxPrefillBatchSize" : 50, "maxPrefillTokens" : 8192, "prefillTimeMsPerReq" : 150, "prefillPolicyType" : 0, "decodeTimeMsPerReq" : 50, "decodePolicyType" : 0, "maxBatchSize" : 200, "maxIterTimes" : 512, "maxPreemptCount" : 0, "supportSelectBatch" : false, "maxQueueDelayMicroseconds" : 5000 } } }
-
To use the service's online metrics monitoring, set the following environment variable:
export MIES_SERVICE_MONITOR_MODE=1
-
Start the service in the background
nohup ./bin/mindieservice_daemon > 70B.log 2>&1 &
tail -f 70B.log
Startup has succeeded once "success" appears in the log.
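Optional checks (not in the original steps) to confirm the daemon process is alive and the log reports success:
ps -ef | grep mindieservice_daemon | grep -v grep
grep -i success 70B.log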
-
Send a test request
The "model" field in the request must match the modelName set in the config file.
curl -H "Accept: application/json" -H "Content-type: application/json" \
  --cacert ca.pem --cert client.pem --key client.key.pem \
  -X POST -d '{
    "model": "DeepSeek-R1-Distill-Llama-70B",
    "messages": [{ "role": "system", "content": "帮我定做一份去南京中山陵游玩的攻略." }],
    "max_tokens": 2048,
    "presence_penalty": 1.03,
    "frequency_penalty": 1.0,
    "seed": null,
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": false
  }' http://10.0.0.10:1080/v1/chat/completions