昇腾910B部署deepseek蒸馏版

昇腾910B部署deepseek蒸馏版

  1. mindie镜像下载

    登录后申请权限,审批通过后再下载镜像

    https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f

    在这里插入图片描述

  2. 启动mindie容器

    # Start the MindIE container detached, as root, sharing the host IPC and
    # network namespaces. All eight NPUs (davinci0-7) plus the Ascend
    # management devices, driver, firmware and npu-smi/hccn_tool binaries are
    # passed through from the host.
    # The host model directory /data/DeepSeek-R1-Distill-Llama-70B is mounted
    # at /models inside the container.
    # NOTE: comments must not follow a trailing backslash; anything after "\"
    # on the same line breaks the line continuation.
    docker run -itd -u root \
        --ipc=host \
        --network=host \
        --device=/dev/davinci0 \
        --device=/dev/davinci1 \
        --device=/dev/davinci2 \
        --device=/dev/davinci3 \
        --device=/dev/davinci4 \
        --device=/dev/davinci5 \
        --device=/dev/davinci6 \
        --device=/dev/davinci7 \
        --device=/dev/davinci_manager \
        --device=/dev/devmm_svm \
        --device=/dev/hisi_hdc \
        -v /var/log/npu/:/usr/slog \
        -v /usr/local/dcmi:/usr/local/dcmi \
        -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
        -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
        -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
        -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
        -v /usr/bin/hccn_tool:/usr/bin/hccn_tool \
        -v /data/DeepSeek-R1-Distill-Llama-70B:/models \
        --name Deepseek-R1-70B \
        --privileged=true \
        --entrypoint=/bin/bash swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.0.T3-800I-A2-py311-openeuler24.03-lts
    

在这里插入图片描述

  1. 启动服务化

    1. 进入容器

      # Open an interactive shell in the container. The container is referenced
      # by the name given via --name at creation time, because container IDs
      # differ on every machine.
      docker exec -it Deepseek-R1-70B bash
      
    2. 切换到mindie-service配置路径

      # Work from the mindie-service directory: the following steps use the
      # relative paths conf/config.json and ./bin/mindieservice_daemon.
      cd /usr/local/Ascend/mindie/latest/mindie-service
      
    3. 修改配置文件:vim conf/config.json

      以下配置文件中用 # 标注的内容均可按需自定义。注意:JSON 本身不支持注释,写入 conf/config.json 前请删除所有 # 及其后的说明文字,否则配置文件可能无法解析。

      {
          "Version" : "1.0.0",
          "LogConfig" :
          {
              "logLevel" : "Info",
              "logFileSize" : 20,
              "logFileNum" : 20,
              "logPath" : "logs/mindie-server.log"
          },
      
          "ServerConfig" :
          {
              "ipAddress" : "10.0.0.10", # 允许外部访问,暴露本机ip
              "managementIpAddress" : "10.0.0.10",
              "port" : 1080, # 推理端口
              "managementPort" : 1081, # 管理端口
              "metricsPort" : 1082, # metrics指标端口
              "allowAllZeroIpListening" : false,
              "maxLinkNum" : 1000,
              "httpsEnabled" : false, # 禁用https
              "fullTextEnabled" : false,
              "tlsCaPath" : "security/ca/",
              "tlsCaFile" : ["ca.pem"],
              "tlsCert" : "security/certs/server.pem",
              "tlsPk" : "security/keys/server.key.pem",
              "tlsPkPwd" : "security/pass/key_pwd.txt",
              "tlsCrlPath" : "security/certs/",
              "tlsCrlFiles" : ["server_crl.pem"],
              "managementTlsCaFile" : ["management_ca.pem"],
              "managementTlsCert" : "security/certs/management/server.pem",
              "managementTlsPk" : "security/keys/management/server.key.pem",
              "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
              "managementTlsCrlPath" : "security/management/certs/",
              "managementTlsCrlFiles" : ["server_crl.pem"],
              "kmcKsfMaster" : "tools/pmt/master/ksfa",
              "kmcKsfStandby" : "tools/pmt/standby/ksfb",
              "inferMode" : "standard",
              "interCommTLSEnabled" : true,
              "interCommPort" : 1121,
              "interCommTlsCaPath" : "security/grpc/ca/",
              "interCommTlsCaFiles" : ["ca.pem"],
              "interCommTlsCert" : "security/grpc/certs/server.pem",
              "interCommPk" : "security/grpc/keys/server.key.pem",
              "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
              "interCommTlsCrlPath" : "security/grpc/certs/",
              "interCommTlsCrlFiles" : ["server_crl.pem"],
              "openAiSupport" : "vllm"
          },
      
          "BackendConfig" : {
              "backendName" : "mindieservice_llm_engine",
              "modelInstanceNumber" : 1,
              "npuDeviceIds" : [[0,1,2,3]], # 指定使用哪些npu卡
              "tokenizerProcessNumber" : 8,
              "multiNodesInferEnabled" : false,
              "multiNodesInferPort" : 1120,
              "interNodeTLSEnabled" : true,
              "interNodeTlsCaPath" : "security/grpc/ca/",
              "interNodeTlsCaFiles" : ["ca.pem"],
              "interNodeTlsCert" : "security/grpc/certs/server.pem",
              "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
              "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
              "interNodeTlsCrlPath" : "security/grpc/certs/",
              "interNodeTlsCrlFiles" : ["server_crl.pem"],
              "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
              "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
              "ModelDeployConfig" :
              {
                  "maxSeqLen" : 2560, # 输入输出总序列长度
                  "maxInputTokenLen" : 2048, # 输入token数
                  "truncation" : false,
                  "ModelConfig" : [
                      {
                          "modelInstanceType" : "Standard",
                          "modelName" : "DeepSeek-R1-Distill-Llama-70B", # 模型名
                          "modelWeightPath" : "/models", # 容器内模型的挂载路径
                          "worldSize" : 4, # 使用npu卡数
                          "cpuMemSize" : 5,
                          "npuMemSize" : -1,
                          "backendType" : "atb",
                          "trustRemoteCode" : false
                      }
                  ]
              },
      
              "ScheduleConfig" :
              {
                  "templateType" : "Standard",
                  "templateName" : "Standard_LLM",
                  "cacheBlockSize" : 128,
      
                  "maxPrefillBatchSize" : 50,
                  "maxPrefillTokens" : 8192,
                  "prefillTimeMsPerReq" : 150,
                  "prefillPolicyType" : 0,
      
                  "decodeTimeMsPerReq" : 50,
                  "decodePolicyType" : 0,
      
                  "maxBatchSize" : 200,
                  "maxIterTimes" : 512,
                  "maxPreemptCount" : 0,
                  "supportSelectBatch" : false,
                  "maxQueueDelayMicroseconds" : 5000
              }
          }
      }
      
    4. 如果要使用服务化的在线指标监控,需要配置环境变量

      # Optional: opt in to the service's online metrics monitoring. Export it
      # in the same shell that will launch the daemon so the process inherits it.
      export MIES_SERVICE_MONITOR_MODE=1
      
  2. 后台启动服务化

    # Launch the MindIE service daemon in the background, detached from the
    # terminal, with stdout and stderr captured in 70B.log.
    nohup ./bin/mindieservice_daemon > 70B.log 2>&1 &
    # Follow the log; startup has succeeded once "success" appears in the
    # output (press Ctrl-C to stop following; the daemon keeps running).
    tail -f 70B.log
    
  3. 请求测试

    # Send a test chat-completion request to the inference port (1080).
    # The "model" value must match the modelName set in conf/config.json.
    # The JSON body must not contain comments, otherwise it is not valid JSON.
    # The service is configured with httpsEnabled=false, so plain HTTP is used;
    # when HTTPS is enabled, switch the URL to https:// and add
    # --cacert ca.pem --cert client.pem --key client.key.pem.
    curl -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{
     "model": "DeepSeek-R1-Distill-Llama-70B",
     "messages": [{
      "role": "system",
      "content": "帮我定做一份去南京中山陵游玩的攻略."
     }],
     "max_tokens": 2048,
     "presence_penalty": 1.03,
     "frequency_penalty": 1.0,
     "seed": null,
     "temperature": 0.7,
     "top_p": 0.95,
     "stream": false
    }' http://10.0.0.10:1080/v1/chat/completions
    

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值