# FastChat 推理部署 (FastChat inference deployment)
# Start the FastChat controller in the background. nohup keeps it alive after
# the launching shell exits; its console output is discarded.
nohup python3 -m fastchat.serve.controller >/dev/null 2>&1 &

# Start the model worker for Qwen1.5-14B-Chat, restricted to physical GPUs 2
# and 3, and register it with the controller at localhost:21001.
#   --worker-address  must agree with --port: this is the URL the worker
#                     reports to the controller. Left at its default it points
#                     at port 21002, so the controller could not reach a
#                     worker listening on 31001.
#   --num-gpus 2      spread the model across both visible GPUs; without it
#                     only the first visible GPU is used.
#                     NOTE(review): assumes a single card cannot hold the
#                     14B model -- confirm against the actual hardware.
# The trailing '&' backgrounds the worker like the controller above. Console
# output is appended to nohup.out, the file nohup itself uses when started
# from a terminal, so start-up errors are kept rather than lost.
CUDA_VISIBLE_DEVICES=2,3 nohup python3 -m fastchat.serve.model_worker \
  --model-path /home/ai4090/model/Qwen1.5-14B-Chat \
  --controller http://localhost:21001 \
  --port 31001 \
  --worker-address http://localhost:31001 \
  --num-gpus 2 \
  --model-names Qwen-14B-Chat \
  >>nohup.out 2>&1 &