一、部署Qwen-7B
1.创建conda虚拟环境
conda create -n qwen-7b python=3.10
conda activate qwen-7b
2.拉取代码
git clone https://github.com/QwenLM/Qwen-7B.git
3.进入Qwen-7B目录,下载模型
git clone https://www.modelscope.cn/qwen/Qwen-7B-Chat.git
4.安装环境依赖
cd Qwen-7B
pip install -r requirements.txt
pip install -r requirements_web_demo.txt
# 如果使用量化模型Qwen-7B-Chat-Int4,需要安装
pip install auto-gptq optimum
5.修改配置文件
# 修改web_demo.py和cli_demo.py文件
DEFAULT_CKPT_PATH = '/mnt/workspace/Qwen-7B/Qwen-7B-Chat'
# device_map=device_map修改为cuda
model = AutoModelForCausalLM.from_pretrained(
args.checkpoint_path,
# device_map=device_map,
device_map="cuda",
trust_remote_code=True,
resume_download=True,
).eval()
6.运行
# 网页端
python web_demo.py
# 命令行端
python cli_demo.py
7. 如果您的设备支持fp16或bf16,我们建议安装flash-attention(目前支持Flash Attention 2)以获得更高的效率和更低的内存占用。(flash-attention是可选的,项目无需安装即可正常运行)
git clone https://github.com/Dao-AILab/flash-attention
cd flash-attention && pip install .
# Below are optional. Installing them might be slow.
# pip install csrc/layer_norm
# If the version of flash-attn is higher than 2.1.1, the following is not needed.
# pip install csrc/rotary
二、部署Qwen-VL
1.创建conda虚拟环境
conda create -n qwen-vl python=3.10
conda activate qwen-vl
2.拉取代码
git clone https://github.com/QwenLM/Qwen-VL
3.进入Qwen-VL目录,下载模型
git clone https://www.modelscope.cn/qwen/Qwen-VL-Chat-Int4.git
4.安装环境依赖
cd Qwen-VL
pip install -r requirements.txt
# 修改requirements_web_demo.txt,将gradio版本固定为gradio==3.39
pip install -r requirements_web_demo.txt
5.安装torch torchvision torchaudio
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
6.安装A卡版本auto-gptq
pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/rocm542/
pip install -q optimum
7.修改web_demo_mm.py文件
DEFAULT_CKPT_PATH = '/mnt/workspace/Qwen-VL/Qwen-VL-Chat-Int4'
# device_map=device_map修改为cuda
model = AutoModelForCausalLM.from_pretrained(
args.checkpoint_path,
# device_map=device_map,
device_map="cuda",
trust_remote_code=True,
resume_download=True,
).eval()
8.运行
python web_demo_mm.py