(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker exec -it Qwen2.5-VL-72B-Instruct-AWQ bash
echo "nameserver 8.8.8.8" | tee /etc/resolv.conf >/dev/null
echo "nameserver 114.114.114.114" | tee -a /etc/resolv.conf >/dev/null
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
pip config set install.trusted-host mirrors.aliyun.com
pip install vllm[video]
pip install vllm[audio]
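Before running the installs, it can save a retry loop to confirm that the DNS and mirror settings actually took effect inside the container (the name-resolution failure further below is exactly what happens when they have not). A minimal check, nothing here is specific to this image:

cat /etc/resolv.conf              # should list 8.8.8.8 and 114.114.114.114
pip config list                   # should show global.index-url and install.trusted-host
getent hosts mirrors.aliyun.com   # DNS resolution test; curl -I https://mirrors.aliyun.com/ also works if getent is unavailable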
root@64ab8e1634b9:/vllm-workspace# pip install qwen-vl-utils
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f160fb11d60>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /pypi/simple/qwen-vl-utils/
WARNING: Retrying (Retry(total=3, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f160f893470>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /pypi/simple/qwen-vl-utils/
WARNING: Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.HTTPSConnection object at 0x7f160f893740>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')': /pypi/simple/qwen-vl-utils/
^CERROR: Operation cancelled by user
root@64ab8e1634b9:/vllm-workspace# echo "nameserver 8.8.8.8" | tee /etc/resolv.conf >/dev/null
root@64ab8e1634b9:/vllm-workspace# echo "nameserver 114.114.114.114" | tee -a /etc/resolv.conf >/dev/null
root@64ab8e1634b9:/vllm-workspace# pip install qwen-vl-utils
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting qwen-vl-utils
Downloading https://mirrors.aliyun.com/pypi/packages/9e/f2/b489f9077bd410f44400d056ad7b0246ee3c516bf9d65b362ea35a57ce1c/qwen_vl_utils-0.0.10-py3-none-any.whl (6.7 kB)
Collecting av (from qwen-vl-utils)
Downloading https://mirrors.aliyun.com/pypi/packages/40/ff/2237657852dac32052b7401da6bc7fc23127dc7a1ccbb23d4c640c8ea95b/av-14.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.4 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.4/35.4 MB 471.6 kB/s eta 0:00:00
Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from qwen-vl-utils) (24.2)
Requirement already satisfied: pillow in /usr/local/lib/python3.12/dist-packages (from qwen-vl-utils) (11.1.0)
Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from qwen-vl-utils) (2.32.3)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->qwen-vl-utils) (3.4.1)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->qwen-vl-utils) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->qwen-vl-utils) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->qwen-vl-utils) (2025.1.31)
Installing collected packages: av, qwen-vl-utils
Successfully installed av-14.3.0 qwen-vl-utils-0.0.10
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
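qwen-vl-utils is used mainly for its process_vision_info helper, which converts Qwen-style multimodal messages into the image/video inputs the processor expects. A minimal sketch to exercise the freshly installed package; the image path is a placeholder I made up, not something shipped in the container:

python3 - <<'EOF'
from qwen_vl_utils import process_vision_info

# Placeholder image path -- replace with a real local file or URL before running.
messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": "file:///models/demo.jpg"},
        {"type": "text", "text": "Describe this image."},
    ],
}]

# Returns the extracted image and video inputs (either may be None).
image_inputs, video_inputs = process_vision_info(messages)
print(len(image_inputs or []), "image(s),", len(video_inputs or []), "video clip(s)")
EOF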
root@64ab8e1634b9:/vllm-workspace# pip install accelerate
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.5.2)
Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.12/dist-packages (from accelerate) (1.26.4)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from accelerate) (24.2)
Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from accelerate) (7.0.0)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.12/dist-packages (from accelerate) (6.0.2)
Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from accelerate) (2.6.0)
Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.12/dist-packages (from accelerate) (0.29.3)
Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from accelerate) (0.5.3)
Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21.0->accelerate) (3.18.0)
Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21.0->accelerate) (2025.3.0)
Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21.0->accelerate) (2.32.3)
Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21.0->accelerate) (4.67.1)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.21.0->accelerate) (4.12.2)
Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.4.2)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.1.6)
Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)
Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)
Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)
Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (9.1.0.70)
Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.5.8)
Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.2.1.3)
Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (10.3.5.147)
Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (11.6.1.9)
Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.3.1.170)
Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (0.6.2)
Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (2.21.5)
Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)
Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)
Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (3.2.0)
Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (78.0.2)
Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->accelerate) (1.13.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy==1.13.1->torch>=2.0.0->accelerate) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch>=2.0.0->accelerate) (3.0.2)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21.0->accelerate) (3.4.1)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21.0->accelerate) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21.0->accelerate) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.21.0->accelerate) (2025.1.31)
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
root@64ab8e1634b9:/vllm-workspace# pip install transformers
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.50.0)
Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.18.0)
Requirement already satisfied: huggingface-hub<1.0,>=0.26.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.29.3)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (1.26.4)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (24.2)
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)
Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.3)
Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.21.1)
Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.5.3)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)
Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.26.0->transformers) (2025.3.0)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.26.0->transformers) (4.12.2)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.1)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.1.31)
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
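As the output shows, the vllm/vllm-openai:v0.8.2 image already ships accelerate and transformers, so both installs are no-ops. A quick sanity check that the bundled versions expose the Qwen2.5-VL classes (class name as documented for the Transformers integration; treat it as an assumption if your transformers version differs):

python3 -c "import transformers, accelerate; print(transformers.__version__, accelerate.__version__)"
python3 -c "from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor; print('Qwen2.5-VL classes available')"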
root@64ab8e1634b9:/vllm-workspace#
root@1f522a747005:/vllm-workspace# pip install ninja
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: ninja in /usr/local/lib/python3.12/dist-packages (1.11.1.4)
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
root@1f522a747005:/vllm-workspace# pip install flash-attn --no-build-isolation
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting flash-attn
Using cached https://mirrors.aliyun.com/pypi/packages/11/34/9bf60e736ed7bbe15055ac2dab48ec67d9dbd088d2b4ae318fd77190ab4e/flash_attn-2.7.4.post1.tar.gz (6.0 MB)
Preparing metadata (setup.py) ... done
Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (from flash-attn) (2.6.0)
Requirement already satisfied: einops in /usr/local/lib/python3.12/dist-packages (from flash-attn) (0.8.1)
Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (3.18.0)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (4.12.2)
Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (3.4.2)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (3.1.6)
Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (2025.3.0)
Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.127)
Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.127)
Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.127)
Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (9.1.0.70)
Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.5.8)
Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (11.2.1.3)
Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (10.3.5.147)
Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (11.6.1.9)
Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.3.1.170)
Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (0.6.2)
Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (2.21.5)
Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.127)
Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (12.4.127)
Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (3.2.0)
Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (78.0.2)
Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.12/dist-packages (from torch->flash-attn) (1.13.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy==1.13.1->torch->flash-attn) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch->flash-attn) (3.0.2)
Building wheels for collected packages: flash-attn
Building wheel for flash-attn (setup.py) ... done
Created wheel for flash-attn: filename=flash_attn-2.7.4.post1-cp312-cp312-linux_x86_64.whl size=186888639 sha256=79114229633796795dd8db26997a6ee2ea81e48cceccbcecd56142cca8297626
Stored in directory: /root/.cache/pip/wheels/14/a1/b5/f5e7044f6c720aa04f0b15ccf92726965c2f70231eaaca7fb9
Successfully built flash-attn
Installing collected packages: flash-attn
Successfully installed flash-attn-2.7.4.post1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
root@1f522a747005:/vllm-workspace#
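Because --no-build-isolation is used, flash-attn compiles against the torch and ninja already present in the image (which is why ninja was installed first), and building the wheel from source can take a while. Once it finishes, a one-line import check confirms the extension loads against the installed torch/CUDA stack:

python3 -c "import flash_attn; print(flash_attn.__version__)"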
root@1f522a747005:/vllm-workspace# exit
exit
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker ps|grep 72B
1f522a747005 vllm/vllm-openai:v0.8.2 "python3 -m vllm.ent…" About an hour ago Up About an hour 0.0.0.0:8011->8000/tcp, :::8011->8000/tcp Qwen2.5-VL-72B-Instruct-AWQ
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker commit 1f522a747005 vllm/vllm-openai:v0.8.2
sha256:20038740d99f94755b37120f6f264758e519139c317c3041b881e1c066068e37
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker images|grep 8.2
vllm/vllm-openai v0.8.2 20038740d99f 5 seconds ago 18.1GB
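Note that the commit above reuses the upstream tag vllm/vllm-openai:v0.8.2, so the original image keeps its layers but loses that tag. If you would rather keep the upstream image intact, committing to a separate tag works just as well; the tag name below is only an example, and docker-compose.yml would then need to reference it:

docker commit 1f522a747005 vllm/vllm-openai:v0.8.2-qwen-vl-deps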
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker compose -f docker-compose.yml down
[+] Running 2/2
✔ Container Qwen2.5-VL-72B-Instruct-AWQ Removed 2.8s
✔ Network qwen25-vl-72b-instruct-awq_default Removed 0.1s
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ#
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker compose -f docker-compose.yml up -d
[+] Running 2/2
✔ Network qwen25-vl-72b-instruct-awq_default Created 0.1s
✔ Container Qwen2.5-VL-72B-Instruct-AWQ Started 0.8s
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ#
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# docker logs -f Qwen2.5-VL-72B-Instruct-AWQ
INFO 04-14 23:15:17 [__init__.py:239] Automatically detected platform cuda.
(base) root@node15:/disk2/Qwen2.5-VL-72B-Instruct-AWQ# more docker-compose.yml
#version: '3.3'
services:
  # vllm
  vllm-openai:
    image: vllm/vllm-openai:v0.8.2
    container_name: Qwen2.5-VL-72B-Instruct-AWQ
    restart: unless-stopped
    runtime: nvidia
    ports:
      - 8011:8000
    volumes:
      - /disk2:/models
    command: >
      --model /models/Qwen2.5-VL-72B-Instruct-AWQ
      --tokenizer_mode="auto"
      --dtype=bfloat16
      --max_num_seqs=256
      --tensor_parallel_size=2
      --gpu-memory-utilization=0.9
      --max-model-len=32768
      --limit-mm-per-prompt image=2,video=1
      --served-model-name=Qwen2.5-VL-72B-Instruct-AWQ
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              device_ids: [ "2,3" ]
    ipc: host
networks:
  vllm:
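With the container back up, the served model can be smoke-tested through vLLM's OpenAI-compatible API on the mapped host port 8011. A minimal sketch: the image URL in the second request is a placeholder, and the model field must match --served-model-name from the compose file:

# list the served models
curl http://localhost:8011/v1/models

# one multimodal chat completion (replace the placeholder image URL)
curl http://localhost:8011/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Qwen2.5-VL-72B-Instruct-AWQ",
        "messages": [
          {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/demo.jpg"}},
            {"type": "text", "text": "Describe this image."}
          ]}
        ],
        "max_tokens": 128
      }'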