Deploying gemma-3-27b-it Locally
0. Introduction
gemma-3-27b-it supports multimodal input; below is a simple local deployment and test.
1. Create a virtual environment
conda create -n vllm_nightly python=3.12 -y
conda activate vllm_nightly
2. Install vLLM and other dependencies
pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
pip install flash-attn --no-build-isolation
pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6
pip install git+https://github.com/huggingface/transformers@v4.50.0
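Optionally, run a quick sanity check inside the vllm_nightly environment to confirm the nightly wheel and the pinned transformers build import cleanly and can see the GPUs (a minimal sketch; the file name sanity_check.py is arbitrary):
# sanity_check.py -- confirm the freshly installed stack imports and sees the GPUs
import torch
import transformers
import vllm

print("torch       :", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("transformers:", transformers.__version__)  # installed from the v4.50.0 tag above
print("vllm        :", vllm.__version__)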
3. Launch gemma-3-27b-it
Create examples/tool_chat_template_gemma3_pythonic.jinja to enable tool calling. The custom chat template was created by starting from the google/gemma-3-4b-it model's default template and combining it with parts of the Llama pythonic tool template.
# cat examples/tool_chat_template_gemma3_pythonic.jinja
{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
{%- if messages[0]['content'] is string -%}
{%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
{%- else -%}
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
{%- endif -%}
{%- set loop_messages = messages[1:] -%}
{%- else -%}
{%- set first_user_prefix = "" -%}
{%- set loop_messages = messages -%}
{%- endif -%}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | selectattr("tool_calls", "undefined") -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{# When driven by an agent, the user/assistant/user/assistant/... order is not guaranteed, so do not raise an exception here #}
{#{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}#}
{%- endif -%}
{%- endfor -%}
{%- for message in loop_messages -%}
{%- if (message['role'] == 'assistant') -%}
{%- set role = "model" -%}
{%- elif (message['role'] == 'tool') -%}
{%- set role = "user" -%}
{%- else -%}
{%- set role = message['role'] -%}
{%- endif -%}
{{ '<start_of_turn>' + role + '\n' -}}
{%- if loop.first -%}
{{ first_user_prefix }}
{%- if tools is not none -%}
{{- "You have access to the following tools to help respond to the user. To call tools, please respond with a python list of the calls. DO NOT USE MARKDOWN SYNTAX.\n" }}
{{- 'Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] \n' }}
{{- "Do not use variables.\n\n" }}
{%- for t in tools -%}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor -%}
{%- endif -%}
{%- endif -%}
{%- if 'tool_calls' in message -%}
{{- '[' -}}
{%- for tool_call in message.tool_calls -%}
{%- if tool_call.function is defined -%}
{%- set tool_call = tool_call.function -%}
{%- endif -%}
{{- tool_call.name + '(' -}}
{%- for param in tool_call.arguments -%}
{{- param + '=' -}}
{{- "%sr" | format(tool_call.arguments[param]) -}}
{%- if not loop.last -%}, {% endif -%}
{%- endfor -%}
{{- ')' -}}
{%- if not loop.last -%},{%- endif -%}
{%- endfor -%}
{{- ']' -}}
{%- endif -%}
{%- if (message['role'] == 'tool') -%}
{{ '<tool_response>\n' -}}
{%- endif -%}
{%- if message['content'] is string -%}
{{ message['content'] | trim }}
{%- elif message['content'] is iterable -%}
{%- for item in message['content'] -%}
{%- if item['type'] == 'image' -%}
{{ '<start_of_image>' }}
{%- elif item['type'] == 'text' -%}
{{ item['text'] | trim }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ raise_exception("Invalid content type") }}
{%- endif -%}
{%- if (message['role'] == 'tool') -%}
{{ '</tool_response>' -}}
{%- endif -%}
{{ '<end_of_turn>\n' }}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{'<start_of_turn>model\n'}}
{%- endif -%}
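Before handing the template to vLLM, it can be rendered locally with plain Jinja2 as a rough syntax check. This is only a sketch with made-up messages; vLLM itself renders the template through the tokenizer's chat-template machinery, and raise_exception here is a stand-in defined just for this script:
# render_check.py -- render the custom template with made-up messages,
# just to catch template syntax errors before handing it to vLLM.
from jinja2 import Environment, FileSystemLoader


def raise_exception(message):
    # stand-in for the raise_exception() helper that chat-template runtimes provide
    raise ValueError(message)


env = Environment(loader=FileSystemLoader("examples"))
env.globals["raise_exception"] = raise_exception
template = env.get_template("tool_chat_template_gemma3_pythonic.jinja")

print(template.render(
    bos_token="<bos>",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    tools=None,
    add_generation_prompt=True,
))
With the template in place, start the server: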
#eval "$(conda shell.bash hook)"
#conda activate vllm_nightly
#VLLM_MM_INPUT_CACHE_GiB=0.1
CUDA_VISIBLE_DEVICES=3,1,0,2 \
VLLM_USE_V1=1 \
VLLM_WORKER_MULTIPROC_METHOD=spawn \
TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 \
vllm serve /root/HuggingFaceCache/models--google--gemma-3-27b-it \
--trust-remote-code --served-model-name gpt-4o \
--gpu-memory-utilization 0.99 \
--tensor-parallel-size 4 --port 8000 \
--api-key sk-123456 --max-model-len 32768 \
--enable-chunked-prefill --limit-mm-per-prompt image=3 \
--max-num-seqs 256 \
--enable-auto-tool-choice \
--tool-call-parser pythonic \
--chat-template examples/tool_chat_template_gemma3_pythonic.jinja
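Since --enable-auto-tool-choice and the pythonic parser are enabled, tool calling can be exercised through the standard OpenAI tools parameter. A minimal sketch, assuming the openai Python package is installed; get_weather is an illustrative tool, and the endpoint, API key and model name come from the launch command above:
# tool_call_test.py -- exercise the pythonic tool-call parser enabled above.
# get_weather is a made-up tool, not something the deployment provides.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-123456")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="gpt-4o",  # --served-model-name
    messages=[{"role": "user", "content": "What's the weather in Beijing right now?"}],
    tools=tools,
    tool_choice="auto",
)
print(resp.choices[0].message.tool_calls)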
4. Test gemma-3-27b-it
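A simple multimodal smoke test against the OpenAI-compatible endpoint, again assuming the openai package is installed; the image URL is only a placeholder, so substitute any reachable image:
# multimodal_test.py -- one-image chat request against the local server.
# Endpoint, API key and model name match the `vllm serve` flags above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-123456")

response = client.chat.completions.create(
    model="gpt-4o",  # --served-model-name
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in one sentence."},
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/cat.jpg"}},  # placeholder URL
            ],
        }
    ],
    max_tokens=256,
)
print(response.choices[0].message.content)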
Done!