本地部署 gemma-3-27b-it

engchina

已于 2025-03-23 08:02:01 修改

阅读量809

点赞数 4

分类专栏： LINUX 文章标签： gemma-3-27b 本地部署

于 2025-03-22 11:19:37 首次发布

本文链接：https://blog.csdn.net/engchina/article/details/146436676

版权

LINUX 专栏收录该内容

884 篇文章

订阅专栏

本地部署 gemma-3-27b-it

0. 引言

gemma-3-27b-it支持多模态，下面是一个简单测试。
（此处原为多模态测试的截图。）

1. 创建虚拟环境

# Create a dedicated conda environment (Python 3.12) named vllm_nightly
# without prompting for confirmation, then switch the current shell to it.
conda create --name vllm_nightly --yes python=3.12
conda activate vllm_nightly

2. 安装 vLLM 及其他依赖库

# vLLM pre-release, resolved with the nightly wheel index as an extra source.
pip install --pre --extra-index-url https://wheels.vllm.ai/nightly vllm
# flash-attn is installed without build isolation.
pip install --no-build-isolation flash-attn
# flashinfer comes from its own wheel index
# (presumably the CUDA 12.4 / torch 2.6 build, judging by the URL -- confirm).
pip install --index-url https://flashinfer.ai/whl/cu124/torch2.6 flashinfer-python
# transformers pinned to the v4.50.0 tag, installed straight from GitHub.
pip install "git+https://github.com/huggingface/transformers@v4.50.0"

3. 启动 gemma-3-27b-it

创建 examples/tool_chat_template_gemma3_pythonic.jinja，以支持 tool calling。

The custom chat template was created by starting from the google/gemma-3-4b-it model’s default template and combining it with parts of the Llama pythonic tool template.

# cat examples/tool_chat_template_gemma3_pythonic.jinja
{#-
  Chat template for Gemma 3 with pythonic tool calling.
  A leading system message is not rendered as its own turn: its text is kept
  as a prefix and emitted at the start of the first rendered turn. When tools
  are supplied, the calling instructions and each tool definition (as JSON)
  are emitted right after that prefix.
-#}
{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
    {%- if messages[0]['content'] is string -%}
        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
    {%- else -%}
        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
    {%- endif -%}
    {%- set loop_messages = messages[1:] -%}
{%- else -%}
    {%- set first_user_prefix = "" -%}
    {%- set loop_messages = messages -%}
{%- endif -%}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}
{#- Role-alternation check over non-tool messages; intentionally a no-op (see the note inside). -#}
{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | selectattr("tool_calls", "undefined") -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
        {# When used with agents, messages are not guaranteed to alternate user/assistant/user/assistant/..., so no exception is raised. #}
        {#{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}#}
    {%- endif -%}
{%- endfor -%}
{#- Render every turn. Role mapping: assistant -> model, tool -> user, anything else is passed through. -#}
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'assistant') -%}
        {%- set role = "model" -%}
    {%- elif (message['role'] == 'tool') -%}
        {%- set role = "user" -%}
    {%- else -%}
        {%- set role = message['role'] -%}
    {%- endif -%}
    {{ '<start_of_turn>' + role + '\n' -}}
    {%- if loop.first -%}
        {{ first_user_prefix }}
        {%- if tools is not none -%}
            {{- "You have access to the following tools to help respond to the user. To call tools, please respond with a python list of the calls. DO NOT USE MARKDOWN SYNTAX.\n" }}
            {{- 'Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] \n' }}
            {{- "Do not use variables.\n\n" }}
            {%- for t in tools -%}
                {{- t | tojson(indent=4) }}
                {{- "\n\n" }}
            {%- endfor -%}
        {%- endif -%}
    {%- endif -%}

    {#- Earlier tool calls are replayed as a python-style list: [name(arg=value, ...),name2(...)]. -#}
    {%- if 'tool_calls' in message -%}
        {{- '[' -}}
        {%- for tool_call in message.tool_calls -%}
            {%- if tool_call.function is defined -%}
                {%- set tool_call = tool_call.function -%}
            {%- endif -%}
            {{- tool_call.name + '(' -}}
            {%- for param in tool_call.arguments -%}
                {{- param + '=' -}}
                {#- Use the repr of the value so strings come out quoted; a %s followed by a stray 'r' would emit unquoted text with a trailing r. -#}
                {{- "%r" | format(tool_call.arguments[param]) -}}
                {%- if not loop.last -%}, {% endif -%}
            {%- endfor -%}
            {{- ')' -}}
            {%- if not loop.last -%},{%- endif -%}
        {%- endfor -%}
        {{- ']' -}}
    {%- endif -%}

    {#- Tool results are wrapped in tool_response tags around the message content. -#}
    {%- if (message['role'] == 'tool') -%}
        {{ '<tool_response>\n' -}}
    {%- endif -%}
    {%- if message['content'] is string -%}
        {{ message['content'] | trim }}
    {%- elif message['content'] is iterable -%}
        {%- for item in message['content'] -%}
            {%- if item['type'] == 'image' -%}
                {{ '<start_of_image>' }}
            {%- elif item['type'] == 'text' -%}
                {{ item['text'] | trim }}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{ raise_exception("Invalid content type") }}
    {%- endif -%}
    {%- if (message['role'] == 'tool') -%}
        {{ '</tool_response>' -}}
    {%- endif -%}
    {{ '<end_of_turn>\n' }}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{'<start_of_turn>model\n'}}
{%- endif -%}

#eval "$(conda shell.bash hook)"
#conda activate vllm_nightly
# Start `vllm serve` for the local gemma-3-27b-it checkpoint on port 8000,
# using the pythonic tool-call parser together with the chat template at
# examples/tool_chat_template_gemma3_pythonic.jinja.
#
# The NAME=value prefixes apply only to this one vllm process. Keep them in a
# single unbroken backslash-continued command: a commented-out line in the
# middle swallows its own trailing backslash, which ends the command there and
# turns the preceding assignments into plain shell variables that vllm never
# sees.
#
# Optional setting, currently disabled. To enable it, add it as one more
# "NAME=value \" line directly before "vllm serve":
#   VLLM_MM_INPUT_CACHE_GiB=0.1
CUDA_VISIBLE_DEVICES=3,1,0,2 \
VLLM_USE_V1=1 \
VLLM_WORKER_MULTIPROC_METHOD=spawn \
TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 \
vllm serve /root/HuggingFaceCache/models--google--gemma-3-27b-it \
--trust-remote-code --served-model-name gpt-4o \
--gpu-memory-utilization 0.99 \
--tensor-parallel-size 4 --port 8000 \
--api-key sk-123456 --max-model-len 32768 \
--enable-chunked-prefill --limit-mm-per-prompt image=3 \
--max-num-seqs 256 \
--enable-auto-tool-choice \
--tool-call-parser pythonic \
--chat-template examples/tool_chat_template_gemma3_pythonic.jinja