Reference: https://github.com/SmartFlowAI/Llama3-Tutorial
Llama 3 Local Web Demo Deployment
# Environment setup
conda create -n llama3 python=3.10
conda activate llama3
conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 pytorch-cuda=12.1 -c pytorch -c nvidia
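# Quick sanity check (assumes the llama3 env is active): confirm the PyTorch version and CUDA availability
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"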
# Prepare the model (on InternStudio, symlink the shared weights instead of downloading)
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/meta-llama/Meta-Llama-3-8B-Instruct ~/model/Meta-Llama-3-8B-Instruct
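# Outside InternStudio the shared path above does not exist; a hedged alternative is to pull
# the weights from Hugging Face (assumes huggingface_hub is installed, you have been granted
# access to the gated meta-llama repo, and you have run `huggingface-cli login`):
huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --local-dir ~/model/Meta-Llama-3-8B-Instruct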
# Web demo deployment
cd ~
git clone https://github.com/SmartFlowAI/Llama3-Tutorial
cd ~
git clone -b v0.1.18 https://github.com/InternLM/XTuner
cd XTuner
pip install -e .
streamlit run ~/Llama3-Tutorial/tools/internstudio_web_demo.py \
~/model/Meta-Llama-3-8B-Instruct
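# Streamlit listens on port 8501 by default. When the demo runs on a remote InternStudio
# machine, forward the port from your local machine (46619 is a placeholder; use the SSH
# port assigned to your own instance):
ssh -CNg -L 8501:127.0.0.1:8501 root@ssh.intern-ai.org.cn -p 46619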
Llama 3 Personal Assistant Self-Cognition Fine-Tuning
# Prepare the self-cognition training dataset
cd ~/Llama3-Tutorial
python tools/gdata.py
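# The generated file follows XTuner's single-turn conversation schema, roughly the shape
# sketched below (illustrative only; the exact fields, values, and output path are
# determined by gdata.py):
# [{"conversation": [{"input": "Who are you?", "output": "I am ...'s assistant ..."}]}]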
# Train the model
cd ~/Llama3-Tutorial
# Start training (DeepSpeed-accelerated); takes about 24 minutes on an A100 with 40 GB of VRAM
xtuner train configs/assistant/llama3_8b_instruct_qlora_assistant.py --work-dir /root/llama3_pth
# Convert the adapter from PTH to HF format
xtuner convert pth_to_hf /root/llama3_pth/llama3_8b_instruct_qlora_assistant.py \
/root/llama3_pth/iter_500.pth \
/root/llama3_hf_adapter
# Merge the adapter into the base model
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
/root/llama3_hf_adapter \
/root/llama3_hf_merged
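# Optional: verify the merged directory loads cleanly (a light check via the tokenizer;
# assumes transformers is installed in the env)
python -c "from transformers import AutoTokenizer; print(AutoTokenizer.from_pretrained('/root/llama3_hf_merged'))"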
# Inference verification
streamlit run ~/Llama3-Tutorial/tools/internstudio_web_demo.py \
/root/llama3_hf_merged
Llama 3 Image Understanding Fine-Tuning (LLaVA)
# Prepare the visual encoder weights
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/openai/clip-vit-large-patch14-336 .
# Prepare the image projector weights
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/xtuner/llama3-llava-iter_2181.pth .
# Data preparation
cd ~
git clone https://github.com/InternLM/tutorial -b camp2
python ~/tutorial/xtuner/llava/llava_data/repeat.py \
-i ~/tutorial/xtuner/llava/llava_data/unique_data.json \
-o ~/tutorial/xtuner/llava/llava_data/repeated_data.json \
-n 200
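# Sanity check: the repeated file should hold 200x the original records (the path comes
# from the -o argument above)
python -c "import json,os; print(len(json.load(open(os.path.expanduser('~/tutorial/xtuner/llava/llava_data/repeated_data.json')))))"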
# Fine-tuning procedure
pip install deepspeed
conda install mpi4py
xtuner train ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py --work-dir ~/llama3_llava_pth --deepspeed deepspeed_zero2
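# Convert the provided pretrain-stage weights (the image projector) to HF format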
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py \
~/model/llama3-llava-iter_2181.pth \
~/llama3_llava_pth/pretrain_iter_2181_hf
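# Convert the fine-tuned weights produced by the training run above to HF format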
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py \
~/llama3_llava_pth/iter_1200.pth \
~/llama3_llava_pth/iter_1200_hf
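# Chat with the pretrain-stage weights; at this stage, expect the model to mainly caption the image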
export MKL_SERVICE_FORCE_INTEL=1
xtuner chat /root/model/Meta-Llama-3-8B-Instruct \
--visual-encoder /root/model/clip-vit-large-patch14-336 \
--llava /root/llama3_llava_pth/pretrain_iter_2181_hf \
--prompt-template llama3_chat \
--image /root/tutorial/xtuner/llava/llava_data/test_img/oph.jpg
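# Chat with the fine-tuned weights; the model should now answer questions about the image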
export MKL_SERVICE_FORCE_INTEL=1
xtuner chat /root/model/Meta-Llama-3-8B-Instruct \
--visual-encoder /root/model/clip-vit-large-patch14-336 \
--llava /root/llama3_llava_pth/iter_1200_hf \
--prompt-template llama3_chat \
--image /root/tutorial/xtuner/llava/llava_data/test_img/oph.jpg
Llama 3 Efficient Deployment in Practice (LMDeploy)
pip install -U lmdeploy[all]
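# --cache-max-entry-count sets the fraction of free GPU memory reserved for the KV cache
# (LMDeploy defaults to 0.8; 0.1 trades throughput for a much smaller memory footprint)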
lmdeploy chat /root/model/Meta-Llama-3-8B-Instruct/ --cache-max-entry-count 0.1
# W4A16 (AWQ) quantization
lmdeploy lite auto_awq \
/root/model/Meta-Llama-3-8B-Instruct \
--calib-dataset 'ptb' \
--calib-samples 128 \
--calib-seqlen 1024 \
--w-bits 4 \
--w-group-size 128 \
--work-dir /root/model/Meta-Llama-3-8B-Instruct_4bit
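# Rough size check: the 4-bit weights should come out to roughly a third of the FP16
# checkpoint (~16 GB down to ~5-6 GB; exact numbers vary)
du -sh /root/model/Meta-Llama-3-8B-Instruct_4bit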
lmdeploy chat /root/model/Meta-Llama-3-8B-Instruct_4bit --model-format awq --cache-max-entry-count 0.01
# Serving with LMDeploy (api_server)
lmdeploy serve api_server \
/root/model/Meta-Llama-3-8B-Instruct_4bit \
--model-format awq \
--quant-policy 0 \
--cache-max-entry-count 0.01 \
--server-name 0.0.0.0 \
--server-port 23333 \
--tp 1
# From your local machine, forward the server port (replace 46619 with the SSH port assigned to your instance)
ssh -CNg -L 23333:127.0.0.1:23333 root@ssh.intern-ai.org.cn -p 46619
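# With the tunnel up, the server exposes OpenAI-compatible endpoints. A minimal smoke test
# (list the served model name first, since it may differ from the local path):
curl http://localhost:23333/v1/models
curl http://localhost:23333/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "<name from /v1/models>", "messages": [{"role": "user", "content": "Hello!"}]}'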
# Connect to the API server with the command-line client
lmdeploy serve api_client http://localhost:23333
pip install gradio==3.50.2
lmdeploy serve gradio http://localhost:23333 \
--server-name 0.0.0.0 \
--server-port 6006
Llama 3 Agent Capability: Hands-On and Fine-Tuning
# On InternStudio, copy the shared Agent-FLAN dataset
cd ~
cp -r /root/share/new_models/internlm/Agent-FLAN .
chmod -R 755 Agent-FLAN
# Otherwise, download the dataset from Hugging Face
cd ~
git lfs install
git clone https://huggingface.co/datasets/internlm/Agent-FLAN
# Convert the dataset into a format XTuner can train on
python ~/Llama3-Tutorial/tools/convert_agentflan.py ~/Agent-FLAN/data
# Launch fine-tuning
export MKL_SERVICE_FORCE_INTEL=1
xtuner train ~/Llama3-Tutorial/configs/llama3-agentflan/llama3_8b_instruct_qlora_agentflan_3e.py --work-dir ~/llama3_agent_pth --deepspeed deepspeed_zero2
# Convert the weights to HF format
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-agentflan/llama3_8b_instruct_qlora_agentflan_3e.py \
~/llama3_agent_pth/iter_18516.pth \
~/llama3_agent_pth/iter_18516_hf
# Merge the weights
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
~/llama3_agent_pth/iter_18516_hf \
~/llama3_agent_pth/merged
# Alternatively, skip training and merge the provided pre-trained Agent-FLAN adapter instead
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
/share/new_models/agent-flan/iter_2316_hf \
~/llama3_agent_pth/merged
pip install lagent
# Web demo with the original model
streamlit run ~/Llama3-Tutorial/tools/agent_web_demo.py /root/model/Meta-Llama-3-8B-Instruct
# Web demo with the Agent-FLAN fine-tuned model
streamlit run ~/Llama3-Tutorial/tools/agent_web_demo.py /root/llama3_agent_pth/merged
# Example prompt to try in the demo: Please help me search for the InternLM2 Technical Report.
Llama 3 Capability Evaluation (OpenCompass)
cd ~
git clone https://github.com/open-compass/opencompass opencompass
cd opencompass
pip install -e .
pip install -r requirements.txt
pip install protobuf
export MKL_SERVICE_FORCE_INTEL=1
export MKL_THREADING_LAYER=GNU
# Download the evaluation datasets into data/
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
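# The datasets should now sit under data/ in the opencompass root
ls data | head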
# List all configurations
# python tools/list_configs.py
# List all configs related to llama (models) and ceval (datasets)
python tools/list_configs.py llama ceval
python run.py --datasets ceval_gen --hf-path /root/model/Meta-Llama-3-8B-Instruct --tokenizer-path /root/model/Meta-Llama-3-8B-Instruct --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True --model-kwargs trust_remote_code=True device_map='auto' --max-seq-len 2048 --max-out-len 16 --batch-size 4 --num-gpus 1 --debug
# If run.py fails on the rouge package, install it from source, then return to the opencompass root
git clone https://github.com/pltrdy/rouge
cd rouge
python setup.py install
cd ..
# Annotated version of the same command. (Inline comments after "\" would break shell
# line continuation, so the flags are explained here instead.)
#   --datasets          datasets to evaluate on
#   --hf-path           HuggingFace model path
#   --tokenizer-path    HuggingFace tokenizer path (can be omitted if identical to the model path)
#   --tokenizer-kwargs  arguments for building the tokenizer
#   --model-kwargs      arguments for building the model
#   --max-seq-len       maximum sequence length the model accepts
#   --max-out-len       maximum number of tokens to generate
#   --batch-size        batch size
#   --num-gpus          number of GPUs required to run the model
python run.py \
    --datasets ceval_gen \
    --hf-path /root/model/Meta-Llama-3-8B-Instruct \
    --tokenizer-path /root/model/Meta-Llama-3-8B-Instruct \
    --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \
    --model-kwargs device_map='auto' trust_remote_code=True \
    --max-seq-len 2048 \
    --max-out-len 16 \
    --batch-size 4 \
    --num-gpus 1 \
    --debug
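# By default, OpenCompass writes results and prediction logs under outputs/default/<timestamp>/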
# Evaluate via a config file instead of command-line flags
python run.py '/root/opencompass/configs/eval_llama3_instruct.py'