Reference: https://github.com/SmartFlowAI/Llama3-Tutorial
Llama 3 Local Web Demo Deployment
# Environment setup
conda create -n llama3 python=3.10
conda activate llama3
conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 pytorch-cuda=12.1 -c pytorch -c nvidia
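# Quick sanity check (assumes the llama3 env is active): confirm the PyTorch version and CUDA availability
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"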
# Prepare the model (on InternStudio, symlink the shared weights instead of downloading)
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/meta-llama/Meta-Llama-3-8B-Instruct ~/model/Meta-Llama-3-8B-Instruct
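# Outside InternStudio the shared path above does not exist; a hedged alternative is to pull
# the weights from Hugging Face (assumes huggingface_hub is installed, you have been granted
# access to the gated meta-llama repo, and you have run `huggingface-cli login`):
huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --local-dir ~/model/Meta-Llama-3-8B-Instruct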
# Web demo deployment
cd ~
git clone https://github.com/SmartFlowAI/Llama3-Tutorial
cd ~
git clone -b v0.1.18 https://github.com/InternLM/XTuner
cd XTuner
pip install -e .
streamlit run ~/Llama3-Tutorial/tools/internstudio_web_demo.py \
~/model/Meta-Llama-3-8B-Instruct
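# Streamlit listens on port 8501 by default. When the demo runs on a remote InternStudio
# machine, forward the port from your local machine (46619 is a placeholder; use the SSH
# port assigned to your own instance):
ssh -CNg -L 8501:127.0.0.1:8501 root@ssh.intern-ai.org.cn -p 46619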
Llama 3 Personal Assistant Self-Cognition Fine-Tuning
# Prepare the self-cognition training dataset
cd ~/Llama3-Tutorial
python tools/gdata.py
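# The generated file follows XTuner's single-turn conversation schema, roughly the shape
# sketched below (illustrative only; the exact fields, values, and output path are
# determined by gdata.py):
# [{"conversation": [{"input": "Who are you?", "output": "I am ...'s assistant ..."}]}]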
# Train the model
cd ~/Llama3-Tutorial
# Start training (DeepSpeed-accelerated); takes about 24 minutes on an A100 with 40 GB of VRAM
xtuner train configs/assistant/llama3_8b_instruct_qlora_assistant.py --work-dir /root/llama3_pth
# Convert the adapter from PTH to HF format
xtuner convert pth_to_hf /root/llama3_pth/llama3_8b_instruct_qlora_assistant.py \
/root/llama3_pth/iter_500.pth \
/root/llama3_hf_adapter
# Merge the adapter into the base model
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
/root/llama3_hf_adapter \
/root/llama3_hf_merged
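# Optional: verify the merged directory loads cleanly (a light check via the tokenizer;
# assumes transformers is installed in the env)
python -c "from transformers import AutoTokenizer; print(AutoTokenizer.from_pretrained('/root/llama3_hf_merged'))"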
# Inference verification
streamlit run ~/Llama3-Tutorial/tools/internstudio_web_demo.py \
/root/llama3_hf_merged
Llama 3 Image Understanding Fine-Tuning (LLaVA)
# Prepare the visual encoder weights
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/openai/clip-vit-large-patch14-336 .
# Prepare the image projector weights
mkdir -p ~/model
cd ~/model
ln -s /root/share/new_models/xtuner/llama3-llava-iter_2181.pth .
# Data preparation
cd ~
git clone https://github.com/InternLM/tutorial -b camp2
python ~/tutorial/xtuner/llava/llava_data/repeat.py \
-i ~/tutorial/xtuner/llava/llava_data/unique_data.json \
-o ~/tutorial/xtuner/llava/llava_data/repeated_data.json \
-n 200
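# Sanity check: the repeated file should hold 200x the original records (the path comes
# from the -o argument above)
python -c "import json,os; print(len(json.load(open(os.path.expanduser('~/tutorial/xtuner/llava/llava_data/repeated_data.json')))))"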
# Fine-tuning procedure
pip install deepspeed
conda install mpi4py
xtuner train ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py --work-dir ~/llama3_llava_pth --deepspeed deepspeed_zero2
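# Convert the provided pretrain-stage weights (the image projector) to HF format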
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py \
~/model/llama3-llava-iter_2181.pth \
~/llama3_llava_pth/pretrain_iter_2181_hf
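# Convert the fine-tuned weights produced by the training run above to HF format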
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-llava/llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_lora_e1_finetune.py \
~/llama3_llava_pth/iter_1200.pth \
~/llama3_llava_pth/iter_1200_hf
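# Chat with the pretrain-stage weights; at this stage, expect the model to mainly caption the image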
export MKL_SERVICE_FORCE_INTEL=1
xtuner chat /root/model/Meta-Llama-3-8B-Instruct \
--visual-encoder /root/model/clip-vit-large-patch14-336 \
--llava /root/llama3_llava_pth/pretrain_iter_2181_hf \
--prompt-template llama3_chat \
--image /root/tutorial/xtuner/llava/llava_data/test_img/oph.jpg
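# Chat with the fine-tuned weights; the model should now answer questions about the image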
export MKL_SERVICE_FORCE_INTEL=1
xtuner chat /root/model/Meta-Llama-3-8B-Instruct \
--visual-encoder /root/model/clip-vit-large-patch14-336 \
--llava /root/llama3_llava_pth/iter_1200_hf \
--prompt-template llama3_chat \
--image /root/tutorial/xtuner/llava/llava_data/test_img/oph.jpg
Llama 3 Efficient Deployment in Practice (LMDeploy)
pip install -U lmdeploy[all]
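# --cache-max-entry-count sets the fraction of free GPU memory reserved for the KV cache
# (LMDeploy defaults to 0.8; 0.1 trades throughput for a much smaller memory footprint)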
lmdeploy chat /root/model/Meta-Llama-3-8B-Instruct/ --cache-max-entry-count 0.1
# W4A16 (AWQ) quantization
lmdeploy lite auto_awq \
/root/model/Meta-Llama-3-8B-Instruct \
--calib-dataset 'ptb' \
--calib-samples 128 \
--calib-seqlen 1024 \
--w-bits 4 \
--w-group-size 128 \
--work-dir /root/model/Meta-Llama-3-8B-Instruct_4bit
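# Rough size check: the 4-bit weights should come out to roughly a third of the FP16
# checkpoint (~16 GB down to ~5-6 GB; exact numbers vary)
du -sh /root/model/Meta-Llama-3-8B-Instruct_4bit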
lmdeploy chat /root/model/Meta-Llama-3-8B-Instruct_4bit --model-format awq --cache-max-entry-count 0.01
# Serving with LMDeploy (api_server)
lmdeploy serve api_server \
/root/model/Meta-Llama-3-8B-Instruct_4bit \
--model-format awq \
--quant-policy 0 \
--cache-max-entry-count 0.01 \
--server-name 0.0.0.0 \
--server-port 23333 \
--tp 1
# From your local machine, forward the server port (replace 46619 with the SSH port assigned to your instance)
ssh -CNg -L 23333:127.0.0.1:23333 root@ssh.intern-ai.org.cn -p 46619
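# With the tunnel up, the server exposes OpenAI-compatible endpoints. A minimal smoke test
# (list the served model name first, since it may differ from the local path):
curl http://localhost:23333/v1/models
curl http://localhost:23333/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "<name from /v1/models>", "messages": [{"role": "user", "content": "Hello!"}]}'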
# Connect to the API server with the command-line client
lmdeploy serve api_client http://localhost:23333
pip install gradio==3.50.2
lmdeploy serve gradio http://localhost:23333 \
--server-name 0.0.0.0 \
--server-port 6006
Llama 3 Agent Capability: Hands-On and Fine-Tuning
# On InternStudio, copy the shared Agent-FLAN dataset
cd ~
cp -r /root/share/new_models/internlm/Agent-FLAN .
chmod -R 755 Agent-FLAN
# Otherwise, download the dataset from Hugging Face
cd ~
git lfs install
git clone https://huggingface.co/datasets/internlm/Agent-FLAN
# Convert the dataset into a format XTuner can train on
python ~/Llama3-Tutorial/tools/convert_agentflan.py ~/Agent-FLAN/data
# Launch fine-tuning
export MKL_SERVICE_FORCE_INTEL=1
xtuner train ~/Llama3-Tutorial/configs/llama3-agentflan/llama3_8b_instruct_qlora_agentflan_3e.py --work-dir ~/llama3_agent_pth --deepspeed deepspeed_zero2
# Convert the weights to HF format
xtuner convert pth_to_hf ~/Llama3-Tutorial/configs/llama3-agentflan/llama3_8b_instruct_qlora_agentflan_3e.py \
~/llama3_agent_pth/iter_18516.pth \
~/llama3_agent_pth/iter_18516_hf
# Merge the weights
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
~/llama3_agent_pth/iter_18516_hf \
~/llama3_agent_pth/merged
# Alternatively, skip training and merge the provided pre-trained Agent-FLAN adapter instead
export MKL_SERVICE_FORCE_INTEL=1
xtuner convert merge /root/model/Meta-Llama-3-8B-Instruct \
/share/new_models/agent-flan/iter_2316_hf \
~/llama3_agent_pth/merged
pip install lagent
# Web demo with the original model
streamlit run ~/Llama3-Tutorial/tools/agent_web_demo.py /root/model/Meta-Llama-3-8B-Instruct
# Web demo with the Agent-FLAN fine-tuned model
streamlit run ~/Llama3-Tutorial/tools/agent_web_demo.py /root/llama3_agent_pth/merged
# Example prompt to try in the demo: Please help me search for the InternLM2 Technical Report.
Llama 3 Capability Evaluation (OpenCompass)
cd ~
git clone https://github.com/open-compass/opencompass opencompass
cd opencompass
pip install -e .
pip install -r requirements.txt
pip install protobuf
export MKL_SERVICE_FORCE_INTEL=1
export MKL_THREADING_LAYER=GNU
# Download the evaluation datasets into data/
wget https://github.com/open-compass/opencompass/releases/download/0.2.2.rc1/OpenCompassData-core-20240207.zip
unzip OpenCompassData-core-20240207.zip
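# The datasets should now sit under data/ in the opencompass root
ls data | head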
# List all configurations
# python tools/list_configs.py
# List all configs related to llama (models) and ceval (datasets)
python tools/list_configs.py llama ceval
python run.py --datasets ceval_gen --hf-path /root/model/Meta-Llama-3-8B-Instruct --tokenizer-path /root/model/Meta-Llama-3-8B-Instruct --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True --model-kwargs trust_remote_code=True device_map='auto' --max-seq-len 2048 --max-out-len 16 --batch-size 4 --num-gpus 1 --debug
# If run.py fails on the rouge package, install it from source, then return to the opencompass root
git clone https://github.com/pltrdy/rouge
cd rouge
python setup.py install
cd ..
# Annotated version of the same command. (Inline comments after "\" would break shell
# line continuation, so the flags are explained here instead.)
#   --datasets          datasets to evaluate on
#   --hf-path           HuggingFace model path
#   --tokenizer-path    HuggingFace tokenizer path (can be omitted if identical to the model path)
#   --tokenizer-kwargs  arguments for building the tokenizer
#   --model-kwargs      arguments for building the model
#   --max-seq-len       maximum sequence length the model accepts
#   --max-out-len       maximum number of tokens to generate
#   --batch-size        batch size
#   --num-gpus          number of GPUs required to run the model
python run.py \
    --datasets ceval_gen \
    --hf-path /root/model/Meta-Llama-3-8B-Instruct \
    --tokenizer-path /root/model/Meta-Llama-3-8B-Instruct \
    --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \
    --model-kwargs device_map='auto' trust_remote_code=True \
    --max-seq-len 2048 \
    --max-out-len 16 \
    --batch-size 4 \
    --num-gpus 1 \
    --debug
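# By default, OpenCompass writes results and prediction logs under outputs/default/<timestamp>/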
# Evaluate via a config file instead of command-line flags
python run.py '/root/opencompass/configs/eval_llama3_instruct.py'