This article describes how to train the llama2_7b model with NeMo.
1. Reference links
2. Create the container
docker run --gpus all --shm-size=32g -ti -e NVIDIA_VISIBLE_DEVICES=all \
--privileged --net=host -v $PWD:/home \
-w /home --name NeMo \
nvcr.io/nvidia/nemo:24.05 /bin/bash
mkdir -p /home/NeMo
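Once inside the container, it is worth confirming that the GPUs and the bundled NeMo installation are visible before going further. The checks below assume the stock layout of the nvcr.io/nvidia/nemo:24.05 image (NeMo sources under /opt/NeMo, as used by the commands later in this article); adjust the paths if your image differs.
# Quick sanity check inside the container
nvidia-smi
python -c "import nemo; print(nemo.__version__)"
ls /opt/NeMo/examples/nlp/language_modeling/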
3. Data conversion
cd /home/NeMo
python /opt/NeMo/scripts/nlp_language_modeling/preprocess_data_for_megatron.py \
--input=/home/autotrain/datasets/timdettmers/openassistant-guanaco/openassistant_best_replies_train.jsonl \
--json-keys=text \
--tokenizer-library=sentencepiece \
--tokenizer-model=/home/ModelLink/llama-2-7b-hf/tokenizer.model \
--output-prefix=gpt_training_data \
--append-eod \
--workers=32
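A quick way to verify the conversion is to look at one input record and at the files the script writes. With --json-keys=text, each input line should be a JSON object containing a text field; the output file names below follow the usual <output-prefix>_<key>_document convention of the Megatron preprocessing script, so check them against what actually appears in /home/NeMo.
# Inspect one raw record: only the "text" key is tokenized because of --json-keys=text
head -n 1 /home/autotrain/datasets/timdettmers/openassistant-guanaco/openassistant_best_replies_train.jsonl
# The indexed dataset (.bin/.idx pair) is what the training step will read
ls -lh gpt_training_data_text_document.bin gpt_training_data_text_document.idx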
4. Training from scratch
python /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_pretraining.py \
--config-path=/opt/NeMo-Framework-Launcher/launcher_scripts/conf/training/llama \
--config-name=llama2_7b \
trainer.devices=8 \
trainer.num_nodes=1 \
trainer.max_epochs=null \
trainer.max_steps=300000 \
trainer.val_check_interval=300 \
trainer.log_every_n_steps=50 \
trainer.limit_val_batches=50 \
trainer.limit_test_batches=50
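The overrides above only touch the trainer; the llama2_7b config also needs to know where the tokenizer and the preprocessed dataset live. If those fields are not already set in llama2_7b.yaml, they can be supplied as extra Hydra overrides. The key names below (model.tokenizer.model, model.data.data_prefix) follow the usual NeMo GPT config layout and are an assumption to verify against the actual yaml; the data prefix points at the .bin/.idx pair produced in step 3.
# Example only: the same launch with tokenizer and dataset supplied explicitly
python /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_pretraining.py \
--config-path=/opt/NeMo-Framework-Launcher/launcher_scripts/conf/training/llama \
--config-name=llama2_7b \
trainer.devices=8 \
trainer.num_nodes=1 \
model.tokenizer.model=/home/ModelLink/llama-2-7b-hf/tokenizer.model \
'model.data.data_prefix=[1.0,/home/NeMo/gpt_training_data_text_document]'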