from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model, TaskType

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules="all-linear",          # apply LoRA to every linear layer
    modules_to_save=["word_embeddings"],  # also fully train and save the word-embedding layer
)
model = get_peft_model(base_model, config)  # base_model: the pretrained causal LM you loaded
training_args = TrainingArguments(
    output_dir="path/to/output_dir",   # where checkpoints will be written
    per_device_train_batch_size=10,
    gradient_accumulation_steps=1,     # number of batches to accumulate before each optimizer step
    gradient_checkpointing=True,
    num_train_epochs=6,
    save_steps=100,
    optim="paged_adamw_32bit",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # train_dataset: your preprocessed dataset (see the loading sketch below)
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)
trainer.train()
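For reference, here is a minimal sketch of how the base_model, tokenizer, and train_dataset placeholders above might be produced; the model name Qwen/Qwen2-7B-Instruct and the train.json data file are assumptions for illustration, not part of the original setup:

from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

model_name = "Qwen/Qwen2-7B-Instruct"  # assumed checkpoint; substitute your own
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)

# Assumed raw data file; it still needs to be tokenized into
# input_ids / attention_mask / labels before being passed to the Trainer.
train_dataset = load_dataset("json", data_files="train.json", split="train")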
Once training is finished, load the LoRA adapter and merge it back into the base model:
from peft import PeftModel

# base_model here is a freshly loaded copy of the original pretrained model.
peft_model = PeftModel.from_pretrained(model=base_model, model_id="path/to/your/lora_checkpoint")
peft_model = peft_model.merge_and_unload()   # fold the LoRA weights into the base model
peft_model.save_pretrained("path/to/merged_model")
At this point the merge is complete.
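To sanity-check the result, the merged directory can be loaded like any ordinary Transformers checkpoint; the paths and the prompt below are assumptions for illustration, and it is also worth saving the tokenizer next to the merged weights so the directory is self-contained:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer.save_pretrained("path/to/merged_model")  # keep the tokenizer with the merged weights

merged_model = AutoModelForCausalLM.from_pretrained("path/to/merged_model")
merged_tokenizer = AutoTokenizer.from_pretrained("path/to/merged_model")

inputs = merged_tokenizer("Hello, how are you?", return_tensors="pt")
outputs = merged_model.generate(**inputs, max_new_tokens=50)
print(merged_tokenizer.decode(outputs[0], skip_special_tokens=True))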