Merging the LoRA weights back into the base model parameters

The script below loads the base model, attaches the LoRA adapter with peft, folds the adapter weights into the base weights via merge_and_unload(), and saves the result as a standalone checkpoint that no longer needs peft at inference time.
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.generation.utils import GenerationConfig


def apply_lora(model_name_or_path, output_path, lora_path):
    # Load the base model and its tokenizer.
    print(f"Loading the base model from {model_name_or_path}")
    base_tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path, use_fast=False, trust_remote_code=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="cuda:0",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    # base.generation_config = GenerationConfig.from_pretrained(model_name_or_path)

    # Attach the LoRA adapter to the base model.
    print(f"Loading the LoRA adapter from {lora_path}")
    lora_model = PeftModel.from_pretrained(
        base,
        lora_path,
        torch_dtype=torch.bfloat16,  # keep the adapter in the same dtype as the base model
    )

    # Fold the adapter weights into the base weights and drop the LoRA modules.
    print("Applying the LoRA")
    model = lora_model.merge_and_unload()

    # Save the merged model together with the tokenizer.
    print(f"Saving the target model to {output_path}")
    model.save_pretrained(output_path)
    base_tokenizer.save_pretrained(output_path)


if __name__ == "__main__":
    lora_path = "/data2/xinyuuliu/LLaMA-Factory/saves/qwen/lora/orpo"
    model_path = "/data2/xinyuuliu/Qwen1.5-7B-Chat"
    output = "/data2/xinyuuliu/LLaMA-Factory/saves/qwen/lora/orpo/lora_merge"
    apply_lora(model_path, output, lora_path)
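
To check that the merge worked, the exported directory can be loaded as a plain Transformers checkpoint, without peft. The snippet below is a minimal sketch, not part of the original script: it reuses the output path from above and assumes a transformers version recent enough to provide apply_chat_template for the Qwen1.5 chat tokenizer.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Quick sanity check (hypothetical): load the merged checkpoint and run one generation.
merged_path = "/data2/xinyuuliu/LLaMA-Factory/saves/qwen/lora/orpo/lora_merge"

tokenizer = AutoTokenizer.from_pretrained(merged_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    merged_path, device_map="cuda:0", torch_dtype=torch.bfloat16, trust_remote_code=True
)

# Build a chat-style prompt with the tokenizer's chat template.
messages = [{"role": "user", "content": "Introduce yourself in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=128)

# Decode only the newly generated tokens.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))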