QLoRA Model Fine-Tuning

Model Training

The script below fine-tunes Qwen2-0.5B-Instruct on the ruozhiba instruction dataset with QLoRA: the base model is loaded in 4-bit NF4 quantization and only the LoRA adapter weights are trained.

import os
import warnings
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TRANSFORMERS_OFFLINE'] = '1'

warnings.filterwarnings("ignore")

from peft import (LoraConfig,
                  get_peft_model,
                  TaskType)
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          TrainingArguments,
                          Trainer,
                          DataCollatorForSeq2Seq,
                          BitsAndBytesConfig)

# Load the tokenizer for the same checkpoint as the model below.
_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# QLoRA quantization config: load the base weights in 4-bit NF4 with double quantization.
_bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                 bnb_4bit_use_double_quant=True,
                                 bnb_4bit_quant_type="nf4",
                                 bnb_4bit_compute_dtype=torch.float32)

# Load the base model with the 4-bit quantization config.
_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct",
                                              # low_cpu_mem_usage=True,
                                              quantization_config=_bnb_config)


# Load the ruozhiba instruction dataset from a local JSON file.
_dataset = load_dataset("json", data_files="ruozhiba.json", split="train")


def preprocess_dataset(example):
    """Build input_ids / attention_mask / labels for one sample; prompt tokens are masked with -100."""
    MAX_LENGTH = 256
    # Simple chat-style prompt: the instruction, then the assistant turn.
    _instruction = _tokenizer(
        f"User: {example['instruction']}\n\nAssistant: ", add_special_tokens=False)
    _response = _tokenizer(
        example["output"] + _tokenizer.eos_token, add_special_tokens=False)
    _input_ids = _instruction["input_ids"] + _response["input_ids"]
    _attention_mask = _instruction["attention_mask"] + \
        _response["attention_mask"]
    # Mask the prompt with -100 so the loss is computed only on the response.
    _labels = [-100] * len(_instruction["input_ids"]) + _response["input_ids"]
    if len(_input_ids) > MAX_LENGTH:
        _input_ids = _input_ids[:MAX_LENGTH]
        _attention_mask = _attention_mask[:MAX_LENGTH]
        _labels = _labels[:MAX_LENGTH]
    return {
        "input_ids": _input_ids,
        "attention_mask": _attention_mask,
        "labels": _labels
    }


# Tokenize every sample, drop the raw text columns, and shuffle.
_dataset = _dataset.map(preprocess_dataset, remove_columns=_dataset.column_names)
_dataset = _dataset.shuffle()
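
As an optional sanity check (not part of the original script, just an illustrative sketch), one processed sample can be decoded to confirm that the prompt tokens are masked with -100 and only the response contributes to the loss:

# Optional sanity check: inspect one processed sample.
_sample = _dataset[0]
print(_tokenizer.decode(_sample["input_ids"]))
print(sum(1 for t in _sample["labels"] if t == -100),
      "of", len(_sample["labels"]), "label positions are masked")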


# LoRA configuration: rank-8 adapters attached to all linear layers of the causal LM.
config = LoraConfig(task_type=TaskType.CAUSAL_LM,
                    r=8,
                    target_modules="all-linear")


# Wrap the quantized model with the LoRA adapters; only the adapter weights are trainable.
_model = get_peft_model(_model, config)
# _model.print_trainable_parameters()

# With the frozen, quantized base model, the embedding outputs do not require
# gradients by default; enable them so gradients can reach the LoRA layers.
_model.enable_input_require_grads()

_training_args = TrainingArguments(
    output_dir="checkpoints/qlora",
    run_name="qlora_study",
    per_device_train_batch_size=5,
    num_train_epochs=1,
    save_steps=100,
    # deepspeed="deepspeed_config.json",
    # optim="paged_adamw_32bit",
)
# DataCollatorForSeq2Seq dynamically pads input_ids, attention_mask and labels per batch.
trainer = Trainer(
    model=_model,
    args=_training_args,
    train_dataset=_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=_tokenizer, padding=True),
)

trainer.train()
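
Once training finishes, the LoRA adapter saved under checkpoints/qlora can be loaded on top of the quantized base model for inference. The following is a minimal sketch, reusing the tokenizer and quantization config defined above; the checkpoint subdirectory name (checkpoint-100 here) is an assumption and depends on save_steps and the dataset size.

# Minimal inference sketch; "checkpoints/qlora/checkpoint-100" is an assumed path,
# use whichever checkpoint directory the Trainer actually wrote.
from peft import PeftModel

_base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct",
                                                   quantization_config=_bnb_config)
_ft_model = PeftModel.from_pretrained(_base_model, "checkpoints/qlora/checkpoint-100")
_ft_model.eval()

# Use the same prompt format as in preprocess_dataset.
_prompt = "User: 为什么我的影子总是跟着我?\n\nAssistant: "
_inputs = _tokenizer(_prompt, return_tensors="pt").to(_base_model.device)
with torch.no_grad():
    _generated = _ft_model.generate(**_inputs, max_new_tokens=128)
print(_tokenizer.decode(_generated[0][_inputs["input_ids"].shape[1]:],
                        skip_special_tokens=True))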
