QLoRA Model Fine-Tuning

Model Training

The script below fine-tunes Qwen2-0.5B-Instruct on the ruozhiba instruction dataset with QLoRA: the base model is loaded in 4-bit NF4 quantization and only the LoRA adapter weights are trained.

import os
import warnings
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TRANSFORMERS_OFFLINE'] = '1'

warnings.filterwarnings("ignore")

from peft import (LoraConfig,
                  get_peft_model,
                  TaskType)
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          TrainingArguments,
                          Trainer,
                          DataCollatorForSeq2Seq,
                          BitsAndBytesConfig)

# Load the tokenizer for the same checkpoint as the model below.
_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# QLoRA quantization config: load the base weights in 4-bit NF4 with double quantization.
_bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                 bnb_4bit_use_double_quant=True,
                                 bnb_4bit_quant_type="nf4",
                                 bnb_4bit_compute_dtype=torch.float32)

# Load the base model with the 4-bit quantization config.
_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct",
                                              # low_cpu_mem_usage=True,
                                              quantization_config=_bnb_config)


# Load the ruozhiba instruction dataset from a local JSON file.
_dataset = load_dataset("json", data_files="ruozhiba.json", split="train")


def preprocess_dataset(example):
    """Build input_ids / attention_mask / labels for one sample; prompt tokens are masked with -100."""
    MAX_LENGTH = 256
    # Simple chat-style prompt: the instruction, then the assistant turn.
    _instruction = _tokenizer(
        f"User: {example['instruction']}\n\nAssistant: ", add_special_tokens=False)
    _response = _tokenizer(
        example["output"] + _tokenizer.eos_token, add_special_tokens=False)
    _input_ids = _instruction["input_ids"] + _response["input_ids"]
    _attention_mask = _instruction["attention_mask"] + \
        _response["attention_mask"]
    # Mask the prompt with -100 so the loss is computed only on the response.
    _labels = [-100] * len(_instruction["input_ids"]) + _response["input_ids"]
    if len(_input_ids) > MAX_LENGTH:
        _input_ids = _input_ids[:MAX_LENGTH]
        _attention_mask = _attention_mask[:MAX_LENGTH]
        _labels = _labels[:MAX_LENGTH]
    return {
        "input_ids": _input_ids,
        "attention_mask": _attention_mask,
        "labels": _labels
    }


# Tokenize every sample, drop the raw text columns, and shuffle.
_dataset = _dataset.map(preprocess_dataset, remove_columns=_dataset.column_names)
_dataset = _dataset.shuffle()
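
As an optional sanity check (not part of the original script, just an illustrative sketch), one processed sample can be decoded to confirm that the prompt tokens are masked with -100 and only the response contributes to the loss:

# Optional sanity check: inspect one processed sample.
_sample = _dataset[0]
print(_tokenizer.decode(_sample["input_ids"]))
print(sum(1 for t in _sample["labels"] if t == -100),
      "of", len(_sample["labels"]), "label positions are masked")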


# LoRA configuration: rank-8 adapters attached to all linear layers of the causal LM.
config = LoraConfig(task_type=TaskType.CAUSAL_LM,
                    r=8,
                    target_modules="all-linear")


# Wrap the quantized model with the LoRA adapters; only the adapter weights are trainable.
_model = get_peft_model(_model, config)
# _model.print_trainable_parameters()

# With the frozen, quantized base model, the embedding outputs do not require
# gradients by default; enable them so gradients can reach the LoRA layers.
_model.enable_input_require_grads()

_training_args = TrainingArguments(
    output_dir="checkpoints/qlora",
    run_name="qlora_study",
    per_device_train_batch_size=5,
    num_train_epochs=1,
    save_steps=100,
    # deepspeed="deepspeed_config.json",
    # optim="paged_adamw_32bit",
)
# DataCollatorForSeq2Seq dynamically pads input_ids, attention_mask and labels per batch.
trainer = Trainer(
    model=_model,
    args=_training_args,
    train_dataset=_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=_tokenizer, padding=True),
)

trainer.train()
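
Once training finishes, the LoRA adapter saved under checkpoints/qlora can be loaded on top of the quantized base model for inference. The following is a minimal sketch, reusing the tokenizer and quantization config defined above; the checkpoint subdirectory name (checkpoint-100 here) is an assumption and depends on save_steps and the dataset size.

# Minimal inference sketch; "checkpoints/qlora/checkpoint-100" is an assumed path,
# use whichever checkpoint directory the Trainer actually wrote.
from peft import PeftModel

_base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct",
                                                   quantization_config=_bnb_config)
_ft_model = PeftModel.from_pretrained(_base_model, "checkpoints/qlora/checkpoint-100")
_ft_model.eval()

# Use the same prompt format as in preprocess_dataset.
_prompt = "User: 为什么我的影子总是跟着我?\n\nAssistant: "
_inputs = _tokenizer(_prompt, return_tensors="pt").to(_base_model.device)
with torch.no_grad():
    _generated = _ft_model.generate(**_inputs, max_new_tokens=128)
print(_tokenizer.decode(_generated[0][_inputs["input_ids"].shape[1]:],
                        skip_special_tokens=True))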
