llm推理

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

# 禁用一些CUDA优化以节省内存
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_token_len = 1024 # 输入token最大长度
output_token_len = 100 # 输出token最大长度
# 读取测试集数据
test_df = pd.read_csv('/kaggle/input/llm-prompt-recovery/test.csv')
# 定义基础模型和lora适配器模型的路径
base_model_name = "/kaggle/input/mistral-7b-it-v02"
adapter_model_name = "/kaggle/input/mistral-7b/mistral_7b_099999_lr0.0001_5ep_lora(r16,a32,d0.05,default)"
# 加载预训练模型的tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name,trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# 设置BitsAndBytesConfig 4bit量化配置
bnb_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)
# 加载预训练模型
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config, # 应用4bit量化配置
    trust_remote_code=True, # 信任远程代码
    device_map="auto", # 自动进行设备映射
    use_auth_token=True
)

# 加载LoRA适配器
model = PeftModel.from_pretrained(
    model, 
    adapter_model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

model.to(device)
model.eval()
def text_generate(ori_text, rew_text, model, tokenizer, input_max_len=512, output_len=20, device='cuda'):
    '''
    使用模型生成回复
    '''
    # 构建输入prompt
    prompt = f"Instruct: Original Text:{ori_text}\nRewritten Text:{rew_text}\nWrite a prompt that was likely given to the LLM to rewrite original text into rewritten text.\nOutput:"
    # 对输入进行tokenize,限制最大长度,返回PyTorch张量
    inputs = tokenizer(prompt, max_length=input_max_len, truncation=True, return_tensors="pt", return_attention_mask=False)
    # 将数据移动到gpu
    inputs = {k:v.to(device) for k,v in inputs.items()}
    
    # 计算输入token数量
    input_token_len = len(inputs.input_ids[0])
    # 计算生成的最大长度
    max_len = input_token_len + output_len
    # 使用模型生成文本
    outputs = model.generate(**inputs,
                         do_sample=False,
                         max_length=max_len,
                         pad_token_id=tokenizer.pad_token_id,
                         )
    # 解码生成的token为文本
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    # 找到生成文本的起始位置
    start_index = text.find('Output:')
    # 提取生成的文本
    generated_text = text[start_index+len('Output:'):].strip()
    return generated_text
# 通用平均prompt
mean_prompt = "improve phrasing text lucrarea tone lucrarea rewrite this creatively formalize discours involving lucrarea anyone emulate lucrarea description send casual perspective information alter it lucrarea ss plotline speaker recommend doing if elegy tone lucrarea more com n paraphrase ss forward this st text redesign poem above etc possible llm clear lucrarea"
rewrite_prompts = []
# 遍历测试集数据
for i, row in tqdm(test_df.iterrows(), total=len(test_df)):
    try:
        # 使用模型生成rewrite prompt
        prompt = text_generate(row['original_text'],
                               row['rewritten_text'],
                               model,
                               tokenizer,
                               input_token_len,
                               output_token_len,
                               device,
                               )
        # 将生成的prompt拼接上平均prompt
        prompt += mean_prompt
    except:
        # 如果生成失败,直接使用平均prompt
        prompt = mean_prompt
        
    rewrite_prompts.append(prompt)
# 将生成的rewrite prompt添加到DataFrame中
test_df['rewrite_prompt'] = rewrite_prompts
sub_df = test_df[['id', 'rewrite_prompt']] # # 只保留id和rewrite_prompt两列
sub_df.to_csv('submission.csv', index=False)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值