# Configure the Hugging Face cache location and download mirror BEFORE
# importing transformers: huggingface_hub reads HF_ENDPOINT (and resolves
# cache paths) at import time, so setting these after `import transformers`
# silently has no effect.
import os

os.environ['HF_HOME'] = '/root/autodl-tmp/cache/'  # keep the model cache on the data disk; the default would fill the system disk under /root/cache
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'  # mirror endpoint for model downloads

import torch
import transformers
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

# Local path to the LLaMA-2 checkpoint exported by LLaMA-Factory
# (LoRA weights merged after SFT).
model_path = "/root/autodl-tmp/autodl-fs/LLaMA-Factory/models/llama2_lora_sft"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# device_map="auto" places the weights on available GPU(s); loading a 7B
# model on CPU alone can get the process OOM-killed.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
# Prompt for the conversational-recommendation task: an item interaction
# sequence plus natural-language user feedback.
# NOTE(review): the trailing backslash inside the triple-quoted string is a
# line continuation, so the two lines are joined WITHOUT a newline — confirm
# this matches the prompt format used at training time (a literal "\n" may
# have been intended).
prompt ="""interaction sequence:[\"Guess Who's Coming to Dinner\", 'Gladiator', \"Soldier's Story, A\", 'Key Largo', 'Verdict, The', 'Like Water for Chocolate (Como agua para chocolate)', 'Children of a Lesser God', 'Dances with Wolves', 'Day the Earth Stood Still, The', 'Stand and Deliver', 'Manhattan', 'Starman', 'Kramer Vs. Kramer', \"Miller's Crossing\", 'Simon Birch']\
user feedback:Actor:I don't like Woody Allen, I prefer Ian Michael Smith."""

# Tokenize and move ALL returned tensors (input_ids AND attention_mask) to
# the model's device. The original moved only input_ids and hard-coded
# 'cuda', which leaves the attention mask on CPU and breaks when
# device_map="auto" places the embedding layer elsewhere.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate, passing the attention mask explicitly so generate() attends
# correctly and does not warn about an unset mask. max_length=1024 bounds
# prompt + completion tokens combined.
generate_ids = model.generate(**inputs, max_length=1024)
res = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(res)
# Loading the LoRA-merged LLaMA-2 locally (在本地加载Lora之后的llama2)
# (blog-page metadata, kept as a comment: 最新推荐文章于 2024-10-01 10:18:28 发布)