transformers调用llama的方式

不同版本llama对应的transformers库版本

# llama2
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.32.1
pip install accelerate==0.22.0
# llama3
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.35.0
pip install accelerate==0.22.0
# llama3.1
pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
pip install transformers==4.43.1
pip install accelerate==0.22.0

llama2

待补充

llama3

Meta-Llama-3-8B-Instruct

可用于QA,summarize,示例代码

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=20,
    eos_token_id=terminators,
    do_sample=False,
    temperature=0.0,
    top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 "playing football"

Meta-Llama-3-8B

可用于文本生成,使用体验一般

import transformers
import torch
from transformers import AutoTokenizer
model_id = "/home/mayunchuan/.cache/huggingface/transformers/meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=40,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
result = pipeline("Hey how are you doing today?")
print(result) # 输出 [{'generated_text': 'Hey how are you doing today? I am doing well. I am a little bit tired because I have been working a lot. I am a little bit tired because I have been working a lot.'}]

llama3.1

Meta-Llama-3.1-8B-Instruct

可用于QA,summarize,可使用llama3-chat同样的示例代码

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(
    input_ids,
    max_new_tokens=20,
    eos_token_id=terminators,
    do_sample=False,
    temperature=0.0,
    top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 Playing football.

也可以使用另一个demo

import transformers
import torch
from transformers import AutoTokenizer
model_id = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=35,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
result = pipeline("who are you?")
print(result)

import transformers
import torch

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are an assistant who provides precise and direct answers."},
    {"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
outputs = pipeline(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]) # 输出 {'role': 'assistant', 'content': 'Playing football.'}
  • 5
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值