LLM API 简单应用

LuckkyyyBoyyyy
已于 2024-02-13 04:30:52 修改
阅读量482
点赞数 9
文章标签： python
于 2024-02-13 04:16:53 首次发布
本文链接：https://blog.csdn.net/m0_73348712/article/details/136104793
版权
#环境准备：安装依赖并导入
pip3 install torch torchvision torchaudio
pip3 install transformers
from transformers import pipeline

#pipeline参数
pipeline 需要传入任务类别以及对应的模型（LLM）
1.任务类别
'text-generation'
'sentiment-analysis'
'question-answering'
2.模型的类型
2.1模型短名：Hugging Face Model Hub 上预训练模型的名称，例如 bert-base-uncased、gpt2
pipeline('sentiment-analysis', model='bert-base-uncased')
2.2模型的本地路径
pipeline('text-generation', model='/path/to/your/local/model')
2.3模型对象(手动下载并进行参数调节 -> 更高级的用法；注意：直接传入模型对象时，pipeline 无法自动推断分词器，还需通过 tokenizer 参数显式指定)
from transformers import GPT2Model
model = GPT2Model.from_pretrained('gpt2')
pipeline('feature-extraction', model=model)

#实例
1.'text-generation'
text_generator = pipeline('text-generation',model = ...) #输入任务  +  模型
generated_texts = text_generator(prompts,
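                                max_length # 生成文本的最大长度，以 token 计（不是单词数），并且包含输入的 prompt
                                tokenizer # 指定用于文本分词的分词器，通常在创建 pipeline 时指定。与model参数相同，这可以是分词器的ID或本地路径。如果不指定，将默认使用与指定模型匹配的分词器。
                                num_return_sequences # 返回几个句子
                                temperature # 采样温度，须大于 0：值越大输出越随机、越有创造性，值越小越保守（常用 0 - 1，也可以大于 1；仅在启用采样 do_sample=True 时生效）
                                top_k # 限制模型在每一步考虑的词汇数量（只保留概率最高的 k 个候选词）
                                top_p # 低`top_p`值倾向于生成连贯但缺乏多样性的文本，而高`top_p`值促进文本的创造性和多样性但可能牺牲连贯性)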
#单个句子
print(generated_texts[0]['generated_text'])
#多个句子
for i, text in enumerate(generated_texts):
     print(f"故事 {i+1}: {text['generated_text']}\n")


2.'sentiment-analysis'
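#model：指定要使用的预训练模型。默认情况下，会使用针对情感分析任务预训练的模型，但你可以通过指定模型的路径或模型在Hugging Face Model Hub上的名称来使用其他模型。注意：bert-base-uncased 只是基础模型，没有针对情感分类进行微调，下面的写法仅用于演示参数；实际使用时应选择已在情感分析任务上微调过的模型（例如 distilbert-base-uncased-finetuned-sst-2-english）。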
text_analysis = pipeline('sentiment-analysis', model='bert-base-uncased')
#tokenizer：指定用于文本分词的分词器。通常，如果你更改了模型，则可能也需要指定与之匹配的分词器。分词器可以是预定义的分词器名称、分词器的路径或直接是分词器的实例
text_analysis = pipeline('sentiment-analysis', tokenizer='bert-base-uncased')
#config：允许指定一个配置对象或路径，用于自定义模型的配置
text_analysis = pipeline('sentiment-analysis', config={'output_hidden_states': True})
#device：指定运行pipeline的设备。-1表示CPU，而非负整数表示GPU的编号。如果你有可用的GPU，可以通过设置此参数来加速计算。
text_analysis = pipeline('sentiment-analysis', device=0)  # 在第一个GPU上运行
#framework：选择使用的深度学习框架，'pt'表示PyTorch，'tf'表示TensorFlow。通常，这会根据安装的库和可用模型自动确定，但也可以手动指定。
text_analysis = pipeline('sentiment-analysis', framework='pt')
#return_all_scores：当你设置return_all_scores=True并对文本进行情感分析时，pipeline会返回每个可能情感类别的得分，而不仅仅是最可能的情感类别及其得分。具体的输出将取决于使用的模型和配置，但一般情况下，对于情感分析任务，输出将是一个列表，列表中的每个元素都是一个字典，表示一个情感类别及其对应的得分。（注：在较新版本的 transformers 中，return_all_scores 已被弃用，建议改用 top_k=None 获得相同效果。）
result = text_analysis("I love using Transformers library!", return_all_scores=True)

#处理单个句子
result = text_analysis("I love using Transformers library!") 
print(result)
[{'label': 'POSITIVE', 'score': 0.9998}]

#处理多个句子
texts = ["I love this car.", "This view is amazing.", "The service was horrible.", "I feel great this morning."]
results = text_analysis(texts)

for result in results:
    print(result)
{'label': 'POSITIVE', 'score': 0.99}
{'label': 'POSITIVE', 'score': 0.95}
{'label': 'NEGATIVE', 'score': 0.97}
{'label': 'POSITIVE', 'score': 0.99}

3.'question-answering'

# 创建一个问题回答的pipeline
qa_pipeline = pipeline("question-answering")

# 定义问题和文本段落
context = """
Transformers library is an open-source, community-based repository for natural language processing (NLP) pre-trained models. The library is based on PyTorch and TensorFlow.
"""
question = "What is the Transformers library based on?"

# 使用pipeline回答问题
#model：指定要使用的预训练模型。这可以是模型的ID（在Hugging Face Model Hub上的名称），也可以是模型的本地路径。
qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")
#tokenizer：指定用于文本分词的分词器。与model参数相同，这可以是分词器的ID或本地路径。如果不指定，将默认使用与指定模型匹配的分词器。
qa_pipeline = pipeline("question-answering", tokenizer="bert-large-uncased-whole-word-masking-finetuned-squad")
#config：允许指定一个配置对象或路径，用于自定义模型的配置。
qa_pipeline = pipeline("question-answering", config={"output_attentions": True})
#framework：选择使用的深度学习框架，'pt'表示PyTorch，'tf'表示TensorFlow。通常，这会根据安装的库和可用模型自动确定，但也可以手动指定。
qa_pipeline = pipeline("question-answering", framework="pt")
#device：指定运行pipeline的设备。-1表示CPU，而非负整数表示GPU的编号。如果你有可用的GPU，可以通过设置此参数来加速计算。
qa_pipeline = pipeline("question-answering", device=0)  # 在第一个GPU上运行

#处理单个句子
result = qa_pipeline(question=question, context=context)  # 还可以按需传入其他可选参数
print(result)
{'score': 0.97, 'start': 111, 'end': 131, 'answer': 'PyTorch and TensorFlow'}

#处理多个问题
questions = [
    "What is the Transformers library for?",
    "Which frameworks does the Transformers library use?"
]

for question in questions:
    result = qa_pipeline(question=question, context=context)
    print(f"Question: {question}")
    print(f"Answer: {result['answer']}\n")