transformers学习3

from transformers import pipeline

"""
句子情感分析
"""
# Sentence-level sentiment analysis using the pipeline's default model.
nlp = pipeline(task="sentiment-analysis")
# A single sentence is passed in; keep the first (index 0) prediction.
result = nlp("Trump dismisses director of cybersecurity")[0]

from transformers import AutoTokenizer,AutoModelForSequenceClassification
import torch

"""
序列分类
"""
# Paraphrase detection with the "bert-base-cased-finetuned-mrpc" checkpoint:
# each sentence pair is encoded together, the model scores the pair, and the
# softmaxed logits are reported against `classes` position by position.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc",return_dict=True)
classes = ["not paraphrase","is paraphrase"]
sequence_0 = "The company HuggingFace is based in New York City"
sequence_1 = "Apples are especially bad for your health"
sequence_2 = "HuggingFace's headquarters are situated in Manhattan"

# Encode each pair as a single two-segment input and return PyTorch tensors.
paraphrase = tokenizer(sequence_0,sequence_2,return_tensors="pt")
not_paraphrase = tokenizer(sequence_0,sequence_1,return_tensors="pt")

# Inference only: skip autograd bookkeeping so no gradient graph is built.
with torch.no_grad():
    paraphrase_classification_logits = model(**paraphrase).logits
    not_paraphrase_classification_logits = model(**not_paraphrase).logits

# Softmax over the class dimension, then take the only row of the batch.
paraphrase_results = torch.softmax(paraphrase_classification_logits,dim=1).tolist()[0]
not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits,dim=1).tolist()[0]

# Report each class probability as a whole-number percentage.
for label, probability in zip(classes, paraphrase_results):
    print(f"{label}:{int(round(probability * 100))}%")
for label, probability in zip(classes, not_paraphrase_results):
    print(f"{label}: {int(round(probability * 100))}%")

from transformers import pipeline
"""
这将返回从文本中提取的答案,置信度得分以及“开始”和“结束”值,这些值是提取的答案在文本中的位置。
"""
if __name__ == '__main__':
    # Question answering through the pipeline's default model; each call's
    # result is printed as returned.
    nlp = pipeline(task="question-answering")
    context = r"""
    A TV program has become popular in China recently.
    It has made people all over China become interested in writing Chinese characters.
    The increasing use of computers and smart phones has left most young people hardly able to write by hand.
    Many of them are even unable to remember the 10,000 characters used in daily life without electronic help.
    CCTV started the Chinese Character Dictation Competition to improve the population's handwriting skills.
    Contestants on the show were school pupils,but it was found that 70% of adults in the audience were unable to remember how to write the word 'chan chu'.
    Because computers allow people to type characters simply by entering pinyin, they don't need to remember how to write them.
    "The keyboard age has had a big influence on the handwriting of Chinese characters."Guan Zhengwen,who designed and directed the show said.
    He added that he hoped to encourage people to keep it alive as an art form.
    Hao Mingjian,an editor of a magazine,putting his heart into Chinese characters,said,"Learning Chinese characters goes on throughout your life.If you stop using them for a long time, it is very likely that you will forget them."
    """
    # Ask every question against the same passage, in order.
    queries = (
        "Are Chinese people interested in writing Chinese characters now?",
        "What has left most young people hardly able to write by hand?",
    )
    for query in queries:
        print(nlp(question=query, context=context))

"""
提取式问答
"""
from transformers import AutoTokenizer,AutoModelForQuestionAnswering
import torch

if __name__ == '__main__':
    # Extractive question answering done by hand: encode (question, passage),
    # take the highest-scoring start and end token positions, and decode the
    # tokens between them back into a string.
    tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
    model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

    text = r"""
        A TV program has become popular in China recently.
        It has made people all over China become interested in writing Chinese characters.
        The increasing use of computers and smart phones has left most young people hardly able to write by hand.
        Many of them are even unable to remember the 10,000 characters used in daily life without electronic help.
        CCTV started the Chinese Character Dictation Competition to improve the population's handwriting skills.
        Contestants on the show were school pupils,but it was found that 70% of adults in the audience were unable to remember how to write the word 'chan chu'.
        Because computers allow people to type characters simply by entering pinyin, they don't need to remember how to write them.
        "The keyboard age has had a big influence on the handwriting of Chinese characters."Guan Zhengwen,who designed and directed the show said.
        He added that he hoped to encourage people to keep it alive as an art form.
        Hao Mingjian,an editor of a magazine,putting his heart into Chinese characters,said,"Learning Chinese characters goes on throughout your life.If you stop using them for a long time, it is very likely that you will forget them."
    """

    questions = [
        "Are Chinese people interested in writing Chinese characters now?",
        "What has left most young people hardly able to write by hand?",
        "Why did CCTV start the Chinese Character Dictation Competition?",
        "How do computers allow people to type characters simply?"
    ]

    for question in questions:
        inputs = tokenizer(question,text,add_special_tokens=True,return_tensors="pt")
        input_ids = inputs["input_ids"].tolist()[0]

        # Read the logits by name instead of tuple-unpacking the model output:
        # unpacking a dict-style model output yields its keys (strings), which
        # makes torch.argmax fail. Inference only, so gradients are disabled.
        with torch.no_grad():
            outputs = model(**inputs, return_dict=True)
        answer_start_scores = outputs.start_logits
        answer_end_scores = outputs.end_logits

        # Most likely start token, and one past the most likely end token so
        # the slice below includes the end token itself.
        answer_start = int(torch.argmax(answer_start_scores))
        answer_end = int(torch.argmax(answer_end_scores)) + 1

        answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
        answer = tokenizer.convert_tokens_to_string(answer_tokens)
        print(f"Question: {question}")
        print(f"Answer: {answer}")

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值