Bert的简单使用

简单使用了一下Bert,并输出对应的矩阵到文件中。

import torch
from transformers import BertConfig, BertModel, BertTokenizer

def bert_output(texts, name):
    """Encode ``texts`` with BERT and append the pooled output to a file.

    Every sentence is tokenized with the module-level ``tokenizer``,
    right-padded with zeros to the longest sentence in the batch, and fed
    through the module-level ``model`` as a single batch. The pooled output
    (``output[1]``) is then appended to ``name`` as text.

    Args:
        texts: Iterable of sentences (the caller passes three). This
            function does not add special tokens, so markers such as
            ``[CLS]`` / ``[SEP]`` must already be part of each sentence.
        name: Path of the output file. It is opened in append mode, so
            repeated calls accumulate in the same file.

    Raises:
        ValueError: If ``texts`` contains no sentences.

    Note:
        Calls ``torch.set_printoptions`` and therefore changes how tensors
        are printed for the rest of the process.
    """
    token_ids = [
        tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
        for text in texts
    ]
    if not token_ids:
        raise ValueError("texts must contain at least one sentence")

    max_len = max(len(ids) for ids in token_ids)  # longest sentence length

    tokens, segments, input_masks = [], [], []
    for ids in token_ids:
        pad = max_len - len(ids)
        # Pad the ids with 0, which is the [PAD] id in the standard BERT vocab.
        tokens.append(ids + [0] * pad)
        # Single-sentence inputs: every position belongs to segment 0.
        segments.append([0] * max_len)
        # 1 marks a real token, 0 marks padding that must NOT be attended to.
        input_masks.append([1] * len(ids) + [0] * pad)

    tokens_tensor = torch.tensor(tokens)
    segments_tensors = torch.tensor(segments)
    input_masks_tensors = torch.tensor(input_masks)

    # Keyword arguments are required for correctness here: positionally,
    # BertModel.forward expects (input_ids, attention_mask, token_type_ids),
    # so passing segments before the mask silently swaps the two.
    # no_grad: this is pure inference, no autograd graph is needed.
    with torch.no_grad():
        output = model(
            input_ids=tokens_tensor,
            attention_mask=input_masks_tensors,
            token_type_ids=segments_tensors,
        )
    pooled_output = output[1]  # pooled [CLS] representation

    # Print the whole matrix instead of an elided summary.
    torch.set_printoptions(edgeitems=768)

    with open(name, 'a', encoding='utf-8') as f:
        f.write("pooled_output:")
        f.write(str(pooled_output))

if __name__ == '__main__':
    # Tokenizer, config and weights are all loaded from the same local
    # checkpoint directory. `tokenizer` and `model` are module-level names
    # that bert_output() reads.
    pretrained_dir = './bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(pretrained_dir)
    # Original author's note: model_config.hidden_size is 768.
    model_config = BertConfig.from_pretrained(pretrained_dir)
    model = BertModel.from_pretrained(pretrained_dir, config=model_config)

    # Sample utterances; the [CLS] / [SEP] markers are written into the text.
    texts_atis = [
        "[CLS] i want to fly from baltimore to dallas round trip [SEP]",
        "[CLS] show me the flights arriving baltimore on june fourteenth [SEP]",
        "[CLS] which airlines fly from boston to washington via other cities [SEP]",
    ]
    texts_snips = [
        "[CLS] what the weather in my current spot the [SEP]",
        "[CLS] what the weather like in the city frewen [SEP]",
        "[CLS] what the weather supposed to be like today [SEP]",
    ]

    # Output files, one per dataset.
    atis = 'atis.txt'
    snips = 'snips.txt'

    # Only the SNIPS sentences are encoded; the ATIS inputs above are
    # defined but not processed by this script.
    bert_output(texts_snips, snips)
   


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值