程序描述~

想念@思恋

已于 2022-03-10 10:23:23 修改

阅读量216

点赞数

分类专栏： pytorch python编程文章标签： pytorch 深度学习 python

于 2021-02-24 17:20:19 首次发布

本文链接：https://blog.csdn.net/tailonh/article/details/114012983

版权

python编程同时被 2 个专栏收录

139 篇文章 10 订阅

订阅专栏

pytorch

47 篇文章 2 订阅

订阅专栏

1、
# Masked softmax over dimension 2: positions whose mask is 0 receive probability 0.
# NOTE(review): `u_pos` and `pos_mask_matrix` come from outside this fragment;
# presumably both share the same shape -- confirm against the caller.
pos_mask_matrix = torch.clamp(pos_mask_matrix.float(), 0, 1) # clamp the mask values into [0, 1]
# Unnormalised scores.
exp_u_pos = torch.exp(u_pos)
# Zero out every score whose mask entry is 0.
delta_exp_u_pos = torch.mul(exp_u_pos, pos_mask_matrix)
# Sum over dimension 2, then repeat that sum along dimension 2 so it lines up
# element-wise with delta_exp_u_pos.
sum_delta_exp_u_pos = torch.stack([torch.sum(delta_exp_u_pos, 2)] * delta_exp_u_pos.shape[2], 2)
# Normalise; the 1e-10 keeps fully-masked rows from dividing by zero.
p_pos = torch.div(delta_exp_u_pos, sum_delta_exp_u_pos + 1e-10)

import torch

# Toy batch of two identical 3x3 score matrices standing in for exp(u).
exp_u_pos = torch.tensor(
    [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ]
)
# 0/1 mask with the same shape; a 1 marks an n-gram relevant to that row.
pos_mask_matrix = torch.tensor(
    [
        [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
        [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
    ]
)
print(exp_u_pos.shape)
print(pos_mask_matrix.shape)

# Keep only the scores of relevant n-grams; everything else becomes 0.  [2, 3, 3]
delta_exp_u_pos = exp_u_pos * pos_mask_matrix
# Per-row total of the surviving scores.  [2, 3]
row_totals = delta_exp_u_pos.sum(dim=2)
last_dim = delta_exp_u_pos.size(2)
# Repeat each row total along the last axis so it aligns with the scores.  [2, 3, 3]
sum_delta_exp_u_pos = row_totals.unsqueeze(2).repeat(1, 1, last_dim)
# Divide every row by its own total to obtain the attention weights.
attention = delta_exp_u_pos / (sum_delta_exp_u_pos + 1e-10)

print("delta_exp_u_pos:\n",delta_exp_u_pos)
print("torch.sum(delta_exp_u_pos):\n",row_totals)
print("res:\n",[row_totals] * last_dim)
print(sum_delta_exp_u_pos.shape)
print("sum_delta_exp_u_pos:\n",sum_delta_exp_u_pos)
print("attention :\n",attention)

class MultiChannelAttention(nn.Module):
    """Masked attention from characters to n-grams, computed per channel.

    Each channel attends from every character hidden state to the n-gram
    embeddings of that channel. Only n-grams flagged in the mask take part.
    Every channel is scaled by a learned, softmax-normalised weight and the
    channels are concatenated along the feature axis.

    Args:
        ngram_size: Number of rows in the n-gram embedding table (index 0 is
            the padding index).
        hidden_size: Embedding width; must equal the last dimension of
            ``hidden_state`` passed to ``forward``.
        cat_num: Number of rows in the channel-weight embedding table.
    """

    def __init__(self, ngram_size, hidden_size, cat_num):
        super().__init__()
        self.word_embedding = nn.Embedding(ngram_size, hidden_size, padding_idx=0)
        self.channel_weight = nn.Embedding(cat_num, 1)
        # Scaling factor applied to the dot-product scores.
        self.temper = hidden_size ** 0.5

    def forward(self, word_seq, hidden_state, char_word_mask_matrix, channel_ids):
        """Return the channel-weighted n-gram context for every character.

        Args:
            word_seq: n-gram ids, (batch_size, channel, word_seq_len).
            hidden_state: character representations,
                (batch_size, character_seq_len, hidden_size).
            char_word_mask_matrix: match mask,
                (batch_size, channel, character_seq_len, word_seq_len);
                values are clamped into [0, 1] before use.
            channel_ids: channel indices. The softmax over dim 1 and the
                reshape to (batch_size, -1, 1, 1) below expect
                (batch_size, channel) -- TODO confirm against the caller.

        Returns:
            Tensor of shape (batch_size, character_seq_len, channel * hidden_size).
        """
        batch_size, character_seq_len, hidden_size = hidden_state.shape
        channel = char_word_mask_matrix.shape[1]
        word_seq_length = word_seq.shape[2]

        # [batch_size, channel, word_seq_len, hidden_size]
        embedding = self.word_embedding(word_seq)

        # Scaled dot-product scores. The singleton channel axis broadcasts over
        # all channels, so no per-channel copy of hidden_state is materialised.
        # [batch_size, channel, character_seq_len, word_seq_len]
        u = torch.matmul(hidden_state.unsqueeze(1), embedding.transpose(2, 3)) / self.temper

        # Non-zero mask entries mark the n-grams related to each character.
        word_mask = torch.clamp(char_word_mask_matrix, 0, 1)

        # Masked softmax over the n-gram axis.
        # NOTE(review): exp(u) is not max-shifted, so very large scores can
        # overflow; left as is to keep the numerics unchanged.
        delta_exp_u = torch.exp(u) * word_mask
        # keepdim=True lets the row sum broadcast instead of being replicated.
        sum_delta_exp_u = delta_exp_u.sum(dim=3, keepdim=True)
        # The 1e-10 keeps fully-masked rows at zero instead of producing NaN.
        attention = delta_exp_u / (sum_delta_exp_u + 1e-10)

        # Merge batch and channel so a single bmm handles every channel.
        attention = attention.reshape(batch_size * channel, character_seq_len, word_seq_length)
        embedding = embedding.reshape(batch_size * channel, word_seq_length, hidden_size)
        character_attention = torch.bmm(attention, embedding)
        character_attention = character_attention.view(
            batch_size, channel, character_seq_len, hidden_size
        )

        # One learned scalar per channel, normalised across the channel axis.
        channel_w = torch.softmax(self.channel_weight(channel_ids), dim=1)
        channel_w = channel_w.view(batch_size, -1, 1, 1)  # [batch_size, channel, 1, 1]

        # Broadcast the channel weight over characters and features.
        character_attention = character_attention * channel_w
        # [batch_size, character_seq_len, channel, hidden_size]
        character_attention = character_attention.permute(0, 2, 1, 3)
        # [batch_size, character_seq_len, channel * hidden_size]
        return character_attention.flatten(start_dim=2)

# Note: torch.stack joins tensors along a NEW dimension inserted at index k
# (torch.cat is the call that concatenates along an existing dimension).
torch.stack([h1,h2], dim=k)	# stack h1 and h2 along a new dimension k
torch.stack([h1]*channel, dim=k)	# repeat h1 `channel` times along a new dimension k

2、pytorch中的交叉熵计算
https://blog.csdn.net/qq_28418387/article/details/95918829

import torch
import torch.nn.functional as F
# Token-level criterion; targets equal to -100 are ignored.
slot_loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100)

# Per-token logits: 2 sentences x 3 tokens x 3 slot labels.
slot_logits = torch.tensor(
    [
        [[0.1, 0.2, 0.3], [0.5, 0.6, 0.4], [0.9, 0.8, 0.7]],
        [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
    ]
)
attention_mask = torch.tensor([[1, 0, 0], [1, 1, 0]])   # [2, 3]
# With 3 slot labels, the ids may only be 0, 1, 2 or the ignore value -100.
slot_labels_ids = torch.tensor([[1, 0, 0], [2, 1, 0]])  # [2, 3]

num_slot_labels = slot_logits.size(-1)  # 3

# Boolean selector over the flattened token axis: True where the mask is 1.
active_loss = attention_mask.reshape(-1).eq(1)

# Flattened logits (6 rows):
#   [0.1, 0.2, 0.3]
#   [0.5, 0.6, 0.4]
#   [0.9, 0.8, 0.7]
#   [0.1, 0.2, 0.3]
#   [0.4, 0.5, 0.6]
#   [0.7, 0.8, 0.9]
flat_logits = slot_logits.reshape(-1, num_slot_labels)
# Rows kept by the selector (flat indices 0, 3 and 4):
#   [0.1, 0.2, 0.3]
#   [0.1, 0.2, 0.3]
#   [0.4, 0.5, 0.6]
active_logits = flat_logits[active_loss]

# Gold labels of the kept tokens.
flat_labels = slot_labels_ids.reshape(-1)
active_labels = flat_labels[active_loss]
slot_loss = slot_loss_fct(active_logits, active_labels)

print(active_logits)
# To inspect the log-probabilities: F.log_softmax(active_logits, dim=1)
print(active_labels)
print(slot_loss)

3、

.join()：    连接字符串数组

4、fastnlp 词表的前两位是什么？

vocab = databundle.get_vocab('chars')
# Print the words stored at vocabulary indices 0 through 5, in order.
for index in range(6):
    print(vocab.to_word(index))
# Output observed by the author (index --> word):
#   0 --> <pad>
#   1 --> <unk>
#   2 --> ，
#   3 --> 的
#   4 --> 0
#   5 --> 。

5、广播的计算方法

import torch
import torch.nn as nn
# a holds one scale factor per sample, shape [2, 1, 1].
a = torch.tensor([[[0.1]], [[0.2]]])
# b holds the values to scale, shape [2, 2, 3].
b = torch.tensor(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[1, 2, 3], [4, 5, 6]],
    ]
)
print(a.size())
print(b.size())
# The two size-1 axes of `a` broadcast across the matching axes of `b`, so
# sample 0 is scaled by 0.1 and sample 1 by 0.2.
res = torch.mul(b, a)
print(res)
# Expected output:
# torch.Size([2, 1, 1])
# torch.Size([2, 2, 3])
# tensor([[[0.1000, 0.2000, 0.3000],
#          [0.4000, 0.5000, 0.6000]],
#
#         [[0.2000, 0.4000, 0.6000],
#          [0.8000, 1.0000, 1.2000]]])

6、
激活函数作用：
1、完成数据的非线性变换，解决线性模型的表达、分类能力不足的问题;
（改变之前数据的线性关系，如果网络中全部是线性变换，则多层网络可以通过矩阵变换，直接转换成一层神经网络。所以激活函数的存在，使得神经网络的“多层”有了实际的意义，使网络更加强大，增加网络的能力，使它可以学习复杂的事物，复杂的数据，以及表示输入输出之间非线性的复杂的任意函数映射。）
2、执行数据的归一化，将输入数据映射到某个范围内，再往下传递，这样做的好处是可以限制数据的扩张，防止数据过大导致的溢出风险。
在少量层结构中, 我们可以尝试很多种不同的激励函数. 在卷积神经网络 Convolutional neural networks 的卷积层中, 推荐的激励函数是 relu. 在循环神经网络中 recurrent neural networks, 推荐的是 tanh 或者是 relu

7、句子中的token被切分时，该怎么获得有效的token

sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
# Default to the raw encoder output so the dropout call below always has a
# defined input, even when no validity mask is supplied.
valid_output = sequence_output
if valid_ids is not None:
    batch_size, max_len, feat_dim = sequence_output.shape
    # Zero-filled buffer: rows past the last valid token of a sample stay zero.
    valid_output = torch.zeros(batch_size, max_len, feat_dim, dtype=sequence_output.dtype, device=input_ids.device)
    for i in range(batch_size):
        # Keep only the positions flagged 1 in valid_ids and pack them to the
        # front of the sample, preserving their order.
        temp = sequence_output[i][valid_ids[i] == 1]
        valid_output[i][:temp.size(0)] = temp
sequence_output = self.dropout(valid_output)
...

8、在网络中固定部分参数进行训练
参考链接
https://blog.csdn.net/special_hang/article/details/89676432

class RESNET_attention(nn.Module):
    """Backbone with frozen weights plus newly added, trainable layers.

    Args:
        model: Callable invoked as ``model(pretrained)``; it must return the
            backbone ``nn.Module``.
        pretrained: Value forwarded unchanged to ``model``.
    """

    def __init__(self, model, pretrained):
        super().__init__()
        self.resnet = model(pretrained)
        # Freeze everything registered so far, i.e. only the backbone weights.
        # Layers created after this loop keep requires_grad=True.
        for p in self.parameters():
            p.requires_grad = False
        self.f = nn.Conv2d(2048, 512, 1)
        self.g = nn.Conv2d(2048, 512, 1)
        self.h = nn.Conv2d(2048, 2048, 1)
        self.softmax = nn.Softmax(-1)
        self.gamma = nn.Parameter(torch.FloatTensor([0.0]))
        self.avgpool = nn.AvgPool2d(7, stride=1)
        # Attach a fresh 10-way linear layer as the backbone's `fc`; being new,
        # it is trainable.
        self.resnet.fc = nn.Linear(2048, 10)
# Hand the optimizer only the parameters whose requires_grad is still True,
# so parameters that were frozen are left out of the update.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)

9、运行应用程序后，使用以下命令来清除缓存

'''
释放缓存分配器当前持有的所有未占用的缓存内存，以便这些内存可以在其他GPU应用程序中使用；
并且可以使用nvidia-smi查看
'''
torch.cuda.empty_cache()

9、DataLoader中num_workers的作用
https://www.cnblogs.com/hesse-summer/p/11343870.html
https://stackoverflow.com/questions/53998282/how-does-the-number-of-workers-parameter-in-pytorch-dataloader-actually-work
注：将数据转移到GPU上并不是DataLoader的job

10、transformers中的bertTokenizer

from transformers import BertTokenizer
# tokenize / convert_tokens_to_ids / encode are instance methods, so a tokenizer
# object has to be created first.
# NOTE(review): "bert-base-chinese" is only an example checkpoint; substitute
# the vocabulary or model the project actually uses.
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
token = tokenizer.tokenize(text)                    # split the text into tokens
token_ids = tokenizer.convert_tokens_to_ids(token)  # map tokens to ids; no [CLS]/[SEP] added
# ---------------------------------------
token_ids = tokenizer.encode(token)                 # convert to ids with [CLS] and [SEP] added

11、将json字符串美观地写入文件中

# Write `triples` as indented, non-ASCII-escaped JSON. The context manager
# closes the file even if json.dump raises.
with open(tgt, 'w', encoding='utf-8') as g:
    json.dump(triples, fp=g, indent=4, ensure_ascii=False)

11、输入文本为"小黑在清华"，do_basic_tokenize=False很关键！

BERTTokenizer(vocab_path, do_lower_case=False, do_basic_tokenize=False)

12、outputs, (hn, cn) = self.lstm(input)
outputs表示每个字符的隐藏层向量，hn表示最后一个字符的隐藏层向量。

'''
如，当输入batch_size=1时，input=[30, 1, 100], 即[seq_len, batch_size, hidden]
那么当模型为Bi-LSTM时，
outputs=[30, 1, 200]	# 维度200表示前向100，拼接后向100
hn=[2, 1, 100]	# [num_layers * num_directions, batch_size, hidden]，hn[0]为前向，hn[1]为后向
cn=[2, 1, 100]
并且
outputs[-1, 0, 0:100]   = hn[0, 0, :]
outputs[ 0, 0, 100:200] = hn[1, 0, :]
因为前向最后一个字符是从左到右计算的，而后向最后一个字符是从右往左计算的。
'''

13、计算词向量时用到的“负采样”方法是指：用与“正样本”相同的上下文词，再在字典中随机选择一个单词。
例如：
给定一句话“这是去上学的班车”，则对这句话进行正采样，得到上下文“上”和目标词“学”，则这两个字就是正样本。
负样本的采样需要选定同样的“上”，然后在训练的字典中任意取另一个字，“梦”，这一对就构成负样本。

14、在指定路径下创建虚拟环境

'''
conda create  -p ~/.conda/envs/env_name  python=3.7
'''

15、在权重[0, 10, 1.1, 1.1, 1.1, 1.1, 1.1]中，位置[1]的权重较大，所以被采样的次数较多。

import torch
from torch.utils.data.sampler import WeightedRandomSampler

# Per-index sampling weights: index 0 has weight 0, index 1 has weight 10,
# and each of the remaining five indices has weight 1.1.
weights = torch.Tensor([0, 10] + [1.1] * 5)
# Draw 10 indices with replacement, proportionally to the weights.
wei_sampler = WeightedRandomSampler(weights, num_samples=10, replacement=True)
print(list(wei_sampler))
# Example output: [1, 1, 1, 4, 1, 4, 4, 1, 1, 6]

16、key-value memory network的核心要素：
（1）key、value都使用nn.Embedding()进行随机初始化；
（2）key一般是字符；
（3）使用字符的hidden与字符的key相乘，然后加入value，即可得到最后的结果。

17、PRGC三元组模型中，如何得到Global Correspondence

import torch

torch.manual_seed(1)
# Toy encoder output: 2 sentences x 3 tokens x 3 features.
sequence_output = torch.tensor(
    [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[5, 2, 1], [8, 9, 3], [3, 7, 6]],
    ]
)
batch, seq_len, hidden = sequence_output.shape
print(sequence_output)

# Build every (subject token i, object token j) pairing.
pair_shape = (batch, seq_len, seq_len, hidden)
# sub_extend[b, i, j] is token i: the new axis is inserted after the token axis.
sub_extend = sequence_output[:, :, None, :].expand(pair_shape)
# obj_extend[b, i, j] is token j: the new axis is inserted before the token axis.
obj_extend = sequence_output[:, None, :, :].expand(pair_shape)
print(sub_extend)
print(obj_extend)

# Concatenate both views on the feature axis: [batch, seq_len, seq_len, 2 * hidden].
corres_pred = torch.cat((sub_extend, obj_extend), dim=3)
print(corres_pred)

18、torch.where(a, b, c)

import torch

rel_threshold = 0.5
rel_pred = torch.tensor([[0.1, 0.2, 0.6, 0.7],
                         [0.8, 0.9, 0.1, 0.2]])

# torch.where(cond, x, y) takes the element from x where cond is True and the
# element from y everywhere else.
rel_pred_onehot = torch.where(
    rel_pred > rel_threshold,
    torch.ones_like(rel_pred),
    torch.zeros_like(rel_pred),
)
# Row and column indices of the entries above the threshold.
bs_idxs, pred_rels = torch.nonzero(rel_pred_onehot, as_tuple=True)
# Expected: tensor([0, 0, 1, 1]) tensor([2, 3, 0, 1])
print(rel_pred_onehot)
print(bs_idxs, pred_rels)

condition = torch.tensor([True, True, False, False, True])
# Named `values` rather than `input` so the builtin input() is not shadowed.
values = torch.tensor([1, 2, 3, 4, 5])
res = torch.where(condition, values, torch.tensor(-100))
print(res)