torch.permute() and np.transpose()

import torch
import numpy as np

a = np.arange(24).reshape(3, 4, 2)   # NumPy array of shape (3, 4, 2)
print('before', a)
b = np.transpose(a, (1, 0, 2))       # reorder axes with NumPy: new shape (4, 3, 2)
print('b', b)
c = torch.tensor(a)                  # same data as a PyTorch tensor
d = c.permute(1, 0, 2)               # same axis reordering on the tensor
print('d:', d)

Output:

/usr/bin/python3 /home/thu/test_python/transpose_permute.py
before [[[ 0  1]
  [ 2  3]
  [ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]
  [12 13]
  [14 15]]

 [[16 17]
  [18 19]
  [20 21]
  [22 23]]]
b [[[ 0  1]
  [ 8  9]
  [16 17]]

 [[ 2  3]
  [10 11]
  [18 19]]

 [[ 4  5]
  [12 13]
  [20 21]]

 [[ 6  7]
  [14 15]
  [22 23]]]
d: tensor([[[ 0,  1],
         [ 8,  9],
         [16, 17]],

        [[ 2,  3],
         [10, 11],
         [18, 19]],

        [[ 4,  5],
         [12, 13],
         [20, 21]],

        [[ 6,  7],
         [14, 15],
         [22, 23]]])

Process finished with exit code 0

Conclusion: the two produce exactly the same result; the only difference is that np.transpose operates on NumPy arrays, while permute operates on PyTorch tensors.
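For a quick sanity check, the two results can be compared element-wise after converting the permuted tensor back to NumPy (a minimal sketch reusing b and d from the script above):

# verify that np.transpose and torch.permute give identical results
print(b.shape, tuple(d.shape))        # (4, 3, 2) (4, 3, 2)
print(np.array_equal(b, d.numpy()))   # True

Both calls only rearrange strides instead of copying data, so they are cheap; if a contiguous memory layout is needed afterwards, use d.contiguous() on the tensor side or np.ascontiguousarray(b) on the NumPy side.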
