DataLoader error in PyTorch: each tensor has a different size

When using PyTorch's DataLoader, the following error is raised: RuntimeError: stack expects each tensor to be equal size, but got [2] at entry 0 and [1] at entry 1

1. Problem Description

Error location: inside the code that defines the Dataset

def __getitem__(self, index):
	...
	return y    # the error is raised here

Error message:

  File "D:\python\lib\site-packages\torch\utils\data\_utils\collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [2] at entry 0 and [1] at entry 1

Including the line just above the final error in the traceback makes it much clearer where the problem comes from.

2. Problem Analysis

The traceback shows that the error is raised inside a call to torch.stack, so the first step is to understand where that stack happens.
Debugging shows that when the DataLoader assembles a batch, it calls __getitem__ once per (random) index to fetch a single sample. To turn those samples into one batch they have to be combined, and the default collate function does exactly that with torch.stack.
Now look at what the error actually says: the tensors being stacked have different sizes. In other words, the tensor y returned for each index is expected to have the same size every time, but in my case it does not.
The fix is therefore clear: make the returned tensor y a fixed size.
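
For concreteness, here is a minimal sketch that reproduces the error with a toy dataset (the class and data are illustrative, not the code from this post): the default collate function stacks the samples of each batch with torch.stack, and it fails as soon as two samples in the same batch have different lengths.

import torch
from torch.utils.data import Dataset, DataLoader

# Toy dataset whose samples deliberately have different lengths.
class RaggedDataset(Dataset):
    def __init__(self):
        self.data = [torch.tensor([1, 3]), torch.tensor([0]), torch.tensor([1, 2, 5])]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

loader = DataLoader(RaggedDataset(), batch_size=2)
# default_collate calls torch.stack on the samples of a batch, so this raises:
# RuntimeError: stack expects each tensor to be equal size, but got [2] at entry 0 and [1] at entry 1
batch = next(iter(loader))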

3. Root Cause

Why did this happen to me? My feature vector contains a multi-hot feature, and to save space I store it as a list of the active indices. For example:

feature = [[1, 3, 5],
           [0, 2],
           [1, 2, 5, 8]]

As a result, the vector returned for each sample has a different length. The fix is to pad the shorter vectors so that they all have the same length:

	import numpy as np
	# Pad every vector to a fixed length of 6; new positions on the right are filled with -1.
	multi = np.pad(multi, (0, 6 - multi.shape[0]), 'constant', constant_values=(0, -1))
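
Putting the padding inside __getitem__ makes every returned tensor the same length, so the default collate function can stack them into a batch. Below is a minimal sketch under the same assumptions (maximum length 6, padding value -1; the class and variable names are illustrative):

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

MAX_LEN = 6  # assumed upper bound on the number of active indices

class MultiHotDataset(Dataset):
    def __init__(self, feature):
        self.feature = feature

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, index):
        multi = np.asarray(self.feature[index])
        # Right-pad with -1 so every sample comes out with length MAX_LEN.
        multi = np.pad(multi, (0, MAX_LEN - multi.shape[0]), 'constant', constant_values=(0, -1))
        return torch.as_tensor(multi, dtype=torch.long)

feature = [[1, 3, 5], [0, 2], [1, 2, 5, 8]]
loader = DataLoader(MultiHotDataset(feature), batch_size=3)
print(next(iter(loader)))
# tensor([[ 1,  3,  5, -1, -1, -1],
#         [ 0,  2, -1, -1, -1, -1],
#         [ 1,  2,  5,  8, -1, -1]])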

4. Summary

  • The __getitem__ method you override when building a Dataset must return tensors of the same size.
  • Padding the variable-length vectors to a common length (as sketched above) solves the problem.
