from torch.utils.data import DataLoader, Dataset, random_split
import torch
# Toy data: twelve samples, built by repeating a three-row pattern four times.
# `a` holds one row of three integers per sample (passed as input_ids below).
a = torch.tensor([[11, 22, 33], [44, 55, 66], [77, 88, 99]] * 4)
# `b` holds one integer per sample (passed as the mask below).
b = torch.tensor([0, 1, 2] * 4)
# `c` holds one integer per sample (passed as the label below); note that the
# first triple is [0, 1, 1], unlike the three [0, 1, 2] triples that follow.
c = torch.tensor([0, 1, 1] + [0, 1, 2] * 3)
# Custom dataset
class myDataSet(Dataset):
    """Map-style dataset that serves (input_ids, mask, label) triples.

    The three containers are indexed in parallel: sample ``idx`` is made of
    ``input_ids[idx]``, ``mask[idx]`` and ``label[idx]``.
    """

    def __init__(self, input_ids: torch.Tensor, mask: torch.Tensor, label: torch.Tensor):
        """Store the three parallel containers.

        Args:
            input_ids: Per-sample inputs; its length defines the dataset size.
            mask: Per-sample mask values, indexed the same way as ``input_ids``.
            label: Per-sample labels, indexed the same way as ``input_ids``.
        """
        self.input_ids = input_ids
        self.mask = mask
        self.label = label

    def __len__(self) -> int:
        """Return the number of samples (the length of ``input_ids``)."""
        return len(self.input_ids)

    def __getitem__(self, idx: int) -> tuple:
        """Return the ``(input_ids, mask, label)`` triple for sample ``idx``."""
        return self.input_ids[idx], self.mask[idx], self.label[idx]
# Wrap the three parallel tensors in the custom dataset.
dataset = myDataSet(a, b, c)

# Use 90% of the samples (rounded down) for training and the rest for validation.
train_size = int(len(dataset) * 0.9)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, lengths=[train_size, val_size])

# Serve the training split in shuffled mini-batches.
batch_size = 3
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
# The validation loader is currently disabled:
# val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
# Loading a custom PyTorch dataset
# First published on 2022-03-17 16:26:15