在 PyTorch 中,可以使用 Dataset 和 DataLoader 来自定义训练模型时的输入数据。
以下是一个简单的使用实例。
该实例使用的数据集是一个 list: x = [0, 1, 2, 3, …, 49]。
运行环境: Python 3.6+, PyTorch (torch)
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """Map-style dataset wrapping an in-memory sequence of samples.

    Args:
        data: Optional iterable of samples. When omitted, the dataset holds
            the integers 0 through 49, as in the original example.
    """

    def __init__(self, data=None):
        # Copy into a list so indexing and len() work for any iterable input
        # and later mutation of the caller's object does not leak in.
        self.data = list(range(50)) if data is None else list(data)

    def __getitem__(self, index):
        """Return the sample stored at position ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples held by the dataset."""
        return len(self.data)
# Built at module level so that later code in this file can reuse it.
dataset = MyDataset()

# With num_workers > 0 the loader starts worker processes. On platforms that
# start processes with "spawn" (e.g. Windows) each worker re-imports this
# module, so creating and iterating the loader must sit behind the main guard.
if __name__ == "__main__":
    train_loader = DataLoader(dataset=dataset, batch_size=10,
                              shuffle=True, num_workers=2)
    # No collate_fn is passed, so the DataLoader's default collation is used.
    for i, batch in enumerate(train_loader, 0):
        print(i, batch)

    # Visual separator between the two demos.
    print("\n", "* " * 30, "\n")
class MyDataloader(DataLoader):
    """DataLoader that reduces every batch to the sum of its samples.

    This demonstrates how a custom ``collate_fn`` replaces the default
    batching behaviour.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples handed to ``collate`` per batch.
        shuffle: Whether the sample order is reshuffled.
        num_workers: Number of worker processes used for loading; 0 loads
            in the calling process.
    """

    def __init__(self, dataset, batch_size=1, shuffle=False, num_workers=0):
        super().__init__(
            dataset=dataset,
            batch_size=batch_size,
            # Route batching through this class's own collate method.
            collate_fn=self.collate,
            shuffle=shuffle,
            num_workers=num_workers,
        )

    def collate(self, data):
        """Collapse the list of samples in one batch into their sum."""
        return sum(data)
# This loader also uses worker processes (num_workers=2), so it is created and
# iterated only when the file runs as a script; on "spawn" platforms the
# workers re-import this module and must not re-run the demo.
if __name__ == "__main__":
    my_train_loader = MyDataloader(dataset=dataset, batch_size=10,
                                   shuffle=True, num_workers=2)
    # Each batch is whatever MyDataloader.collate returns for its samples.
    for i, batch in enumerate(my_train_loader, 0):
        print(i, batch)
collate_fn: 指定如何将多个样本数据拼接成一个 batch;一般使用默认的拼接方式即可。上面的 MyDataloader 示例将其替换为对一个 batch 内的样本求和。