# Using Dataset and TensorDataset, and writing a custom Dataset subclass
import torch
from torch.utils.data import Dataset,TensorDataset,DataLoader
import numpy as np
# Toy data: 10 samples with 2 features each, and one target per sample.
x = np.random.rand(10, 2)
y = np.random.rand(10, 1)
print(x.shape, y.shape)

# Convert the NumPy arrays to tensors. from_numpy keeps the array's dtype
# (float64 here) and shares memory with the source array.
trainx = torch.from_numpy(x)
trainy = torch.from_numpy(y)
print(trainx.shape)

# Wrap the tensors in a TensorDataset: item i is (trainx[i], trainy[i]).
train_dataset = TensorDataset(trainx, trainy)

# DataLoader takes care of batching and shuffling while iterating.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,  # load batches in the main process
)

# Use dedicated batch names so the NumPy arrays x / y are not overwritten
# by the last mini-batch.
for i, (batch_x, batch_y) in enumerate(train_loader):
    print('第{}个,x的大小{},y的大小{}'.format(i, batch_x.shape, batch_y.shape))
# Subclassing Dataset
from torch.utils.data import Dataset
# Override this class to customise data initialisation, error computation, etc.
class MyDataset(Dataset):
    """Map-style dataset that pairs inputs with targets and a derived value.

    Indexing returns ``(x[index], y[index], x[index] + y[index])``, which
    shows how a custom ``Dataset`` can emit extra, user-defined outputs.
    Downloading or preprocessing of the data could also be done in
    ``__init__``.

    Args:
        x: Array-like inputs exposing ``.shape``; the first axis indexes
            samples.
        y: Array-like targets exposing ``.shape``, with the same first-axis
            length as ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length along the first axis.
    """

    def __init__(self, x, y):
        super().__init__()
        n_inputs, n_targets = x.shape[0], y.shape[0]
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of iteration, or silently ignore the extra targets.
        if n_inputs != n_targets:
            raise ValueError(
                'x and y must have the same number of samples, '
                'got {} and {}'.format(n_inputs, n_targets)
            )
        self.x = x
        self.y = y
        self.len = n_inputs

    def __getitem__(self, index):
        """Return ``(x, y, x + y)`` for the sample at ``index``."""
        sample_x = self.x[index]
        sample_y = self.y[index]
        return sample_x, sample_y, sample_x + sample_y

    def __len__(self):
        """Return the number of samples."""
        return self.len
import torch
from torch.utils.data import Dataset,TensorDataset,DataLoader
import numpy as np
# Toy data: 10 samples with 2 features each, and one target per sample.
x = np.random.rand(10, 2)
y = np.random.rand(10, 1)
print(x.shape, y.shape)

# Convert the NumPy arrays to tensors. from_numpy keeps the array's dtype
# (float64 here) and shares memory with the source array.
trainx = torch.from_numpy(x)
trainy = torch.from_numpy(y)
print(trainx.shape)

# Wrap the tensors in a TensorDataset: item i is (trainx[i], trainy[i]).
train_dataset = TensorDataset(trainx, trainy)

# DataLoader takes care of batching and shuffling while iterating.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,  # load batches in the main process
)

# Use dedicated batch names so the NumPy arrays x / y are not overwritten
# by the last mini-batch.
for i, (batch_x, batch_y) in enumerate(train_loader):
    print('第{}个,x的大小{},y的大小{}'.format(i, batch_x.shape, batch_y.shape))
from newDAta import MyDataset
# Wrap the same tensors in the custom dataset; MyDataset.__getitem__ yields
# three values per sample, so every batch unpacks into three tensors.
mydataset = MyDataset(trainx, trainy)
train_loader2 = DataLoader(
    dataset=mydataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,  # load batches in the main process
)

# Use dedicated batch names so the earlier x / y bindings are not overwritten
# by the last mini-batch.
for i, (batch_x, batch_y, batch_sum) in enumerate(train_loader2):
    print('2第{}个,x的大小{},y的大小{},x+y的大小{}'.format(
        i, batch_x.shape, batch_y.shape, batch_sum.shape))