08. 加载数据集(Dataset 和 DataLoader)
-------------------------------------------------
构造加载数据集
mini batch
batch 是把数据集切分成的一个个小组(mini-batch),每次迭代只用其中一组样本来计算损失并更新参数
-------------------------------------------------------
准备数据,构造模型,构造损失和优化器,训练循环
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader


class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric file.

    Every column except the last is treated as a feature; the last column
    is the target.

    Args:
        filepath: Path of the CSV file to load (anything ``np.loadtxt``
            accepts, including ``.gz`` files).

    Raises:
        ValueError: If the file does not contain at least one feature
            column and one label column.
    """

    def __init__(self, filepath):
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below works for any number of rows.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        if xy.size == 0 or xy.shape[1] < 2:
            raise ValueError(
                'expected at least one feature column and one label column '
                'in %r' % (filepath,)
            )
        self.len = xy.shape[0]  # number of samples (rows)
        self.x_data = torch.from_numpy(xy[:, :-1])  # all columns but the last
        # Indexing with [-1] (a list) keeps the labels as an (N, 1) matrix
        # instead of collapsing them to a 1-D vector.
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len


class Model(torch.nn.Module):
    """Three-layer fully connected classifier (8 -> 6 -> 4 -> 1).

    A sigmoid follows every linear layer, so the output lies in (0, 1).
    """

    def __init__(self):
        super().__init__()
        # The width shrinks layer by layer.
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` (last dimension of size 8) through the network."""
        # Rebinding x at each step keeps every stage easy to inspect when
        # debugging.
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x


def main():
    """Train ``Model`` on ``diabetes.csv.gz`` with mini-batch SGD."""
    dataset = DiabetesDataset('diabetes.csv.gz')
    # Loader arguments: dataset, batch size, shuffle each epoch, number of
    # worker processes.
    train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True,
                              num_workers=2)

    model = Model()
    # reduction='mean' is the non-deprecated spelling of size_average=True.
    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    for epoch in range(100):
        # Iterate over the loader directly; each item is one mini-batch.
        for i, (inputs, labels) in enumerate(train_loader):
            # Forward pass.
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            print(epoch, i, loss.item())

            # Backward pass.
            optimizer.zero_grad()
            loss.backward()

            # Parameter update.
            optimizer.step()


# The guard is required because num_workers > 0 starts worker processes that
# re-import this module on spawn-based platforms (Windows, macOS).
if __name__ == '__main__':
    main()