1. Dataset is an abstract class and cannot be instantiated directly; we subclass it to construct our own dataset.
2. Batch gradient descent uses all samples at once, exploiting vectorized computation for speed; stochastic gradient descent uses one sample at a time, which helps escape saddle points but takes longer. Mini-batch strikes a balance between the two (see the sketch after this list).
3. The inner loop iterates over mini-batches; one epoch is one forward and backward pass over all samples, i.e. every sample participates in training once.
4. __getitem__ is implemented so the dataset supports subscript (index) access.
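To make note 2 concrete, here is a minimal sketch (using a hypothetical toy tensor dataset, not the diabetes data below) showing that the three strategies differ only in batch_size:

import torch
from torch.utils.data import TensorDataset, DataLoader

# Hypothetical toy data: 100 samples, 8 features each.
X = torch.randn(100, 8)
y = torch.randint(0, 2, (100, 1)).float()
toy = TensorDataset(X, y)

full_batch = DataLoader(toy, batch_size=len(toy))  # batch GD: 1 update per epoch
sgd_style  = DataLoader(toy, batch_size=1)         # SGD: 100 updates per epoch
mini_batch = DataLoader(toy, batch_size=32)        # mini-batch: 4 updates per epoch

print(len(full_batch), len(sgd_style), len(mini_batch))  # 1 100 4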
Code implementation:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)  # load the data
        self.len = xy.shape[0]                       # number of samples
        self.x_data = torch.from_numpy(xy[:, :-1])   # features: all columns except the last
        self.y_data = torch.from_numpy(xy[:, [-1]])  # label: the last column only

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]  # support subscript (index) access

    def __len__(self):
        return self.len  # return the dataset length

dataset = DiabetesDataset("diabetes.csv.gz")  # load the dataset
train_loader = DataLoader(dataset=dataset,
                          batch_size=32,   # mini-batch size of 32
                          shuffle=True,    # shuffle the data each epoch
                          num_workers=2)   # two worker subprocesses (not threads) for loading
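As a quick sanity check (a hedged sketch, assuming the file has 8 feature columns plus 1 label column as the model below expects), one batch pulled from the loader should have shapes [32, 8] and [32, 1]:

# Run this inside the __main__ guard when num_workers > 0.
inputs, labels = next(iter(train_loader))
print(inputs.shape, labels.shape)  # torch.Size([32, 8]) torch.Size([32, 1])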
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # 8 input features are reduced step by step: 8 -> 6 -> 4 -> 1
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))  # final sigmoid squashes the output into (0, 1)
        return x
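A minimal sketch to verify the layer dimensions line up (the random input here is hypothetical, not real data):

m = Model()
out = m(torch.randn(32, 8))  # a fake batch of 32 samples with 8 features
print(out.shape)             # torch.Size([32, 1]), values in (0, 1)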
model = Model()
criterion = torch.nn.BCELoss(reduction='mean')  # size_average=True is deprecated; reduction='mean' is the equivalent
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
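As a worked example of what BCELoss computes: for a single prediction p with target y, the loss is -(y·log p + (1-y)·log(1-p)); with p = 0.9 and y = 1 that is -log 0.9 ≈ 0.105:

p = torch.tensor([0.9])
y = torch.tensor([1.0])
print(criterion(p, y).item())  # ≈ 0.1054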
if __name__ == '__main__':  # guard is required for multiprocessing workers (e.g. on Windows)
    for epoch in range(100):
        for i, (inputs, labels) in enumerate(train_loader, 0):
            y_pred = model(inputs)            # forward pass on one mini-batch
            loss = criterion(y_pred, labels)
            print(epoch, i, loss.item())
            optimizer.zero_grad()             # clear gradients left over from the previous step
            loss.backward()                   # backward pass
            optimizer.step()                  # update the weights