This is the job of the softmax layer: it guarantees that every output is positive and that the outputs sum to 1, i.e. softmax(z)_i = exp(z_i) / sum_j exp(z_j).
y_pred = np.exp(z)/np.exp(z).sum() computes that softmax; the corresponding loss is the cross-entropy of the predicted probabilities against the label, loss = (-y * np.log(y_pred)).sum() for a one-hot y.
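A minimal NumPy sketch of these two steps (the logits z and the one-hot label y are made-up values for illustration):

import numpy as np

z = np.array([0.2, 0.1, -0.1])          # raw outputs (logits) of the last layer
y = np.array([1, 0, 0])                 # one-hot label: the true class is class 0

y_pred = np.exp(z) / np.exp(z).sum()    # softmax: all positive, sums to 1
loss = (-y * np.log(y_pred)).sum()      # cross-entropy loss
print(y_pred, loss)                     # ~[0.38 0.34 0.28], ~0.97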
The last layer of the network therefore applies no activation; PyTorch already provides CrossEntropyLoss, which combines the softmax (as log-softmax) and the loss computation in a single module.
The corresponding restriction: the target y must be a LongTensor of class indices.
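A small sketch of how it is called (the logits and labels are made-up values; note the raw, un-activated outputs go in directly):

import torch

criterion = torch.nn.CrossEntropyLoss()
z = torch.tensor([[0.2, 0.1, -0.1],
                  [0.1, 2.0, -1.0]])    # raw logits, shape (batch, classes); no softmax applied
y = torch.LongTensor([0, 1])            # class indices as a LongTensor, shape (batch,)

loss = criterion(z, y)                  # log-softmax + negative log-likelihood in one step
print(loss.item())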
When an image is read in Python, the pixel values are in [0, 255], but neural networks train best on inputs distributed in [0, 1], so we convert the image to a tensor with values in [0, 1].
This is done with transforms.ToTensor.
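A quick sketch of what ToTensor produces (the random 28x28 uint8 array here stands in for a grayscale MNIST image):

import numpy as np
from PIL import Image
from torchvision import transforms

img = Image.fromarray(np.random.randint(0, 256, (28, 28), dtype=np.uint8))  # fake image, values in [0, 255]
tensor = transforms.ToTensor()(img)     # float tensor of shape (1, 28, 28), values scaled to [0, 1]
print(tensor.shape, tensor.min().item(), tensor.max().item())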
The computation graph (figure omitted). The complete code:
import torch
from torchvision import transforms          # image preprocessing
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F             # provides relu
import torch.optim as optim                 # optimizers

# Fully connected neural network

# Prepare the dataset
batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),                      # 1. convert to a float tensor with values in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,))  # 2. standardize: mean and std are empirical values computed over MNIST
])
train_dataset = datasets.MNIST(root=r'D:\set', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root=r'D:\set', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Build the model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)                  # -1 lets view infer the mini-batch size automatically
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)                    # no activation on the last layer, no nonlinear transform

model = Net()

# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Wrap one training epoch in a function
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        # get one batch of inputs and labels
        inputs, target = data
        optimizer.zero_grad()                # don't forget to zero the gradients
        # model outputs have shape (64, 10)
        outputs = model(inputs)
        # cross-entropy loss: outputs (64, 10), target (64)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()          # remember .item(), otherwise the computation graph is kept
        # print every 300 batches
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# Define a test function
def test():
    correct = 0
    total = 0
    with torch.no_grad():                    # no gradient computation needed below
        for data in test_loader:             # fetch a batch
            images, labels = data
            outputs = model(images)          # an n-row, 10-column matrix
            # max along dim 1 (rows are dim 0, columns/classes are dim 1); returns the max value and its index
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()  # element-wise comparison between tensors
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
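The mean 0.1307 and standard deviation 0.3081 passed to Normalize are empirical statistics of the MNIST training set; a minimal sketch of how they can be reproduced (reusing the same root path as above):

from torchvision import datasets

raw = datasets.MNIST(root=r'D:\set', train=True, download=True)
data = raw.data.float() / 255.0               # whole training set as a (60000, 28, 28) tensor in [0, 1]
print(data.mean().item(), data.std().item())  # ~0.1307, ~0.3081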