import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
# Data preparation
batch_size = 64
# Compose chains several transforms. ToTensor turns an image read by PIL or
# OpenCV, laid out as (H, W, C), into a tensor laid out as (C, H, W).
# Normalize then standardizes it: the first argument is the per-channel mean,
# the second is the per-channel standard deviation (not the variance).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
# Load the datasets.
# Raw string: '\P' and '\d' are not valid escape sequences, so the path value
# is unchanged, but recent Python versions emit a warning for them in a
# normal string literal.
data_root = r'E:\Pytorch Project\dataset'
train_dataset = datasets.MNIST(root=data_root, train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root=data_root, train=False, transform=transform, download=True)
# Only the training batches are shuffled; the test order stays fixed.
train_data = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_data = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
# Build the network
class Module(torch.nn.Module):
    """Fully connected classifier mapping 784 input values to 10 scores.

    Five linear layers (784 -> 512 -> 256 -> 128 -> 64 -> 10) with a ReLU
    after each of the first four. The last layer has no activation, so
    ``forward`` returns the raw output of ``linear5``.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return one row of 10 scores for every 784 elements of ``x``.

        Args:
            x: Tensor whose element count is a multiple of 784; it is
                flattened to shape ``(-1, 784)`` before the first layer.

        Returns:
            Tensor of shape ``(N, 10)`` where ``N = x.numel() // 784``.
        """
        # reshape behaves like view for contiguous input but also accepts
        # non-contiguous tensors (e.g. after a transpose), where view raises.
        x = x.reshape(-1, 784)
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        x = self.relu(self.linear4(x))
        return self.linear5(x)
# Instantiate the model
module = Module()
# Loss function: cross-entropy between the model output and the target labels
loss = torch.nn.CrossEntropyLoss()
# Optimizer: Adam over every model parameter. Only the learning rate is set
# explicitly; no momentum argument is passed here.
learning_rate = 0.01
optimizer = torch.optim.Adam(params=module.parameters(), lr=learning_rate)
# Training
def train(epoch, *, log_interval=300):
    """Run one pass over ``train_data``, stepping ``optimizer`` on every batch.

    Args:
        epoch: Zero-based epoch index; used only (as ``epoch + 1``) in the
            printed progress line.
        log_interval: Number of batches between progress lines. Each line
            reports the mean loss over the batches since the previous line.
            Batches left over after the last full interval are not reported.

    Raises:
        ValueError: If ``log_interval`` is not positive.
    """
    if log_interval <= 0:
        raise ValueError('log_interval must be a positive integer')
    running_loss = 0.0
    # enumerate yields (index, batch); batch_idx counts batches seen so far.
    for batch_idx, (inputs, targets) in enumerate(train_data):
        y_pred = module(inputs)
        batch_loss = loss(y_pred, targets)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
        # Report once every log_interval batches.
        if batch_idx % log_interval == log_interval - 1:
            # %d is an integer, %5d an integer right-aligned in five columns.
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            # Start accumulating afresh for the next interval.
            running_loss = 0.0
# Evaluation
def test():
    """Print the model's accuracy over ``test_data`` as a whole-number percent."""
    n_correct = 0
    n_seen = 0
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for inputs, targets in test_data:
            scores = module(inputs)
            # Predicted class = index of the largest score in each row.
            predicted = scores.argmax(dim=1)
            # The first dimension of the labels is the number of samples in the batch.
            n_seen += targets.shape[0]
            # Count positions where the prediction equals the label.
            n_correct += (predicted == targets).sum().item()
    # %d truncates the percentage to an integer; %% prints a literal '%'.
    print('Accuracy on test set: %d %%' % (100 * n_correct / n_seen))
# Script entry point: train for a fixed number of epochs.
if __name__ == '__main__':
    num_epochs = 10
    for epoch in range(num_epochs):
        train(epoch)
        # NOTE(review): indentation was lost in this copy of the file; test()
        # is assumed to run after every epoch rather than once at the end.
        test()
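# Note: when using the cross-entropy loss function, the last layer does not
# need an activation function to apply a non-linear transformation.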