1.导入数据包
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
torch.__version__
from torch.utils.data import DataLoader
2.定义训练次数和GPU
# Training schedule and hardware selection.
EPOCHS = 10        # total number of passes over the training set
BATCH_SIZE = 512   # samples loaded per batch (needs roughly 2 GB of GPU memory)
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
本文是参考一篇博客,然后又根据自己的风格改动了其中的一些代码编写方式,感觉这样看起来更舒服、易懂。
3.下载手写体数据集
使用PyTorch中自带的torchvision.datasets方法下载手写数字的数据集。
torch.utils.data.DataLoader是PyTorch中数据读取的一个重要接口,该接口定义在dataloader.py脚本中,只要是用PyTorch来训练模型基本都会用到该接口。该接口主要用来将自定义的数据读取接口的输出或者PyTorch已有的数据读取接口(如dataset)的输入按照batch size封装成Tensor,作为模型的输入。
可以先 from torch.utils.data import DataLoader,这样后续代码中的 torch.utils.data.DataLoader 只需要写成 DataLoader 即可。
# ToTensor: converts a PIL.Image or numpy.ndarray to a torch.FloatTensor
# of shape (C, H, W) with values scaled into the range [0.0, 1.0].
transform = transforms.ToTensor()

# MNIST train/test splits; downloaded into ./data on first run.
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)

# Wrap the datasets so 512-sample batches come out in random order.
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=True)
或者嵌套程序
# Alternative: build the same loaders as one nested expression.
# Normalize standardizes each tensor image with MNIST's mean and std.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root="./data", train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,)),
                   ])),
    batch_size=512,
    shuffle=True)  # shuffle=True: input batches are drawn in random order

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,)),
                   ])),
    batch_size=512,
    shuffle=True)
4.定义网络模型
## 方法一:
class ConvNet(nn.Module):
    """CNN for 1x28x28 MNIST digits; returns log-probabilities over 10 classes."""

    def __init__(self):
        super().__init__()
        # 1x28x28 -> conv(5) -> 10x24x24 -> maxpool(2) -> 10x12x12
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 10x12x12 -> conv(3) -> 20x10x10
        self.layer2 = nn.Sequential(
            nn.Conv2d(10, 20, kernel_size=3),
            nn.ReLU(True),
        )
        # 20*10*10 = 2000 flattened features -> 500 -> 10 class scores
        self.classifier = nn.Sequential(
            nn.Linear(20 * 10 * 10, 500),
            nn.ReLU(True),
            nn.Linear(500, 10),
        )

    def forward(self, x):
        features = self.layer2(self.layer1(x))
        # Flatten to (batch, 2000); x.size(0) is the batch size and -1 lets
        # view() infer the remaining dimension from the tensor's element count.
        flat = features.view(x.size(0), -1)
        logits = self.classifier(flat)
        return F.log_softmax(logits, dim=1)
## 方法二:
class ConvNet(nn.Module):
    """Same MNIST CNN written with plain layers plus functional ops in forward()."""

    def __init__(self):
        super().__init__()
        # input: 1x28x28
        self.conv1 = nn.Conv2d(1, 10, 5)   # -> 10x24x24  ((28 - 5) + 1 = 24)
        self.conv2 = nn.Conv2d(10, 20, 3)  # -> 20x10x10  ((12 - 3) + 1 = 10)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        h = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)  # 10x12x12 ((24 - 2)/2 + 1 = 12)
        h = F.relu(self.conv2(h))                      # 20x10x10
        h = h.view(x.size(0), -1)                      # flatten to (batch, 2000)
        h = F.relu(self.fc1(h))
        return F.log_softmax(self.fc2(h), dim=1)
5.实例化网络并定义优化器
方法一:采用 Adam优化器
# Method 1: instantiate the network, then move it onto the selected device.
model = ConvNet()
model = model.to(DEVICE)
# Adam optimizer with its default hyper-parameters.
optimizer = optim.Adam(model.parameters())
方法二:采用SGD优化器
# Method 2: SGD with momentum. Use .to(DEVICE) rather than .cuda() so this
# also runs on CPU-only machines, consistent with method 1 above.
model = ConvNet().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # lr: learning rate
6.训练样本集并进行测试
#训练过程
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over `train_loader`.

    Args:
        model: network producing log-probabilities (fed to F.nll_loss).
        device: torch.device the batches are moved to.
        train_loader: iterable of (data, label) batches.
        optimizer: optimizer updating model.parameters().
        epoch: 1-based epoch number, used only for progress printing.
    """
    model.train()  # training mode (affects dropout/batchnorm-style layers)
    total = 0
    correct = 0
    for batch_idx, (data, label) in enumerate(train_loader):
        data, label = data.to(device), label.to(device)
        output = model(data)
        train_loss = F.nll_loss(output, label)  # negative log-likelihood on log-probs
        # Accumulate accuracy statistics as plain Python ints (.item()),
        # consistent with test(); argmax replaces the deprecated output.data idiom.
        pred = output.argmax(dim=1)
        correct += (pred == label).sum().item()
        total += label.size(0)  # running count of samples seen this epoch
        optimizer.zero_grad()   # clear gradients from the previous step
        train_loss.backward()   # backpropagate the loss
        optimizer.step()        # apply the parameter update
        # Report once every 30 batches (30 * batch_size samples).
        if (batch_idx + 1) % 30 == 0:
            print('Train epoch{}/{}: [{}/{} ({:.0f}%)]\ttrain_loss: {:.6f},train accuracy:({:.0f}%)'.format(
                epoch, EPOCHS, total, len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                train_loss.item(),
                100 * correct / total))
#测试过程
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # 将一批的损失相加
# pred = output.max(1, keepdim=True)[1] # 找到概率最大的下标
pred = torch.max(output.data ,1)[1]
correct += pred.eq(target.view_as(pred)).sum().item()#0.852 0.0555
#法二 计算准确度
# _, pred = torch.max(output.data, 1)
# correct += torch.sum(pred == target.data) #loss=0.097 0.054
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
# Drive the whole experiment: one training pass plus one evaluation per epoch.
epoch = 0
while epoch < EPOCHS:
    epoch += 1
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)
上述方法是把训练、测试过程定义成一个函数,后续再调用执行
# 方法二:
#.训练数据集
# Method 2: the same train/evaluate cycle written inline, without helper functions.
for epoch in range(EPOCHS):
    # --- training phase ---
    model.train()  # training mode, matching train() in method 1
    for batch_idx, (data, label) in enumerate(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        output = model(data)                # forward pass
        loss = F.nll_loss(output, label)    # error between output and target
        optimizer.zero_grad()               # clear gradients from the previous step
        loss.backward()                     # backpropagate the loss
        optimizer.step()                    # apply the parameter update
        # Report once every 50 batches.
        if (batch_idx + 1) % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item()))

    # --- evaluation phase ---
    model.eval()  # inference mode; together with no_grad this matches test() above
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradient bookkeeping needed during evaluation
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            pred = output.max(1, keepdim=True)[1]  # index of the most likely class
            # Sum (not average) the per-sample losses across the batch.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            correct += pred.eq(target.view_as(pred)).sum().item()

    # Report the averaged loss and the accuracy over the whole test set.
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss,
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
上述两种方法本质上只有所选用的优化器不同,其它的就是编写方法的不同。两种方法的最终输出结果有微小的差异。