参考视频:2.1 pytorch官方demo(Lenet)_哔哩哔哩_bilibili
LeNet(1998)
笔记
-
Pytorch Tensor的通道排序:[batch,channel,height,width]
-
CIFAR10 dataset: It has the classes: ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’. The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.
-
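为什么每计算一个batch就需要调用一次optimizer.zero_grad():PyTorch默认会累加梯度(loss.backward()会把新梯度加到参数已有的.grad上),如果不在每个batch前清零,当前batch的梯度就会与之前batch的梯度叠加,导致参数更新错误。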
-
pytorch官网:-docs可查看各个函数的用法,-tutorials可以查看示例
-
If `data = torch.max(outputs, dim=1)`, then `data` will be a tuple containing two tensors.
`data[0]`: This tensor will contain the maximum values along dimension 1 (the class dimension) of the `outputs` tensor. It will have a shape of `[batch]`, where `batch` is the number of validation images in the batch.
`data[1]`: This tensor will contain the indices of the maximum values along dimension 1 (the class dimension) of the `outputs` tensor. It will also have a shape of `[batch]`, where each element represents the predicted class label (index) for each validation image in the batch.
-
To get the size of a tensor along a specific dimension, use the method `size()` or the property `shape`:
import torch
# Assuming val_label is a tensor with shape [batch_size, ...]
# Using size() method
size_along_first_dim = val_label.size(0)
# Using shape property
size_along_first_dim = val_label.shape[0]
model
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """LeNet-style CNN mapping 3x32x32 images to 10 raw class scores.

    Two conv + max-pool stages are followed by three fully connected layers.
    No softmax is applied to the output. The shape comments in ``forward``
    omit the leading batch dimension.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for x of shape (batch, 3, 32, 32)."""
        x = F.relu(self.conv1(x))    # input(3, 32, 32) output(16, 28, 28)
        x = self.pool1(x)            # output(16, 14, 14)
        x = F.relu(self.conv2(x))    # output(32, 10, 10)
        x = self.pool2(x)            # output(32, 5, 5)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32*5*5), this cannot fold a wrong spatial size into the
        # batch dimension; a mismatched input fails in fc1 instead.
        x = x.flatten(start_dim=1)   # output(32*5*5)
        x = F.relu(self.fc1(x))      # output(120)
        x = F.relu(self.fc2(x))      # output(84)
        x = self.fc3(x)              # output(10)
        return x
train
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
def main():
    """Train LeNet on CIFAR-10 for 5 epochs and save the weights to ./Lenet.pth.

    Every 500 training steps, the mean training loss over those steps and the
    accuracy on the whole validation batch are printed.
    """
    # ToTensor converts a PIL Image or numpy.ndarray (H x W x C) in the range
    # [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range
    # [0.0, 1.0]. Normalize then applies, per channel,
    # output[channel] = (input[channel] - mean[channel]) / std[channel].
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # 50000 training images.
    # Set download=True on the first run so the dataset is fetched automatically.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
    # shuffle: whether to shuffle the dataset
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images.
    # Set download=True on the first run so the dataset is fetched automatically.
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=10000,
                                             shuffle=False, num_workers=0)
    # batch_size equals the validation set size, so one next() call yields
    # every validation image and label.
    val_data_iter = iter(val_loader)
    val_image, val_label = next(val_data_iter)

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    def imshow(img):
        """Display a normalized (C, H, W) image tensor with matplotlib."""
        # Imported locally: the original referenced plt without importing it,
        # and nothing else in this script needs matplotlib.
        import matplotlib.pyplot as plt

        img = img / 2 + 0.5  # unnormalize (inverse of Normalize(0.5, 0.5))
        # matplotlib expects (H, W, C), so move the channel axis last.
        plt.imshow(img.permute(1, 2, 0).numpy())
        plt.show()

    # print labels
    print(' '.join(f'{classes[val_label[j]]:5s}' for j in range(4)))
    # show images: only the four whose labels were just printed, not the
    # whole 10000-image validation batch
    imshow(torchvision.utils.make_grid(val_image[:4]))

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0.0
        # step runs up to about 50000 / 36: the training set has 50000 images
        # and 36 is the batch_size given to the training DataLoader.
        for step, data in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)                  # forward pass
            loss = loss_function(outputs, labels)  # compute the loss
            loss.backward()                        # backward pass
            optimizer.step()                       # parameter update

            # print statistics
            running_loss += loss.item()
            # step is 0-based, so comparing the remainder with 499 (not 0 or
            # 500) reports right after the 500th iteration, not the 501st.
            if step % 500 == 499:  # print every 500 mini-batches
                # Gradients are not tracked inside this block; without it the
                # evaluation pass would also build the autograd graph and
                # waste a lot of resources.
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)

                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


if __name__ == '__main__':
    main()
AlexNet(2012)
VGG(2014)
笔记
-
在train时,如果是基于初始化权重进行迁移学习,需要在数据预处理时先将图片减去[123.68,116.78,103.94],这是imagenet的所有图片的三通道的均值,如果是从头训练,不需要减去
-
num_workers:加载数据使用的子进程数(不是线程数),0表示在主进程中加载数据;Windows下也可以大于0,但创建并迭代DataLoader的代码必须放在 if __name__ == '__main__': 保护之下,调试时设为0最稳妥
model
import torch.nn as nn
import torch
# Official pretrained weights, keyed by model name. The URLs are plain
# strings: the surrounding "<...>" was a markdown auto-link artifact and would
# make them unusable as download URLs.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}
class VGG(nn.Module):
    """VGG network: a feature extractor followed by a 3-layer classifier.

    Args:
        features: module applied to the input first. Its output is flattened
            per sample and must contain 512*7*7 values, the input width of
            the first classifier layer.
        num_classes: number of outputs of the last classifier layer.
        init_weights: when True, re-initialize Conv2d and Linear layers with
            ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run features, flatten all but the batch dimension, then classify."""
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-uniform init for Conv2d/Linear weights; biases are set to 0."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers built with bias=False have no bias tensor to fill;
                # the same guard now covers Linear as well as Conv2d.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
def make_features(cfg: list, in_channels: int = 3) -> nn.Sequential:
    """Build a convolutional feature extractor from a layer configuration.

    Args:
        cfg: sequence whose items are either the string "M" (a 2x2 max-pool
            with stride 2) or an int (a 3x3 convolution with padding 1 and
            that many output channels, followed by an in-place ReLU).
        in_channels: number of channels of the input image; defaults to 3.

    Returns:
        An ``nn.Sequential`` holding the layers in ``cfg`` order.
    """
    layers = []
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            # The next convolution consumes what this one produced.
            in_channels = v
    return nn.Sequential(*layers)  # unpack the list into positional arguments
# Layer layouts per model name. A number is the count of convolution kernels
# of one conv layer; "M" is a pooling layer. Each row below ends at a pooling
# layer. The letters A/B/D/E are the original labels of the configurations
# (presumably the column names of the VGG paper's configuration table).
cfgs = {
    # A
    "vgg11": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    # B
    "vgg13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    # D
    "vgg16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    # E
    "vgg19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}
def vgg(model_name="vgg16", **kwargs):
    """Build the VGG variant registered under ``model_name`` in ``cfgs``.

    The layer layout is looked up in ``cfgs`` and turned into a feature
    extractor with ``make_features``. Any extra keyword arguments are
    forwarded unchanged to the ``VGG`` constructor.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    return VGG(make_features(cfgs[model_name]), **kwargs)
GoogLeNet(2014)
ResNet(2015)