通过VGG进行CIFAR-10分类:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data.dataloader import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
# -----------------------------------------------------------------------------
# Data loading and preprocessing
# -----------------------------------------------------------------------------
BATCH_SIZE = 128
# torchvision's datasets yield PIL images with values in [0, 1]; convert to a
# Tensor and standardize each RGB channel.
# NOTE(review): the original comment claimed the result lies in [-1, 1], but
# these are the ImageNet per-channel mean/std values, so the data is
# standardized (zero mean, unit variance per channel), not mapped to [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),  # PIL image -> FloatTensor in [0, 1]
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]  # per-channel RGB mean / std
)
# Load the raw CIFAR-10 data from the parent directory (must already be downloaded).
train_data = datasets.CIFAR10(root='../data', train=True, download=False, transform=transform)
test_data = datasets.CIFAR10(root='../data', train=False, download=False, transform=transform)
# Wrap the datasets in DataLoaders that feed mini-batches to the network.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Fix: shuffling the evaluation set serves no purpose and makes per-batch
# logs non-reproducible; iterate it in a fixed order.
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)
# 定义神经网络
"""
VGG系列
"""
# Per-variant layer specifications for the VGG family.
# An integer N denotes a 3x3 convolution producing N output channels
# (expanded to Conv -> BatchNorm -> ReLU in VGG._make_layers);
# 'M' denotes a 2x2 max-pooling layer that halves the spatial size.
cfg = {
'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
class VGG(nn.Module):
    """VGG-style convolutional classifier for 32x32 RGB inputs (CIFAR-10).

    Args:
        vgg_name: key into the module-level ``cfg`` dict ('VGG11'..'VGG19').
        num_classes: number of output classes (default 10 for CIFAR-10).
    """

    def __init__(self, vgg_name, num_classes=10):
        super(VGG, self).__init__()
        # Convolutional feature extractor built from the chosen configuration.
        self.features = self._make_layers(cfg[vgg_name])
        # Fully connected head; after five 2x2 pools a 32x32 input leaves a
        # 1x1x512 feature map, hence in_features=512.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512, out_features=4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=num_classes)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)  # flatten the 1x1x512 map to (N, 512)
        logits = self.classifier(flat)
        # Log-softmax over the class dimension (dim=1, i.e. row-wise).
        return F.log_softmax(logits, dim=1)

    def _make_layers(self, layer_cfg):
        """Translate a config list into an nn.Sequential of conv/pool blocks."""
        blocks = []
        channels = 3  # the first conv consumes the RGB input, hence 3 channels
        for spec in layer_cfg:
            if spec == 'M':
                # Halve the spatial resolution.
                blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # One unit: Conv -> BatchNorm -> ReLU.  inplace=True lets ReLU
            # overwrite its input buffer instead of allocating a new tensor.
            blocks.extend([
                nn.Conv2d(channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
            channels = spec
        return nn.Sequential(*blocks)
# Select the computation device: CUDA when available, otherwise CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print('use :', device)
# Instantiate the network and move its parameters to the chosen device.
net = VGG('VGG16').to(device)
print(net)
# Loss and optimizer: cross-entropy trained with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
"""训练网络"""
def train(epoch_num, epochs):
print(f'[{epoch_num:>2}/{epochs}]:', end=' ') # 加f表示格式化字符串,可以在字符串里用{}括起来的变量和表达式
net.train()
train_costs = []
train_accs = []
running_loss = 0.0
correct = 0.0
total = 0
timestart = time.time()
for batch_id, data in enumerate(train_loader, 0):
inputs, labels = data
if use_gpu:
inputs, labels = inputs.cuda(), labels.cuda()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outpus = net(inputs)
loss = criterion(outpus, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
_, predicted = torch.max(outpus.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
# iter的训练结果
train_costs.append(running_loss / total)
train_accs.append(correct / total)
print('loss:{:.4f} acc:{:.4f} time:{:.2f}s'.format(running_loss / total, correct / total, time.time() - timestart),
end=' ')
return train_costs, train_accs
def test(epoch):
    """Evaluate the network on ``test_loader`` and checkpoint the best model.

    Appends the epoch's top-1 accuracy to ``<savepath>/acc.txt``; when the
    accuracy beats the global ``best_acc``, saves a checkpoint (``ckpt.pth``)
    and records the new best in ``<savepath>/best_acc.txt``.

    Args:
        epoch: current epoch index (recorded in the logs and checkpoint).

    Returns:
        (test_loss, test_acc): single-element lists with the epoch's average
        per-batch loss and top-1 accuracy.
    """
    print(' -->', end=' ')
    net.eval()  # disable dropout, use running batch-norm statistics
    correct = 0.
    batch_loss = 0.
    total = 0
    test_loss = []
    test_acc = []
    global best_acc
    with torch.no_grad():  # no gradients needed during evaluation
        for batch_id, data in enumerate(test_loader):
            inputs, labels = data
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
            batch_loss += loss.item()
            total += labels.size(0)
    top1_acc = correct / total
    # Fix: loss.item() is the batch mean, so average over batches rather than
    # samples (the original batch_loss / total was off by ~BATCH_SIZE).
    avg_loss = batch_loss / len(test_loader)
    print(f'loss: {avg_loss:.4f}', end=' ')
    print(f'acc@1:{correct}/{total} = {top1_acc:.4f}', end=' ')
    # Fix: ensure the output directory exists BEFORE any file is written —
    # the original only created it on the best-accuracy path, yet wrote
    # acc.txt unconditionally first.
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    with open(os.path.join(savepath, 'acc.txt'), 'a') as f:
        f.write('epoch:{} top1_{}\n'.format(epoch, top1_acc))  # fix: 'epch' typo
    test_loss.append(avg_loss)  # running batch-mean loss averaged over batches
    test_acc.append(top1_acc)
    if top1_acc > best_acc:
        print('best...')
        state = {
            'epoch': epoch,
            'acc': top1_acc,
            'feature': net.state_dict(),  # model weights
        }
        best_acc = top1_acc
        torch.save(state, os.path.join(savepath, 'ckpt.pth'))
        with open(os.path.join(savepath, 'best_acc.txt'), 'w') as f:
            f.write('Best_epoch{} top1_{}\n'.format(epoch, top1_acc))
    return test_loss, test_acc
def draw(d, mode='train'):
    """Plot loss and accuracy curves and save them as ``<savepath>/<mode>.png``.

    Args:
        d: dict with 'loss' and 'acc' lists of equal length.
        mode: 'train' (x-axis labelled in iterations/epoch records) or
            anything else, labelled in epochs.
    """
    # Fix: removed the redundant function-local matplotlib import; pyplot is
    # already imported module-wide as ``plt``.
    loss = d['loss']
    acc = d['acc']
    iters = len(loss)
    # Dashed reference line at y = 1 (the accuracy ceiling).
    plt.plot(range(iters), np.ones(iters), 'r--', label='1')
    plt.plot(range(iters), loss, label='loss')
    plt.plot(range(iters), acc, label='acc')
    plt.title(mode)
    if mode == 'train':
        plt.xlabel('iter')
    else:
        plt.xlabel('epoch')
    plt.ylabel('loss&acc')
    plt.legend()
    plt.savefig(os.path.join(savepath, '{}.png'.format(mode)), bbox_inches='tight')
    plt.clf()  # clear the figure so the next call starts fresh
if __name__ == '__main__':
    # Module-level state read/written by train(), test() and draw().
    best_acc = 0.
    savepath = './'
    EPOCH_NUM = 20
    # Curves accumulated across every epoch.
    train_ = {'loss': [], 'acc': []}
    test_ = {'loss': [], 'acc': []}
    start = time.time()
    for epoch in range(EPOCH_NUM):
        epoch_loss, epoch_acc = train(epoch, EPOCH_NUM)
        eval_loss, eval_acc = test(epoch)
        train_['loss'].extend(epoch_loss)
        train_['acc'].extend(epoch_acc)
        test_['loss'].extend(eval_loss)
        test_['acc'].extend(eval_acc)
        # Refresh the saved plots after each epoch.
        draw(train_, 'train')
        draw(test_, 'test')
    end = time.time()
    print('best_acc:', best_acc)
    minutes, seconds = divmod(end - start, 60)
    print(f'total time: {int(minutes)} m {seconds:.2f} s')
运行过程如下:
训练&测试效果: