目录
数据集
本文使用的数据集是 CIFAR10。CIFAR10 一共有 50000 张训练集图片和 10000 张测试集图片,两个数据集里面的图片都是 png 彩色图片,图片大小是 32 x 32 x 3,对应一个 10 分类问题,类别分别为飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船和卡车。这个数据集是衡量网络性能的一个非常重要的基准:一般来说,如果一个网络在这个数据集上的表现超过另外一个网络,那么它的性能通常也比另外一个网络好。作者写作时了解到的这个数据集上最好的结果是 95% 左右的测试集准确率。
from torchvision.datasets import CIFAR10
import torch
import cv2
import numpy as np
def data_tf(x):
    """Convert one image into a normalized, resized, channel-first tensor.

    The image is converted to a float32 array, divided by 255, shifted and
    scaled with ``(v - 0.5) / 0.5``, resized to 224 x 224 with ``cv2.resize``
    and finally transposed so that axis 2 comes first.

    Args:
        x: Image accepted by ``np.array``; assumed to be height x width x
            channel with values in 0..255 (TODO confirm against the dataset).

    Returns:
        A ``torch`` tensor that shares memory with the transposed array.
    """
    scaled = np.array(x, dtype='float32') / 255
    # Normalize with mean 0.5 and std 0.5.
    normalized = (scaled - 0.5) / 0.5
    resized = cv2.resize(normalized, (224, 224))
    # PyTorch expects the channel axis first.
    channel_first = resized.transpose((2, 0, 1))
    return torch.from_numpy(channel_first)
## Load the CIFAR10 train/test splits from ./data.
## download=True fetches the files only when they are not already present,
## so a first run on a fresh machine no longer fails with "Dataset not found".
train_set = CIFAR10('./data', train=True, transform=data_tf, download=True)
test_set = CIFAR10('./data', train=False, transform=data_tf, download=True)
Net
这是GoogLeNet的网络结构
class BasicConv2d(nn.Module):
    """Bias-free 2-D convolution followed by batch normalization and ReLU.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel: Convolution kernel size.
        stride: Convolution stride (default 1).
        padding: Convolution padding (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel, stride=1, padding=0):
        super().__init__()
        # The convolution carries no bias; it is followed directly by BatchNorm.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Apply convolution, batch normalization and an in-place ReLU."""
        normalized = self.bn(self.conv(x))
        return F.relu(normalized, inplace=True)
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along dim 1.

    Args:
        in_channels: Number of channels of the input.
        out_channels_1x1: Output channels of the stand-alone 1x1 convolution.
        out_channels_1x1_3: Output channels of the 1x1 convolution placed
            before the 3x3 convolution.
        out_channels_3x3: Output channels of the 3x3 convolution.
        out_channels_1x1_5: Output channels of the 1x1 convolution placed
            before the 5x5 convolution.
        out_channels_5x5: Output channels of the 5x5 convolution.
        out_channels_pool: Output channels of the 1x1 convolution placed
            after the max pooling.
    """

    def __init__(self, in_channels, out_channels_1x1,
                 out_channels_1x1_3, out_channels_3x3,
                 out_channels_1x1_5, out_channels_5x5,
                 out_channels_pool):
        super().__init__()
        # Branch 1: a single 1x1 convolution.
        self.branch1x1 = BasicConv2d(in_channels, out_channels_1x1, kernel=1)
        # Branch 2: 1x1 convolution followed by a padded 3x3 convolution.
        self.branch3x3 = nn.Sequential(
            BasicConv2d(in_channels, out_channels_1x1_3, kernel=1),
            BasicConv2d(out_channels_1x1_3, out_channels_3x3,
                        kernel=3, stride=1, padding=1),
        )
        # Branch 3: 1x1 convolution followed by a padded 5x5 convolution.
        self.branch5x5 = nn.Sequential(
            BasicConv2d(in_channels, out_channels_1x1_5, kernel=1),
            BasicConv2d(out_channels_1x1_5, out_channels_5x5,
                        kernel=5, stride=1, padding=2),
        )
        # Branch 4: stride-1 3x3 max pooling followed by a 1x1 convolution.
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, out_channels_pool, kernel=1),
        )

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results on dim 1."""
        branches = (
            self.branch1x1,
            self.branch3x3,
            self.branch5x5,
            self.branch_pool,
        )
        return torch.cat([branch(x) for branch in branches], 1)
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier: five feature blocks and a linear head.

    Args:
        in_channels: Number of channels of the input images.
        num_class: Number of classes, i.e. the size of the output vector.
    """

    def __init__(self, in_channels, num_class):
        super(GoogLeNet, self).__init__()
        # Block 1: 7x7 stride-2 convolution and stride-2 max pooling.
        # NOTE(review): block1/block2 use plain nn.Conv2d with no activation
        # in between; kept unchanged so existing checkpoints behave the same.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, 7, 2, 3),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 2: two 3x3 convolutions and stride-2 max pooling.
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 192, 3, 1, 1),
            nn.Conv2d(192, 192, 3, 1, 1),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 3: two Inception modules (192 -> 256 -> 480 channels).
        self.block3 = nn.Sequential(
            Inception(192, 64, 96, 128, 16, 32, 32),
            Inception(256, 128, 128, 192, 32, 96, 64),
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 4: five Inception modules (480 -> ... -> 832 channels).
        self.block4 = nn.Sequential(
            Inception(480, 192, 96, 208, 16, 48, 64),
            Inception(512, 160, 112, 224, 24, 64, 64),  # the full version emits an extra output here
            Inception(512, 128, 128, 256, 24, 64, 64),
            Inception(512, 112, 144, 288, 32, 64, 64),
            Inception(528, 256, 160, 320, 32, 128, 128),  # the full version emits an extra output here
            nn.MaxPool2d(3, 2, 1)
        )
        # Block 5: two Inception modules (832 -> 1024 channels) and global
        # average pooling. AdaptiveAvgPool2d(1) equals the former
        # AvgPool2d(7, 1) for 224x224 inputs (7x7 feature map), but also
        # yields a 1x1 map for other input sizes, so the 1024-wide linear
        # layer below always receives the expected shape.
        self.block5 = nn.Sequential(
            Inception(832, 256, 160, 320, 32, 128, 128),
            Inception(832, 384, 192, 384, 48, 128, 128),
            nn.AdaptiveAvgPool2d(1)
        )
        # Classification head; it returns raw scores (no softmax/sigmoid).
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(1024, num_class),
        )

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        # Flatten everything except the batch dimension.
        x = torch.reshape(x, (x.shape[0], -1))
        x = self.classifier(x)
        return x
详细内容可以查看我这篇博客
接下来把训练部分的代码封装成一个独立的模块,放在 uilt 中
uilt
# 开发人员: 骆根强
# 开发时间: 2022/7/30 10:50
# 功能作用: 未知
import torch
import time
import tqdm
from torch.autograd import Variable
def train(epoches, train_data, model, device, criterion, optimizer, pth_name):
    """Train ``model`` and save its parameters after every epoch.

    Args:
        epoches: Number of training epochs.
        train_data: Iterable of ``(image_data, image_label)`` batches that
            also supports ``len()`` (for example a ``DataLoader``).
        model: Model to train; it is called as ``model(image_data)``.
        device: Device every batch is moved to before the forward pass.
        criterion: Loss function, called as ``criterion(out, image_label)``.
        optimizer: Optimizer used to update the model parameters.
        pth_name: Name prefix of the parameter file written to ``./params``.

    Returns:
        A ``(losses, accuracies)`` tuple of lists holding the mean batch
        loss and the mean batch accuracy of every epoch.
    """
    import os  # local import so the block does not depend on new file-level imports

    # torch.save does not create missing directories; without this the first
    # checkpoint write fails only after a whole epoch has been trained.
    os.makedirs('./params', exist_ok=True)

    # Make sure dropout / batch-norm layers run in training mode.
    model.train()

    losses_men = []
    acces_men = []
    start = time.time()
    for epoche in range(epoches):
        train_loss = 0
        train_acc = 0
        time1 = time.time()
        print()
        print(f'开始,第 {epoche + 1} / {epoches} 个Epoche中:')
        for image_data, image_label in tqdm.tqdm(train_data):
            # Tensors are used directly; the Variable wrapper is a deprecated no-op.
            image_data = image_data.to(device)
            image_label = image_label.to(device)
            # Forward pass
            out = model(image_data)
            loss = criterion(out, image_label)
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Accumulate the loss
            train_loss += loss.item()
            # Accumulate the accuracy: correct predictions in this batch
            _, pred_label = out.max(1)
            num_correct = (pred_label == image_label).sum().item()
            acc = num_correct / out.shape[0]
            train_acc += acc
        losses_men.append(train_loss / len(train_data))
        acces_men.append(train_acc / len(train_data))
        time2 = time.time()
        # NOTE: the file name uses the total epoch count, so every epoch
        # overwrites the same checkpoint file.
        torch.save(model.state_dict(), f'./params/{pth_name}_{epoches}.pth')
        print(f'Epoch_time : ', time2 - time1)
    print()
    print('train_loss : ', losses_men)
    print()
    print('train_acc : ', acces_men)
    # Split the elapsed time into hours, minutes within the hour and seconds
    # (previously the minutes field showed the total number of minutes).
    elapsed = int(time.time() - start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'All time : {hours} H '
          f'{minutes} m {seconds} s ')
    return losses_men, acces_men
然后就进行完整的训练过程
train
import torch
import cv2
import numpy as np
import torch.nn as nn
import four_net
import uilt
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch import optim
## Training hyper-parameters
epoches = 1
batch_size = 3
## Train on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
def data_tf(x):
    """Convert one image into a normalized, resized, channel-first tensor.

    The image is converted to a float32 array, divided by 255, shifted and
    scaled with ``(v - 0.5) / 0.5``, resized to 224 x 224 with ``cv2.resize``
    and finally transposed so that axis 2 comes first.

    Args:
        x: Image accepted by ``np.array``; assumed to be height x width x
            channel with values in 0..255 (TODO confirm against the dataset).

    Returns:
        A ``torch`` tensor that shares memory with the transposed array.
    """
    scaled = np.array(x, dtype='float32') / 255
    # Normalize with mean 0.5 and std 0.5.
    normalized = (scaled - 0.5) / 0.5
    resized = cv2.resize(normalized, (224, 224))
    # PyTorch expects the channel axis first.
    channel_first = resized.transpose((2, 0, 1))
    return torch.from_numpy(channel_first)
## Load the CIFAR10 train/test splits from ./data.
## download=True fetches the files only when they are not already present.
train_set = CIFAR10('./data', train=True, transform=data_tf, download=True)
test_set = CIFAR10('./data', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)
## Model (3 input channels, 10 classes), loss function and optimizer
model = four_net.GoogLeNet(3, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-1)
print()
print('目前使用的是: ', device)
## Run the training loop (the closing parenthesis was missing before)
uilt.train(epoches, train_data, model, device, criterion, optimizer, pth_name='googlenet')
总结
****************************************************************************************************************************************************************************************************************************
train_acc : [0.4877117966751918, 0.7165121483375959, 0.8027293797953964, 0.8466272378516624, 0.8776974104859335, 0.9011149296675192, 0.9178388746803069, 0.9341432225063938, 0.9479699488491049, 0.9559223145780051, 0.9642543158567775, 0.9674312659846548, 0.9730458759590793, 0.9789402173913043, 0.9809782608695652, 0.984934462915601, 0.9881713554987213, 0.9888507033248082, 0.9883112212276215, 0.9879515664961637, 0.9934063299232737, 0.9944453324808185, 0.9930466751918159, 0.9949048913043478, 0.9937859654731458, 0.9963834718670077, 0.9977221867007673, 0.998321611253197, 0.9973025895140665, 0.9927070012787724]
画出准确度曲线:
import matplotlib.pyplot as plt
# Training accuracy of every epoch, copied from the training output.
acces_men = [0.4877117966751918, 0.7165121483375959, 0.8027293797953964,
             0.8466272378516624, 0.8776974104859335, 0.9011149296675192,
             0.9178388746803069, 0.9341432225063938, 0.9479699488491049,
             0.9559223145780051, 0.9642543158567775, 0.9674312659846548,
             0.9730458759590793, 0.9789402173913043, 0.9809782608695652,
             0.984934462915601, 0.9881713554987213, 0.9888507033248082,
             0.9883112212276215, 0.9879515664961637, 0.9934063299232737,
             0.9944453324808185, 0.9930466751918159, 0.9949048913043478,
             0.9937859654731458, 0.9963834718670077, 0.9977221867007673,
             0.998321611253197, 0.9973025895140665, 0.9927070012787724]
# Plot the training-accuracy curve. range() replaces np.arange() so this
# snippet runs with only the matplotlib import above it.
plt.plot(range(len(acces_men)), acces_men, label='train acc')
# Without legend() the label passed to plot() is never displayed.
plt.legend()
plt.show()
GoogLeNet 模型训练的收敛速度比 VGG 还快:光是第一个 epoch,训练准确率就达到了约 0.49(远超 0.3),要知道 VGG(加了 BatchNorm1d)是经过了第三个 epoch 才超过 0.3 的;如果 VGG 没有 BatchNorm1d,表现就差得多,经过 20 轮的训练准确率连 0.1 都达不到。
GoogLeNet 经过 20 轮训练后,训练集准确率已经接近 99%(上面的数据一共训练了 30 轮,最终约为 99.3%)。
最后,老婆压场
喜欢的话,给我老婆点个赞吧☺