引言
ResNet 有效地解决了深度神经网络难以训练的问题,可以训练高达 1000 层的卷积网络。网络之所以难以训练,是因为存在着梯度消失的问题,离 loss 函数越远的层,在反向传播的时候,梯度越小,就越难以更新,随着层数的增加,这个现象越严重。
之前有两种常见的方案来解决这个问题:
1.按层训练,先训练比较浅的层,然后再不断增加层数,但是这种方法效果不是特别好,而且比较麻烦
2.使用更宽的层,或者增加输出通道,而不加深网络的层数,这种结构往往得到的效果又不好。
ResNet 通过引入跨层连接解决了梯度回传消失的问题。
这就是普通的网络连接跟跨层残差连接的对比图,使用普通的连接,上层的梯度必须要一层一层传回来,而使用残差连接,相当于中间有了一条更短的路,梯度能够从这条更短的路传回来,避免了梯度过小的情况。
假设某层的输入是 x,期望输出是 H(x), 如果我们直接把输入 x 传到输出作为初始结果,这就是一个更浅层的网络,更容易训练,而这个网络没有学会的部分,我们可以使用更深的网络 F(x) 去训练它,使得训练更加容易,最后希望拟合的结果就是 F(x) = H(x) - x,这就是一个残差的结构。
残差网络的结构就是上面这种残差块的堆叠。
代码
train:
import torch
import cv2
import numpy as np
import torch.nn as nn
import net
import uilt
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch import optim
from torchvision.transforms import transforms
# Training hyper-parameters.
epoches = 1     # number of passes over the training set
batch_size = 3  # samples per mini-batch

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def data_tf(x):
    """Convert one dataset image into a normalized channel-first float tensor.

    ``x`` must provide ``resize(size, resample)`` and be convertible with
    ``np.array`` into a (height, width, channel) array.
    NOTE(review): presumably a PIL image, where resample code 2 selects
    bilinear filtering -- confirm against the dataset in use.

    Returns a ``float32`` tensor laid out as (channel, 224, 224). Values are
    divided by 255 and then shifted/scaled with ``(v - 0.5) / 0.5``, so an
    input range of 0..255 ends up in -1..1.
    """
    resized = x.resize((224, 224), 2)
    pixels = np.array(resized, dtype='float32') / 255
    normalized = (pixels - 0.5) / 0.5
    # PyTorch convolutions expect the channel axis first.
    channel_first = normalized.transpose((2, 0, 1))
    return torch.from_numpy(channel_first)
# Fetch CIFAR-10 into ./data on first use. With download=True torchvision
# reuses files that are already present, so an existing copy is not fetched
# again; download=False made a fresh checkout fail with "dataset not found".
train_set = CIFAR10('./data', train=True, transform=data_tf, download=True)
test_set = CIFAR10('./data', train=False, transform=data_tf, download=True)

# Only the training split is shuffled; the test loader keeps dataset order.
train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# ResNet(3): the network is built for 3 input channels.
model = net.ResNet(3).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-1)

print()
print('目前使用的是: ', device)
uilt.train(epoches, train_data, model, device, criterion, optimizer, pth_name='ResNet')
Net
class BasicBloch(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    Pass ``same_shape=True`` when ``in_channels == out_channels``; the input
    is then added to the block output unchanged.  Pass ``same_shape=False``
    when they differ: the first convolution runs with stride 2 and the input
    goes through a 1x1 stride-2 convolution so both branches match in shape.
    """

    def __init__(self, in_channels, out_channels, same_shape=True):
        super().__init__()
        self.same_shape = same_shape
        stride = 2 if not same_shape else 1

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if not same_shape:
            # Projection for the skip path when the main path changes shape.
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                   stride=stride)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        shortcut = x if self.same_shape else self.conv3(x)
        residual += shortcut
        return self.relu(residual)
class ResNet(nn.Module):
    """Small ResNet-style classifier built from ``BasicBloch`` stages.

    Layout: a 7x7 stride-2 convolution, a 3x3 stride-2 max-pool, four stages
    of two residual blocks each (64, 128, 256 and 512 channels, the last
    three halving the resolution), a 3x3 average pool and a linear head.

    The head expects 2048 flattened features.  For a 224x224 input the
    spatial size shrinks 224 -> 109 -> 54 -> 27 -> 14 -> 7 -> 2, giving
    512 * 2 * 2 = 2048; other input sizes will not match the linear layer.

    Args:
        in_channel: number of channels of the input images.
        num_classes: number of output logits (defaults to 10).
    """

    def __init__(self, in_channel, num_classes=10):
        super().__init__()
        # Not read anywhere in this class; kept so existing attribute
        # access on instances keeps working.
        self.verbose = None

        self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
        self.block2 = nn.Sequential(
            nn.MaxPool2d(3, 2),
            BasicBloch(64, 64),
            BasicBloch(64, 64),
        )
        self.block3 = nn.Sequential(
            BasicBloch(64, 128, False),
            BasicBloch(128, 128),
        )
        self.block4 = nn.Sequential(
            BasicBloch(128, 256, False),
            BasicBloch(256, 256),
        )
        self.block5 = nn.Sequential(
            BasicBloch(256, 512, False),
            BasicBloch(512, 512),
            nn.AvgPool2d(3),
        )
        self.classifier = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.shape[0], -1)
        return self.classifier(x)
uilt
import os
import time

import torch
import tqdm
from torch.autograd import Variable
def train(epoches, train_data, model, device, criterion, optimizer, pth_name):
    """Train ``model`` and save its weights after every epoch.

    Args:
        epoches: number of epochs to run.
        train_data: iterable of ``(inputs, labels)`` batches that also
            supports ``len()`` (used to average the per-batch metrics).
        model: network being trained.
        device: device each batch is moved to before the forward pass.
        criterion: loss function called as ``criterion(output, labels)``.
        optimizer: optimizer that updates the model parameters.
        pth_name: prefix of the checkpoint file name.

    Returns:
        A ``(losses, accuracies)`` tuple of lists holding the mean batch
        loss and mean batch accuracy of each epoch.
    """
    # Create the checkpoint directory up front so a missing folder does not
    # crash torch.save after a whole epoch of work.
    os.makedirs('./params', exist_ok=True)

    losses_men = []
    acces_men = []
    start = time.time()
    for epoche in range(epoches):
        model.train()
        train_loss = 0
        train_acc = 0
        time1 = time.time()
        print()
        print(f'开始,第 {epoche + 1} / {epoches} 个Epoche中:')
        for image_data, image_label in tqdm.tqdm(train_data):
            image_data = image_data.to(device)
            image_label = image_label.to(device)

            # Forward pass.
            out = model(image_data)
            loss = criterion(out, image_label)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Fraction of samples in this batch whose arg-max matches the label.
            _, pred_label = out.max(1)
            num_correct = (pred_label == image_label).sum().item()
            train_acc += num_correct / out.shape[0]

        losses_men.append(train_loss / len(train_data))
        acces_men.append(train_acc / len(train_data))
        time2 = time.time()
        # The file name uses the total epoch count, so every epoch overwrites
        # the same checkpoint and only the latest weights are kept.
        torch.save(model.state_dict(), f'./params/{pth_name}_{epoches}.pth')
        print('Epoch_time : ', time2 - time1)

    print()
    print('train_loss : ', losses_men)
    print()
    print('train_acc : ', acces_men)

    # Split the elapsed time into hours / minutes / seconds; minutes are the
    # remainder within the hour, not the total number of minutes.
    hours, remainder = divmod(int(time.time() - start), 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'All time : {hours} H {minutes} m {seconds} s ')
    return losses_men, acces_men
总结
ResNet网络比起GoogLeNet速度快了不少(不排除我这个ResNet只有15层的原因),准确率还高。
以下结论,只针对进行一轮训练的结果
设备: 3050Ti + 4G显存
室外温度:38℃
室内温度:36℃
散热器: 风扇 + 3050自带散热器
**************************************************************************************************************
**************************** 训练用时 *********************************** 准确度 **************
**************************************************************************************************************
VGG网络: 32min 0.18532209079283887
GoogLeNet: 9min 0.3277117966751918
ResNet: 6min40s 0.4139617207655863
最后,老婆压场
喜欢的话,给我老婆点个赞吧☺