1 Background
1.1 Motivation
Research has shown that in very deep ResNets, the later layers contribute less and less to accuracy; in fact, layers of an extremely deep ResNet can be dropped at random with little harm, which makes the network behave somewhat like a recurrent neural network. DenseNet starts from this observation: instead of blindly stacking more layers, it rethinks the connectivity. Where ResNet adds each block's output to the block's original input, in a DenseNet block every convolutional layer receives the outputs of all preceding layers as its input, and its own output is in turn passed to every subsequent layer.
Note that combining multiple inputs here is not simple element-wise addition: the feature maps are concatenated along the channel dimension. This shortens the path between each layer's parameters and the loss function, which speeds up training and mitigates gradient problems, and is one of the model's major advantages.
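A minimal sketch of the difference, using hypothetical tensors (my own illustration, not the paper's code): ResNet merges by element-wise addition, while DenseNet concatenates along dim 1, the channel dimension:

import torch

x = torch.randn(1, 64, 56, 56)      # input feature map: 64 channels
f_x = torch.randn(1, 64, 56, 56)    # a layer's output with the same shape

res_out = x + f_x                   # ResNet: element-wise addition, still 64 channels
dense_out = torch.cat([x, f_x], 1)  # DenseNet: channel concatenation
print(res_out.shape)                # torch.Size([1, 64, 56, 56])
print(dense_out.shape)              # torch.Size([1, 128, 56, 56])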
1.2 Main work
- Datasets: CIFAR, SVHN, and ImageNet
- Baselines: mainly ResNet variants and DenseNets of different depths
Results:
Read the table's footnotes first, then the table data.
Conclusions:
- At the time, DenseNet achieved the lowest error rates on all three datasets, which is exactly what the model set out to do;
- It is also quite clear that for DenseNets of the same depth, data augmentation drags the results down in some settings, so augmentation is not always a win;
- In the paper, the results the authors ran themselves are marked with a *. They covered a lot of ground: horizontally, comparisons across different datasets; vertically, comparisons across different models, and the results come out on top everywhere. Isn't that exactly the result we all hope for when publishing a paper!
The rest of the paper compares against ResNet on the ImageNet dataset, and the advantage is just as obvious!
I am also attaching a blog post in which someone has already dissected the model thoroughly. DenseNet was an oral at a top conference in 2017; people were already digging into it back in 2017, when I was still in high school. A good reminder to keep an eye on new techniques!
2 Source code
2.1 DenseNet from scratch
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

class _DenseLayer(nn.Sequential):
    """Basic unit of a DenseBlock: BN-ReLU-1x1conv bottleneck, then BN-ReLU-3x3conv."""
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        self.add_module("norm1", nn.BatchNorm2d(num_input_features))
        self.add_module("relu1", nn.ReLU(inplace=True))
        self.add_module("conv1", nn.Conv2d(num_input_features, bn_size*growth_rate,
                                           kernel_size=1, stride=1, bias=False))
        self.add_module("norm2", nn.BatchNorm2d(bn_size*growth_rate))
        self.add_module("relu2", nn.ReLU(inplace=True))
        self.add_module("conv2", nn.Conv2d(bn_size*growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate

    def forward(self, x):
        new_features = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        # concatenate input and output along the channel dimension
        return torch.cat([x, new_features], 1)

class _DenseBlock(nn.Sequential):
    """DenseBlock: stacks num_layers _DenseLayers; the i-th layer sees all earlier outputs."""
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(num_input_features + i*growth_rate, growth_rate, bn_size,
                                drop_rate)
            self.add_module("denselayer%d" % (i+1), layer)

class _Transition(nn.Sequential):
    """Transition layer between two adjacent DenseBlocks: a 1x1 conv compresses the
    channels, then 2x2 average pooling halves the spatial size to cut parameters."""
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module("norm", nn.BatchNorm2d(num_input_features))
        self.add_module("relu", nn.ReLU(inplace=True))
        self.add_module("conv", nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module("pool", nn.AvgPool2d(2, stride=2))

class DenseNet(nn.Module):
    """DenseNet-BC model."""
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64,
                 bn_size=4, compression_rate=0.5, drop_rate=0, num_classes=1000):
        """
        :param growth_rate: growth rate, i.e. K=32
        :param block_config: number of layers in each DenseBlock; these defaults give DenseNet-121
        :param num_init_features: channels of the first convolution, usually 2*K=64
        :param bn_size: bottleneck factor of the 1x1 conv; its output channels are bn_size*K=128
        :param compression_rate: compression factor of the transition layers
        :param drop_rate: dropout probability; 0 disables dropout
        :param num_classes: number of output classes
        """
        super(DenseNet, self).__init__()
        # first Conv2d
        self.features = nn.Sequential(OrderedDict([
            ("conv0", nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ("norm0", nn.BatchNorm2d(num_init_features)),
            ("relu0", nn.ReLU(inplace=True)),
            ("pool0", nn.MaxPool2d(3, stride=2, padding=1))
        ]))
        # DenseBlocks, with a transition layer after all but the last
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers, num_features, bn_size, growth_rate, drop_rate)
            self.features.add_module("denseblock%d" % (i + 1), block)
            num_features += num_layers*growth_rate
            if i != len(block_config) - 1:
                transition = _Transition(num_features, int(num_features*compression_rate))
                self.features.add_module("transition%d" % (i + 1), transition)
                num_features = int(num_features * compression_rate)
        # final BN + ReLU
        self.features.add_module("norm5", nn.BatchNorm2d(num_features))
        self.features.add_module("relu5", nn.ReLU(inplace=True))
        # classification layer
        self.classifier = nn.Linear(num_features, num_classes)
        # parameter initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.bias, 0)
                nn.init.constant_(m.weight, 1)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        features = self.features(x)
        out = F.avg_pool2d(features, 7, stride=1).view(features.size(0), -1)
        out = self.classifier(out)
        return out
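As a quick sanity check (my own snippet, not from the original source), instantiate the model above and push a dummy batch through it:

model = DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000)  # DenseNet-121
dummy = torch.randn(2, 3, 224, 224)  # forward() pools with kernel size 7, so 224x224 input is expected
out = model(dummy)
print(out.shape)  # torch.Size([2, 1000])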
DenseNet (this link is the original source of the code!)
2.2 The right way to use it
Load a DenseNet model from torchvision.models. The available entries are the DenseNet base class plus the factory functions densenet121, densenet161, densenet169, and densenet201; everything except the bare DenseNet class can be used directly as a ready-made model.
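For example (a sketch using the classic pretrained flag; newer torchvision versions prefer the weights argument instead):

from torchvision import models

net121 = models.densenet121(pretrained=True)   # classifier in_features = 1024
net169 = models.densenet169(pretrained=False)  # classifier in_features = 1664
net201 = models.densenet201(pretrained=False)  # classifier in_features = 1920
net161 = models.densenet161(pretrained=False)  # growth_rate=48, classifier in_features = 2208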
2.2.1 A classification model built on densenet121:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Preliminary setup; the following is for reference only
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),      # random rotation in (-10, 10) degrees
    transforms.RandomHorizontalFlip(),  # horizontal flip with probability 0.5
    transforms.Resize(227),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],  # per-channel (x - mean) / std
                         [0.229, 0.224, 0.225])
])
# Step 1: point ImageFolder at the parent folder of the class folders, with the transforms to apply
dataset = datasets.ImageFolder(root="../input/medical-mnist", transform=train_transforms)
# Step 2: split the indices into train and test, stratified by class label
train_indices, test_indices = train_test_split(list(range(len(dataset.targets))), train_size=0.8, test_size=0.2, stratify=dataset.targets)
# Step 3: build subsets from the full dataset and the index lists
train_dataset = Subset(dataset, train_indices)  # training set
test_dataset = Subset(dataset, test_indices)    # test set
# Step 4: wrap the subsets in DataLoaders to get batches the model can consume
train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=12)
from torchvision.models import densenet121
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, NUMCLASS, pretrain=False):
        super(Net, self).__init__()
        # the whole densenet121 backbone, with its classifier head swapped out
        self.backbone = densenet121(pretrained=pretrain)
        self.backbone.classifier = nn.Linear(1024, NUMCLASS)  # densenet121's classifier has in_features=1024
    def forward(self, x):
        return self.backbone(x)
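Rather than hardcoding 1024, you can also read the width off the model itself; a small sketch:

backbone = densenet121(pretrained=False)
in_features = backbone.classifier.in_features    # 1024 for densenet121
backbone.classifier = nn.Linear(in_features, 6)  # works unchanged for the other densenet variants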
# def count_parameters(model):
# params = [p.numel() for p in model.parameters() if p.requires_grad]
# # for item in params:
# # print(f'{item:>8}')
# print(f'________\n{sum(params):>8}')
# count_parameters(net)
# Parameters
NUM_CLASSES = 6
EPOCH = 150
LR = 0.001
# Initialization
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net(NUM_CLASSES, True).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
import time

train_losses = []
test_losses = []
train_acc = []
test_acc = []
for i in range(EPOCH):
    start_time = time.time()
    # training phase
    net.train()
    total_train_loss = 0
    total_train_acc = 0
    for idx, (x_train, y_train) in enumerate(train_loader):
        x_train, y_train = x_train.to(device), y_train.to(device)
        y_pred = net(x_train)
        loss = criterion(y_pred, y_train)
        total_train_loss += loss.item()
        total_train_acc += (y_pred.argmax(1) == y_train).sum().item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    lr_scheduler.step()  # StepLR with step_size=30 is meant to step once per epoch
    train_losses.append(total_train_loss)
    train_acc.append(total_train_acc/len(train_dataset))
    # evaluation phase
    net.eval()
    total_test_loss = total_test_acc = 0
    with torch.no_grad():
        for idx, (x_test, y_test) in enumerate(test_loader):
            x_test, y_test = x_test.to(device), y_test.to(device)
            y_pred = net(x_test)
            loss = criterion(y_pred, y_test)
            total_test_loss += loss.item()
            total_test_acc += (y_pred.argmax(1) == y_test).sum().item()
    test_losses.append(total_test_loss)
    test_acc.append(total_test_acc/len(test_dataset))
    end_time = time.time()
    print(f"{i+1}/{EPOCH}, time:{end_time-start_time:.1f}s \t train_loss:{total_train_loss:.4f}\t train_acc:{total_train_acc/len(train_dataset):.4f}\t test_loss:{total_test_loss:.4f} \t test_acc:{total_test_acc/len(test_dataset):.4f}")
Some of the results:
Roughly ten minutes per epoch, and it peaked right out of the gate!
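Since the loop collects the per-epoch metrics into lists, a few lines of matplotlib (my addition, assuming the lists above) will plot the curves:

import matplotlib.pyplot as plt

epochs = range(1, len(train_losses) + 1)
plt.plot(epochs, train_losses, label="train loss")
plt.plot(epochs, test_losses, label="test loss")
plt.xlabel("epoch")
plt.ylabel("summed loss")
plt.legend()
plt.show()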
2.2.2 A segmentation model built on densenet121:
I used to be unclear about what a backbone was for; later, in a segmentation paper, I saw DenseNet used as the feature extractor, serving as the encoder of a UNet, with solid results. A reference usage follows:
1. Inspect DenseNet's network structure:
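For example, printing the model and its top-level feature modules:

from torchvision.models import densenet121

model = densenet121(pretrained=False)
print(model)  # full structure: features (conv0 ... denseblock4, norm5) + classifier
for name, _ in model.features.named_children():
    print(name)  # conv0, norm0, relu0, pool0, denseblock1, transition1, ..., norm5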
2. Pick the modules you need:
Feed your own data through and check each module's output size and channel count, as in the sketch below!
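One way to do this (a sketch: run a dummy input through model.features module by module and print the running shape):

import torch
from torchvision.models import densenet121

model = densenet121(pretrained=False).eval()
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    for name, module in model.features.named_children():
        x = module(x)
        print(f"{name:>12}: {tuple(x.shape)}")
# e.g. denseblock1 -> (1, 256, 56, 56), transition1 -> (1, 128, 28, 28), ...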
3. Assemble and adapt the model:
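A hedged sketch of the idea (my own assembly, not the exact architecture of any particular paper): split model.features into stages and keep each stage's output as a skip connection for the UNet decoder:

import torch
import torch.nn as nn
from torchvision.models import densenet121

class DenseNetEncoder(nn.Module):
    """densenet121 features split into stages for a UNet-style encoder."""
    def __init__(self, pretrained=False):
        super().__init__()
        f = densenet121(pretrained=pretrained).features
        self.stage0 = nn.Sequential(f.conv0, f.norm0, f.relu0)              # 64 ch,   1/2 resolution
        self.stage1 = nn.Sequential(f.pool0, f.denseblock1)                 # 256 ch,  1/4
        self.stage2 = nn.Sequential(f.transition1, f.denseblock2)           # 512 ch,  1/8
        self.stage3 = nn.Sequential(f.transition2, f.denseblock3)           # 1024 ch, 1/16
        self.stage4 = nn.Sequential(f.transition3, f.denseblock4, f.norm5)  # 1024 ch, 1/32
    def forward(self, x):
        skips = []
        for stage in (self.stage0, self.stage1, self.stage2, self.stage3, self.stage4):
            x = stage(x)
            skips.append(x)
        return skips

The decoder side would then upsample skips[-1] step by step and concatenate skips[-2], skips[-3], ... at the matching resolutions.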
4. The backbone structure of this classic model:
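For reference, the stage-by-stage outputs of densenet121 on a 224x224 input (these follow from growth_rate=32 and block_config=(6, 12, 24, 16), consistent with the shape check above):

conv0 + pool0 -> 64 ch,   56 x 56
denseblock1   -> 256 ch,  56 x 56   (transition1 -> 128 ch, 28 x 28)
denseblock2   -> 512 ch,  28 x 28   (transition2 -> 256 ch, 14 x 14)
denseblock3   -> 1024 ch, 14 x 14   (transition3 -> 512 ch, 7 x 7)
denseblock4   -> 1024 ch, 7 x 7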
The official source code is attached: DenseNet