Implementing a DenseNet Model in PyTorch: Detailed Code Walkthrough and Channel Calculations

This post walks through the DenseNet model, one of the important models everyone should master. The first thing to understand is how information flows through the network: DenseNet broke with the prevailing design ideas of its time and introduced a genuinely novel connectivity pattern.
[Figure: DenseNet's dense connectivity pattern]
As the figure shows, the input of layer l depends not only on the output of layer l-1 but on the outputs of all preceding layers: x_l = H_l([x_0, x_1, ..., x_(l-1)]), where [·] denotes channel-wise concatenation.
[Figure: a dense block with feature maps X0-X4 and their skip connections]
If that is still unclear, let me spell it out. The colored blocks in the figure (red, green, purple, yellow, orange) are different feature maps; the red one is the input. For convenience, call them X0, X1, X2, X3, X4. When the input X0 is convolved to produce X1, X0 itself is also carried forward unchanged. When X1 is convolved to produce X2, X0's features are concatenated alongside X1's; follow the connecting lines in the figure and you can see X0 and X1 both feeding X2. Likewise, X0, X1, and X2 all feed X3. Those connecting lines are the key: each one is a feature map being passed forward and concatenated at a later layer.
For example, suppose the growth rate growth_rate is 16 and the input has 3 channels. A quick calculation gives the (in, out) channel pairs of successive conv blocks as (3, 16), (19, 16), (35, 16), ...: each concatenation adds 16 channels to the next layer's input. That is the core of DenseNet: the input channel count keeps growing, while every layer's output channel count stays fixed at the growth rate.
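A minimal sketch of this bookkeeping (my own illustration, not part of the model code), assuming growth_rate = 16 and a 3-channel input:

import torch

x = torch.rand(1, 3, 32, 32)     # X0: the 3-channel input
out = torch.rand(1, 16, 32, 32)  # stand-in for the output of conv_block(3, 16)
x = torch.cat((out, x), dim=1)   # concatenate along the channel axis
print(x.shape)                   # torch.Size([1, 19, 32, 32])

channel = 3
for i in range(3):
    print('conv_block', i, ': in =', channel, ', out = 16')
    channel += 16                # concatenation grows the next input by 16
# prints the pairs (3, 16), (19, 16), (35, 16)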

[Figure: overall DenseNet architecture]

Next, we need a rough picture of DenseNet's overall design. The same block keeps reappearing in the figure, so we can encapsulate it as a reusable building block.

def conv_block(in_channel, out_channel):
    # BN -> ReLU -> 3x3 conv: the composite layer used throughout DenseNet
    layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1, bias=False)
    )
    return layer

This helper exists purely so the BN-ReLU-Conv pattern can be reused many times below.
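A quick sanity check (my own snippet, assuming conv_block from above is in scope and using a random 32x32 input):

import torch

blk = conv_block(3, 16)
y = blk(torch.rand(1, 3, 32, 32))
print(y.shape)  # torch.Size([1, 16, 32, 32]): channels 3 -> 16, spatial size preserved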

class dense_block(nn.Module):
    def __init__(self, in_channel, growth_rate, num_layers):
        super(dense_block, self).__init__()
        block = []
        channel = in_channel
        for i in range(num_layers):
            block.append(conv_block(channel, growth_rate))
            channel += growth_rate  # each layer's input grows by growth_rate
        self.net = nn.Sequential(*block)
    def forward(self, x):
        for layer in self.net:
            out = layer(x)
            x = torch.cat((out, x), dim=1)  # concatenate along the channel axis
        return x

This class builds the stack of conv blocks in a for loop. in_channel is the number of input channels, and growth_rate is how fast the channel count grows. Why isn't the input channel count fixed? Because block.append(conv_block(channel, growth_rate)) is followed by channel += growth_rate: the channel count keeps changing as layers are appended. nn.Sequential(*block) unpacks the list of modules into a Sequential container.
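To see the channel growth end to end, a quick check (again my own snippet, using the classes above):

import torch

blk = dense_block(3, 16, 3)    # in_channel=3, growth_rate=16, 3 layers
y = blk(torch.rand(1, 3, 96, 96))
print(y.shape)                 # torch.Size([1, 51, 96, 96]): 3 + 3*16 = 51 channels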

def transition(in_channel, out_channel):
    trans_layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, 1),  # 1x1 conv shrinks the channel count
        nn.AvgPool2d(2, 2)                      # stride-2 pooling halves H and W
    )
    return trans_layer

[Figure: transition layer]
This function corresponds to the transition block in the figure. Its parameters are fixed, so it is easy to follow: the 1x1 convolution reduces the channel count, and the stride-2 average pooling halves the height and width, keeping the model's complexity in check.
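A quick check of that behaviour (my own snippet):

import torch

tl = transition(96, 48)        # halve the channel count
y = tl(torch.rand(1, 96, 32, 32))
print(y.shape)                 # torch.Size([1, 48, 16, 16]): channels, H, and W all halved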

    def _make_dense_block(self,channels, growth_rate, num):
        block = []
        block.append(dense_block(channels, growth_rate, num))
        channels += num * growth_rate

        return nn.Sequential(*block)
    def _make_transition_layer(self,channels):
        block = []
        block.append(transition(channels, channels // 2))
        return nn.Sequential(*block)

Since the figure contains many dense_block and transition_layer blocks, we wrap their construction in these helper methods (they belong to the densenet class defined next); channels // 2 is exactly the complexity-reduction step described above.

class densenet(nn.Module):
    def __init__(self, in_channel, num_classes, growth_rate=32, block_layers=[6, 12, 24, 16]):
        super(densenet, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channel, 64, 7, 2, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(3, 2, padding=1)
            )
        self.DB1 = self._make_dense_block(64, growth_rate,num=block_layers[0])
        self.TL1 = self._make_transition_layer(256)
        self.DB2 = self._make_dense_block(128, growth_rate, num=block_layers[1])
        self.TL2 = self._make_transition_layer(512)
        self.DB3 = self._make_dense_block(256, growth_rate, num=block_layers[2])
        self.TL3 = self._make_transition_layer(1024)
        self.DB4 = self._make_dense_block(512, growth_rate, num=block_layers[3])
        self.global_average = nn.Sequential(
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1)),
        )
        self.classifier = nn.Linear(1024, num_classes)
    def forward(self, x):
        x = self.block1(x)
        x = self.DB1(x)
        x = self.TL1(x)
        x = self.DB2(x)
        x = self.TL2(x)
        x = self.DB3(x)
        x = self.TL3(x)
        x = self.DB4(x)
        x = self.global_average(x)
        x = x.view(x.shape[0], -1)
        x = self.classifier(x)
        return x

This is the full forward pass of the DenseNet, matching the structure in the figure. You may wonder where the numbers inside self.DB1, self.TL1, and friends come from: I worked them out by hand. The construction could be replaced by a more compact loop (shown below), but writing it out explicitly makes the network structure visible and is good practice for channel arithmetic.
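Here is that hand calculation, with growth_rate = 32 and block_layers = [6, 12, 24, 16]:

    64  + 6*32  = 256    -> TL1 halves it -> 128
    128 + 12*32 = 512    -> TL2 halves it -> 256
    256 + 24*32 = 1024   -> TL3 halves it -> 512
    512 + 16*32 = 1024   -> BatchNorm2d(1024), global average pooling, Linear(1024, num_classes)

These are exactly the numbers hard-coded into self.TL1/TL2/TL3, global_average, and the classifier.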

        for i, layers in enumerate(block_layers):
            block.append(dense_block(channels, growth_rate, layers))
            channels += layers * growth_rate
            if i != len(block_layers) - 1:
                # after every dense_block except the last, append a transition
                # that halves the channel count (channels // 2)
                block.append(transition(channels, channels // 2))
                channels = channels // 2

The original way of writing it is this simpler loop. block_layers = [6, 12, 24, 16] gives the number of conv blocks each dense block repeats. The check if i != len(block_layers) - 1 asks whether we have just built the last dense block: if not, a transition layer is appended before the next dense block is constructed; the final dense block is followed by the pooling head instead.
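Putting that loop to work, a compact constructor might look like this (a sketch of my own; the class name densenet_compact and the self.features attribute are my inventions, and it reuses dense_block and transition from above):

class densenet_compact(nn.Module):
    def __init__(self, in_channel, num_classes, growth_rate=32, block_layers=[6, 12, 24, 16]):
        super(densenet_compact, self).__init__()
        block = [
            nn.Conv2d(in_channel, 64, 7, 2, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(3, 2, padding=1),
        ]
        channels = 64
        for i, layers in enumerate(block_layers):
            block.append(dense_block(channels, growth_rate, layers))
            channels += layers * growth_rate
            if i != len(block_layers) - 1:
                block.append(transition(channels, channels // 2))
                channels //= 2
        # channels is now 1024 for the default configuration
        block += [nn.BatchNorm2d(channels), nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1))]
        self.features = nn.Sequential(*block)
        self.classifier = nn.Linear(channels, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.shape[0], -1)
        return self.classifier(x)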

Full model source

import torch
from torch import nn

def conv_block(in_channel, out_channel):
    layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1, bias=False)
    )
    return layer

class dense_block(nn.Module):
    def __init__(self, in_channel, growth_rate, num_layers):
        super(dense_block, self).__init__()
        block = []
        channel = in_channel
        for i in range(num_layers):
            block.append(conv_block(channel, growth_rate))
            channel += growth_rate
        self.net = nn.Sequential(*block)
    def forward(self, x):
        for layer in self.net:
            out = layer(x)
            x = torch.cat((out, x), dim=1)
        return x

def transition(in_channel, out_channel):
    trans_layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, 1),
        nn.AvgPool2d(2, 2)
    )
    return trans_layer

class densenet(nn.Module):
    def __init__(self, in_channel, num_classes, growth_rate=32, block_layers=[6, 12, 24, 16]):
        super(densenet, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channel, 64, 7, 2, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(3, 2, padding=1)
            )
        self.DB1 = self._make_dense_block(64, growth_rate,num=block_layers[0])
        self.TL1 = self._make_transition_layer(256)
        self.DB2 = self._make_dense_block(128, growth_rate, num=block_layers[1])
        self.TL2 = self._make_transition_layer(512)
        self.DB3 = self._make_dense_block(256, growth_rate, num=block_layers[2])
        self.TL3 = self._make_transition_layer(1024)
        self.DB4 = self._make_dense_block(512, growth_rate, num=block_layers[3])
        self.global_average = nn.Sequential(
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1)),
        )
        self.classifier = nn.Linear(1024, num_classes)
    def forward(self, x):
        x = self.block1(x)
        x = self.DB1(x)
        x = self.TL1(x)
        x = self.DB2(x)
        x = self.TL2(x)
        x = self.DB3(x)
        x = self.TL3(x)
        x = self.DB4(x)
        x = self.global_average(x)
        x = x.view(x.shape[0], -1)
        x = self.classifier(x)
        return x

    def _make_dense_block(self,channels, growth_rate, num):
        block = []
        block.append(dense_block(channels, growth_rate, num))
        channels += num * growth_rate

        return nn.Sequential(*block)
    def _make_transition_layer(self,channels):
        block = []
        block.append(transition(channels, channels // 2))
        return nn.Sequential(*block)


net = densenet(3,10)
x = torch.rand(1,3,224,224)
for name,layer in net.named_children():
    if name != "classifier":
        x = layer(x)
        print(name, 'output shape:', x.shape)
    else:
        x = x.view(x.size(0), -1)
        x = layer(x)
        print(name, 'output shape:', x.shape)
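For a 224x224 input, the printed shapes work out as follows (derived from the channel arithmetic above; the spatial size halves twice in block1 and once at each transition):

block1 output shape: torch.Size([1, 64, 56, 56])
DB1 output shape: torch.Size([1, 256, 56, 56])
TL1 output shape: torch.Size([1, 128, 28, 28])
DB2 output shape: torch.Size([1, 512, 28, 28])
TL2 output shape: torch.Size([1, 256, 14, 14])
DB3 output shape: torch.Size([1, 1024, 14, 14])
TL3 output shape: torch.Size([1, 512, 7, 7])
DB4 output shape: torch.Size([1, 1024, 7, 7])
global_average output shape: torch.Size([1, 1024, 1, 1])
classifier output shape: torch.Size([1, 10])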

Training results

[Figure: training and test accuracy curves]

Runnable training code

import torch
from torch import nn

def conv_block(in_channel, out_channel):
    layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1, bias=False)
    )
    return layer

class dense_block(nn.Module):
    def __init__(self, in_channel, growth_rate, num_layers):
        super(dense_block, self).__init__()
        block = []
        channel = in_channel
        for i in range(num_layers):
            block.append(conv_block(channel, growth_rate))
            channel += growth_rate
        self.net = nn.Sequential(*block)
    def forward(self, x):
        for layer in self.net:
            out = layer(x)
            x = torch.cat((out, x), dim=1)
        return x

def transition(in_channel, out_channel):
    trans_layer = nn.Sequential(
        nn.BatchNorm2d(in_channel),
        nn.ReLU(),
        nn.Conv2d(in_channel, out_channel, 1),
        nn.AvgPool2d(2, 2)
    )
    return trans_layer

class densenet(nn.Module):
    def __init__(self, in_channel, num_classes, growth_rate=32, block_layers=[6, 12, 24, 16]):
        super(densenet, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channel, 64, 7, 2, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.MaxPool2d(3, 2, padding=1)
            )
        self.DB1 = self._make_dense_block(64, growth_rate,num=block_layers[0])
        self.TL1 = self._make_transition_layer(256)
        self.DB2 = self._make_dense_block(128, growth_rate, num=block_layers[1])
        self.TL2 = self._make_transition_layer(512)
        self.DB3 = self._make_dense_block(256, growth_rate, num=block_layers[2])
        self.TL3 = self._make_transition_layer(1024)
        self.DB4 = self._make_dense_block(512, growth_rate, num=block_layers[3])
        self.global_average = nn.Sequential(
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1)),
        )
        self.classifier = nn.Linear(1024, num_classes)
    def forward(self, x):
        x = self.block1(x)
        x = self.DB1(x)
        x = self.TL1(x)
        x = self.DB2(x)
        x = self.TL2(x)
        x = self.DB3(x)
        x = self.TL3(x)
        x = self.DB4(x)
        x = self.global_average(x)
        x = x.view(x.shape[0], -1)
        x = self.classifier(x)
        return x

    def _make_dense_block(self,channels, growth_rate, num):
        block = []
        block.append(dense_block(channels, growth_rate, num))
        channels += num * growth_rate

        return nn.Sequential(*block)
    def _make_transition_layer(self,channels):
        block = []
        block.append(transition(channels, channels // 2))
        return nn.Sequential(*block)



def Densenet(num_classes):
    return densenet(in_channel=3,num_classes=num_classes)

import time
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt


def load_dataset(batch_size):
    train_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=True,
        download=True, transform=transforms.ToTensor()
    )
    test_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=False,
        download=True, transform=transforms.ToTensor()
    )
    train_iter = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=4
    )
    test_iter = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=4
    )
    return train_iter, test_iter
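One optional improvement, not used in the run below: normalizing CIFAR-10 with its commonly quoted per-channel statistics (treat the exact numbers as approximate) tends to speed up convergence:

transform = transforms.Compose([
    transforms.ToTensor(),
    # widely used CIFAR-10 channel means/stds; the values are approximate
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
])
# then pass transform=transform to both CIFAR10 datasets above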


def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, test_iter=None):
    net.train()
    record_train = list()
    record_test = list()

    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        total, correct, train_loss = 0, 0, 0
        start = time.time()

        for i, (X, y) in enumerate(train_iter):
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()
            train_acc = 100.0 * correct / total

            if (i + 1) % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(i + 1, len(train_iter), train_loss / (i + 1), \
                            train_acc, get_cur_lr(optimizer)))


        if lr_scheduler is not None:
            lr_scheduler.step()

        print("--- cost time: {:.4f}s ---".format(time.time() - start))

        if test_iter is not None:
            record_test.append(test(net, test_iter, criterion, device))
        record_train.append(train_acc)

    return record_train, record_test


def test(net, test_iter, criterion, device):
    total, correct = 0, 0
    net.eval()

    with torch.no_grad():
        print("*************** test ***************")
        for X, y in test_iter:
            X, y = X.to(device), y.to(device)

            output = net(X)
            loss = criterion(output, y)

            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()

    test_acc = 100.0 * correct / total

    print("test_loss: {:.3f} | test_acc: {:6.3f}%"\
          .format(loss.item(), test_acc))
    print("************************************\n")
    net.train()

    return test_acc


def get_cur_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


def learning_curve(record_train, record_test=None):
    plt.style.use("ggplot")

    plt.plot(range(1, len(record_train) + 1), record_train, label="train acc")
    if record_test is not None:
        plt.plot(range(1, len(record_test) + 1), record_test, label="test acc")

    plt.legend(loc=4)
    plt.title("learning curve")
    plt.xticks(range(0, len(record_train) + 1, 5))
    plt.yticks(range(0, 101, 5))
    plt.xlabel("epoch")
    plt.ylabel("accuracy")

    plt.show()


import torch.optim as optim


BATCH_SIZE = 128
NUM_EPOCHS = 20
NUM_CLASSES = 10
LEARNING_RATE = 0.02
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
NUM_PRINT = 100
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def main():
    net = Densenet(NUM_CLASSES)
    net = net.to(DEVICE)

    train_iter, test_iter = load_dataset(BATCH_SIZE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        net.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
    )
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    record_train, record_test = train(net, train_iter, criterion, optimizer, \
          NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, test_iter)

    learning_curve(record_train, record_test)


if __name__ == '__main__':
    main()

More model walkthroughs to come.
