yolov3 darknet53网络及mobilenet改进附完整pytorch代码

最新推荐文章于 2024-05-24 21:31:37 发布

乒乒乓乓丫

最新推荐文章于 2024-05-24 21:31:37 发布

阅读量2.7w

点赞数 15

本文链接：https://blog.csdn.net/qq_39938666/article/details/89390945

版权

深度学习专栏收录该内容

39 篇文章 10 订阅

订阅专栏

一. 原理介绍

1.darknet53网络结构

Darknet-53 基本由 1×1 与 3×3 卷积构成。网络共有 53 层（52 个卷积层加末尾 1 个全连接层，残差单元内的两个卷积也计入其中），因此称为 Darknet-53。结构图直接引用一位博主的总结，简洁明了： https://blog.csdn.net/qq_37541097/article/details/81214953#commentBox

2. mobilenet网络介绍

注意好好理解一下卷积 nn.Conv2d() 里的参数 groups 的作用（groups 等于输入通道数时即为深度卷积）。

mobilenet详解可参考这个 https://blog.csdn.net/u011974639/article/details/79199306

二. 代码实现

1. darknet53.py

import torch
import torch.nn as nn
import time

class Conv2d(nn.Module):
    """Convolution -> batch normalisation -> LeakyReLU building block.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
        k: convolution kernel size.
        s: convolution stride.
        p: convolution padding.
    """

    def __init__(self, inc, ouc, k, s, p):
        super().__init__()
        # The three stages live in ``self.conv`` in this exact order, so the
        # parameter names (conv.0.*, conv.1.*) stay stable for checkpoints.
        stages = [
            nn.Conv2d(inc, ouc, kernel_size=k, stride=s, padding=p),
            nn.BatchNorm2d(ouc),
            nn.LeakyReLU(),  # library-default negative slope
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the conv/BN/activation stack and return it."""
        out = self.conv(x)
        return out


class ConvSet(nn.Module):  # inc -> ouc
    """Five stacked ``Conv2d`` blocks alternating 1x1 and 3x3 kernels.

    Channel flow: inc -> ouc -> ouc -> 2*ouc -> 2*ouc -> ouc. Every block
    uses stride 1; the 3x3 blocks use padding 1 and the 1x1 blocks padding 0.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super().__init__()
        wide = ouc * 2
        # (in_channels, out_channels, kernel, padding) for each block, in order.
        plan = (
            (inc, ouc, 1, 0),
            (ouc, ouc, 3, 1),
            (ouc, wide, 1, 0),
            (wide, wide, 3, 1),
            (wide, ouc, 1, 0),
        )
        blocks = [Conv2d(c_in, c_out, k, 1, pad) for c_in, c_out, k, pad in plan]
        self.convset = nn.Sequential(*blocks)

    def forward(self, x):
        """Apply the five blocks in sequence."""
        out = self.convset(x)
        return out


class Upsampling(nn.Module):
    """Parameter-free layer that upsamples its input by a factor of 2.

    Uses nearest-neighbour interpolation, so no new values are introduced.
    """

    def forward(self, x):
        """Return ``x`` interpolated with ``scale_factor=2``, mode 'nearest'."""
        resize = nn.functional.interpolate
        return resize(x, scale_factor=2, mode='nearest')


class Downsampling(nn.Module):
    """Single ``Conv2d`` block with a 3x3 kernel, stride 2 and padding 1.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super().__init__()
        strided_conv = Conv2d(inc, ouc, 3, 2, 1)
        # Kept inside a Sequential so parameter names remain ``d.0.*``.
        self.d = nn.Sequential(strided_conv)

    def forward(self, x):
        """Apply the stride-2 convolution block to ``x``."""
        out = self.d(x)
        return out


class Residual(nn.Module):  # inc -> inc
    """Residual unit: 1x1 bottleneck to inc // 2, 3x3 back to inc, plus skip.

    Args:
        inc: number of input (and output) channels.
    """

    def __init__(self, inc):
        super().__init__()
        squeezed = inc // 2
        self.r = nn.Sequential(
            Conv2d(inc, squeezed, 1, 1, 0),
            Conv2d(squeezed, inc, 3, 1, 1),
        )

    def forward(self, x):
        """Return the input added to the output of the two-conv branch."""
        branch = self.r(x)
        return x + branch


class MainNet(nn.Module):
    """Backbone plus three detection heads at successive resolutions.

    ``forward`` returns one tensor per scale, coarsest first. Each head ends
    in an 18-channel 1x1 convolution.
    NOTE(review): 18 is presumably anchors x (box + objectness + classes);
    confirm against the loss / decoding code, which is not part of this file.

    The size annotations (416, 208, ...) are carried over from the original
    comments and assume a 416x416 input.
    """

    @staticmethod
    def _stage(channels, repeats):
        """Build ``repeats`` (1x1 Conv2d, 3x3 Conv2d, Residual) triples.

        Returns a flat list so the caller can splat it into ``nn.Sequential``
        and keep the same module indices as a hand-written listing.
        """
        half = channels // 2
        layers = []
        for _ in range(repeats):
            layers.append(Conv2d(channels, half, 1, 1, 0))
            layers.append(Conv2d(half, channels, 3, 1, 1))
            layers.append(Residual(channels))
        return layers

    def __init__(self):
        super().__init__()

        # ---- backbone -------------------------------------------------
        self.d52 = nn.Sequential(
            Conv2d(3, 32, 3, 1, 1),    # 416
            Conv2d(32, 64, 3, 2, 1),   # 208
            *self._stage(64, 1),       # 1x
            Downsampling(64, 128),     # 104
            *self._stage(128, 2),      # 2x
            Downsampling(128, 256),    # 52
            *self._stage(256, 8),      # 8x
        )

        self.d26 = nn.Sequential(
            Downsampling(256, 512),    # 26
            *self._stage(512, 8),      # 8x
        )

        self.d13 = nn.Sequential(
            Downsampling(512, 1024),   # 13
            *self._stage(1024, 4),     # 4x
        )

        # ---- head on the coarsest map ---------------------------------
        self.convset_13 = nn.Sequential(ConvSet(1024, 512))
        self.detection_13 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, 18, 1, 1, 0),
        )
        self.conv_13 = nn.Sequential(Conv2d(512, 256, 1, 1, 0))
        self.up_to_26 = nn.Sequential(Upsampling())

        # ---- head on the middle map -----------------------------------
        # Input is the concatenation of 256 upsampled + 512 backbone channels.
        self.convset_26 = nn.Sequential(ConvSet(768, 512))
        self.detection_26 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, 18, 1, 1, 0),
        )
        self.conv_26 = nn.Sequential(Conv2d(512, 256, 1, 1, 0))
        self.up_to_52 = nn.Sequential(Upsampling())

        # ---- head on the finest map -----------------------------------
        # Input is the concatenation of 256 upsampled + 256 backbone channels.
        self.convset_52 = nn.Sequential(ConvSet(512, 512))
        self.detection_52 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, 18, 1, 1, 0),
        )

    def forward(self, x):
        """Return ``(out_13, out_26, out_52)`` for the input batch ``x``."""
        # Backbone feature maps, finest to coarsest.
        feat_52 = self.d52(x)
        feat_26 = self.d26(feat_52)
        feat_13 = self.d13(feat_26)

        # Coarsest scale.
        neck_13 = self.convset_13(feat_13)
        out_13 = self.detection_13(neck_13)

        # Middle scale: reduce channels, upsample, concat along channels.
        lifted_26 = self.up_to_26(self.conv_13(neck_13))
        merged_26 = torch.cat((lifted_26, feat_26), dim=1)
        neck_26 = self.convset_26(merged_26)
        out_26 = self.detection_26(neck_26)

        # Finest scale: same pattern one level up.
        lifted_52 = self.up_to_52(self.conv_26(neck_26))
        merged_52 = torch.cat((lifted_52, feat_52), dim=1)
        neck_52 = self.convset_52(merged_52)
        out_52 = self.detection_52(neck_52)

        return out_13, out_26, out_52


if __name__ == '__main__':
    # Rough GPU latency check for a single forward pass (requires CUDA).
    trunk = MainNet()
    # print(sum(param.numel() for param in trunk.parameters()))
    trunk.eval()
    trunk.cuda().half()
    # Use an initialised random input: torch.cuda.HalfTensor(1, 3, 128, 128)
    # only allocates memory, so its contents are arbitrary (possibly NaN/Inf).
    x = torch.randn(1, 3, 128, 128, device='cuda', dtype=torch.half)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        # Warm-up pass so one-time CUDA initialisation is not timed.
        y_13, y_26, y_52 = trunk(x)
        # print(y_13.shape)
        # print(y_26.shape)
        # print(y_52.shape)
        torch.cuda.synchronize()

        for _ in range(15):
            start_time = time.time()
            trunk(x)
            # CUDA kernels are launched asynchronously; wait for them to
            # finish so the elapsed time covers the actual GPU execution.
            torch.cuda.synchronize()
            end_time = time.time()
            print(end_time - start_time)
            print("===================================")

用时如下，0.04s多：

2. (改进版)mobilenetv1.py

辅助理解 https://blog.csdn.net/MOU_IT/article/details/84955532

import torch
import time


class MobileLayer(torch.nn.Module):
    """Depthwise-separable convolution block with an optional skip connection.

    A 3x3 depthwise convolution (``groups=in_channels``) is followed by a 1x1
    pointwise convolution; each is followed by batch norm and LeakyReLU(0.1).
    When ``stride == 1`` and the channel count is unchanged, the input is
    added to the output.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of the depthwise convolution (default 1).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride

        nn = torch.nn
        # 1x1 convs use (kernel 1, stride 1, padding 0): size unchanged.
        # 3x3 convs use (3, 1, 1): size unchanged, or (3, 2, 1): size halved.
        depthwise = [
            nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride,
                      padding=1, groups=in_channels, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.LeakyReLU(0.1, True),
        ]
        pointwise = [
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                      padding=0, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.1, True),
        ]
        self.sub_module = nn.Sequential(*depthwise, *pointwise)

    def forward(self, x):
        """Apply the block; add the skip connection when shapes allow it."""
        out = self.sub_module(x)
        # No residual when the block changes resolution or channel count.
        if self.stride != 1 or self.in_channels != self.out_channels:
            return out
        return out + x

class UpsampleLayer(torch.nn.Module):
    """Parameter-free layer that upsamples its input by a factor of 2.

    Uses bilinear interpolation; ``align_corners`` is left at the library
    default.
    """

    def forward(self, x):
        """Return ``x`` interpolated with ``scale_factor=2``, mode 'bilinear'."""
        resize = torch.nn.functional.interpolate
        return resize(x, scale_factor=2, mode='bilinear')

class ConvolutionalSet(torch.nn.Module):
    """Two ``MobileLayer`` blocks in sequence: in -> out, then out -> out.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        project = MobileLayer(in_channels, out_channels)
        refine = MobileLayer(out_channels, out_channels)
        self.sub_module = torch.nn.Sequential(project, refine)

    def forward(self, x):
        """Apply both layers to ``x``."""
        out = self.sub_module(x)
        return out

class MainNet(torch.nn.Module):
    """Backbone built from ``MobileLayer`` blocks with three detection heads.

    ``forward`` returns one tensor per scale, coarsest first. Each head ends
    in a 21-channel 3x3 convolution.
    NOTE(review): 21 is presumably anchors x (box + objectness + classes);
    confirm against the loss / decoding code, which is not part of this file.

    The size annotations (416, 208, ...) are carried over from the original
    comments and assume a 416x416 input. Attribute names (including the
    ``detetion_*`` spelling) are kept because they are part of the
    parameter names.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # ---- backbone -------------------------------------------------
        self.trunk_52 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),    # 416
            nn.Conv2d(32, 64, 1, 2, 0),   # 208, downsample
            MobileLayer(64, 64),          # stride 1, same channels: residual
            MobileLayer(64, 128, 2),      # 104, downsample
            MobileLayer(128, 128),
            MobileLayer(128, 256, 2),     # 52, downsample
        )

        self.trunk_26 = nn.Sequential(
            MobileLayer(256, 256),
            MobileLayer(256, 512, 2),     # 26, downsample
        )

        self.trunk_13 = nn.Sequential(
            MobileLayer(512, 512),
            MobileLayer(512, 1024, 2),    # 13, downsample
        )

        # ---- head on the coarsest map ---------------------------------
        self.convset_13 = nn.Sequential(ConvolutionalSet(1024, 512))
        self.detetion_13 = nn.Sequential(nn.Conv2d(512, 21, 3, 1, 1))

        # ---- head on the middle map -----------------------------------
        self.up_26 = nn.Sequential(MobileLayer(512, 256), UpsampleLayer())
        # 256 upsampled + 512 backbone channels after concatenation.
        self.convset_26 = nn.Sequential(ConvolutionalSet(768, 256))
        self.detetion_26 = nn.Sequential(nn.Conv2d(256, 21, 3, 1, 1))

        # ---- head on the finest map -----------------------------------
        self.up_52 = nn.Sequential(MobileLayer(256, 128), UpsampleLayer())
        # 128 upsampled + 256 backbone channels after concatenation.
        self.convset_52 = nn.Sequential(ConvolutionalSet(384, 128))
        self.detetion_52 = nn.Sequential(
            MobileLayer(128, 128),
            nn.Conv2d(128, 21, 3, 1, 1),
        )

    def forward(self, x):
        """Return the three detection maps for ``x``, coarsest first."""
        # Backbone feature maps, finest to coarsest.
        feat_52 = self.trunk_52(x)
        feat_26 = self.trunk_26(feat_52)
        feat_13 = self.trunk_13(feat_26)

        # Coarsest scale.
        neck_13 = self.convset_13(feat_13)
        pred_13 = self.detetion_13(neck_13)

        # Middle scale: upsample the coarse neck and concat along channels.
        merged_26 = torch.cat((self.up_26(neck_13), feat_26), dim=1)
        neck_26 = self.convset_26(merged_26)
        pred_26 = self.detetion_26(neck_26)

        # Finest scale: same pattern one level up.
        merged_52 = torch.cat((self.up_52(neck_26), feat_52), dim=1)
        neck_52 = self.convset_52(merged_52)
        pred_52 = self.detetion_52(neck_52)

        return pred_13, pred_26, pred_52


if __name__ == '__main__':
    # Rough GPU latency check for a single forward pass (requires CUDA).
    trunk = MainNet()
    # print(sum(param.numel() for param in trunk.parameters()))
    trunk.eval()
    trunk.cuda().half()
    # Use an initialised random input: torch.cuda.HalfTensor(1, 3, 128, 128)
    # only allocates memory, so its contents are arbitrary (possibly NaN/Inf).
    x = torch.randn(1, 3, 128, 128, device='cuda', dtype=torch.half)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        # Warm-up pass so one-time CUDA initialisation is not timed.
        y_13, y_26, y_52 = trunk(x)
        # print(y_13.shape)
        # print(y_26.shape)
        # print(y_52.shape)
        torch.cuda.synchronize()

        for _ in range(15):
            start_time = time.time()
            trunk(x)
            # CUDA kernels are launched asynchronously; wait for them to
            # finish so the elapsed time covers the actual GPU execution.
            torch.cuda.synchronize()
            end_time = time.time()
            print(end_time - start_time)
            print("===================================")

用时如下，将近0.02s：