一. 原理介绍
1.darknet53网络结构
基本由1*1与3*3卷积构成,因为网络中有53个卷积层,所以叫做Darknet-53(不包含残差层里的2个卷积)。结构图直接引用一个博主总结的,简洁明了 https://blog.csdn.net/qq_37541097/article/details/81214953#commentBox
2. mobilenet网络介绍
注意好好理解一下卷积 nn.Conv2d() 里的参数 groups 的作用:当 groups 等于输入通道数时,每个输入通道各自使用独立的卷积核,即深度卷积(depthwise convolution)。
mobilenet详解可参考这个 https://blog.csdn.net/u011974639/article/details/79199306
二. 代码实现
1. darknet53.py
import torch
import torch.nn as nn
import time
class Conv2d(nn.Module):
    """Convolution -> batch normalisation -> LeakyReLU building block.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
        k: kernel size forwarded to ``nn.Conv2d``.
        s: stride forwarded to ``nn.Conv2d``.
        p: padding forwarded to ``nn.Conv2d``.

    NOTE: compared with the earlier version of this block the convolution no
    longer has a bias and the activation slope is 0.1, so checkpoints saved
    from the earlier version contain an extra ``conv.0.bias`` entry.
    """

    def __init__(self, inc, ouc, k, s, p):
        super(Conv2d, self).__init__()
        self.conv = nn.Sequential(
            # No conv bias: BatchNorm2d follows immediately and provides its
            # own learnable shift, so a bias here would be redundant.
            nn.Conv2d(inc, ouc, k, s, p, bias=False),
            nn.BatchNorm2d(ouc),
            # Negative slope 0.1 matches Darknet's "leaky" activation and the
            # MobileNet variant's LeakyReLU(0.1); the PyTorch default is 0.01.
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        """Apply conv, batch norm and LeakyReLU to ``x`` and return the result."""
        return self.conv(x)
class ConvSet(nn.Module):
    """Five stacked ``Conv2d`` blocks mapping ``inc`` channels to ``ouc``.

    The kernels alternate 1x1 / 3x3 / 1x1 / 3x3 / 1x1; the middle of the
    stack widens to ``2 * ouc`` channels before the last 1x1 block narrows
    back to ``ouc``.  Every block uses stride 1, and the 3x3 blocks use
    padding 1.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super(ConvSet, self).__init__()
        wide = ouc * 2
        # One (in_channels, out_channels, kernel, stride, padding) tuple per
        # block, listed in execution order.
        specs = (
            (inc, ouc, 1, 1, 0),
            (ouc, ouc, 3, 1, 1),
            (ouc, wide, 1, 1, 0),
            (wide, wide, 3, 1, 1),
            (wide, ouc, 1, 1, 0),
        )
        blocks = [Conv2d(*spec) for spec in specs]
        self.convset = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through the five blocks in order."""
        out = self.convset(x)
        return out
class Upsampling(nn.Module):
    """Parameter-free layer that enlarges its input by a factor of two.

    Uses nearest-neighbour interpolation, so each input value is repeated
    rather than blended with its neighbours.
    """

    def forward(self, x):
        """Return ``x`` interpolated with ``scale_factor=2`` in 'nearest' mode."""
        resize = nn.functional.interpolate
        return resize(x, mode='nearest', scale_factor=2)
class Downsampling(nn.Module):
    """Single 3x3 ``Conv2d`` block with stride 2 and padding 1.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super(Downsampling, self).__init__()
        strided_block = Conv2d(inc, ouc, 3, 2, 1)
        # Kept inside a Sequential under the attribute name ``d`` so parameter
        # names stay ``d.0.*``.
        self.d = nn.Sequential(strided_block)

    def forward(self, x):
        """Apply the stride-2 block to ``x``."""
        reduced = self.d(x)
        return reduced
class Residual(nn.Module):
    """Two ``Conv2d`` blocks wrapped by an identity shortcut.

    A 1x1 block halves the channel count, a 3x3 block restores it, and the
    result is added to the block's input, so input and output both have
    ``inc`` channels.

    Args:
        inc: number of input (and output) channels.
    """

    def __init__(self, inc):
        super(Residual, self).__init__()
        half = inc // 2
        squeeze = Conv2d(inc, half, 1, 1, 0)
        expand = Conv2d(half, inc, 3, 1, 1)
        self.r = nn.Sequential(squeeze, expand)

    def forward(self, x):
        """Return ``x`` plus the output of the two-block branch."""
        branch = self.r(x)
        return x + branch
class MainNet(nn.Module):
    """YOLOv3-style network: Darknet-like backbone plus three detection heads.

    The backbone is split into three stages, ``d52``, ``d26`` and ``d13``,
    whose outputs are down-sampled by 8, 16 and 32 relative to the input.
    The coarsest features feed the first head; they are then reduced to 256
    channels, up-sampled by two and concatenated with the next finer backbone
    output to feed the following head.

    Size comments such as ``416 -> 208`` assume a 416x416 input.  Input sides
    should be multiples of 32 so that each up-sampled map has the same
    spatial size as the backbone map it is concatenated with.

    NOTE(review): each backbone repeat is a 1x1 Conv2d, a 3x3 Conv2d and then
    a ``Residual`` module, which holds two further convolutions of its own.
    That is deeper than the usual Darknet-53 layout -- confirm it is intended.

    Args:
        det_channels: number of channels produced by each detection head.
            Defaults to 18.  NOTE(review): 18 presumably corresponds to
            3 anchors x (4 box + 1 objectness + 1 class) -- confirm against
            the label / loss code.
    """

    @staticmethod
    def _stage(channels, repeats):
        """Return ``repeats`` x [1x1 Conv2d, 3x3 Conv2d, Residual] as a flat list."""
        layers = []
        for _ in range(repeats):
            layers.append(Conv2d(channels, channels // 2, 1, 1, 0))
            layers.append(Conv2d(channels // 2, channels, 3, 1, 1))
            layers.append(Residual(channels))
        return layers

    def __init__(self, det_channels=18):
        super(MainNet, self).__init__()

        # Layers are appended in exactly the order they were previously
        # written out by hand, so Sequential indices (and therefore parameter
        # names) are unchanged.
        d52_layers = [
            Conv2d(3, 32, 3, 1, 1),   # 416
            Conv2d(32, 64, 3, 2, 1),  # 416 -> 208
        ]
        d52_layers += self._stage(64, 1)
        d52_layers.append(Downsampling(64, 128))   # 208 -> 104
        d52_layers += self._stage(128, 2)
        d52_layers.append(Downsampling(128, 256))  # 104 -> 52
        d52_layers += self._stage(256, 8)
        self.d52 = nn.Sequential(*d52_layers)

        d26_layers = [Downsampling(256, 512)]      # 52 -> 26
        d26_layers += self._stage(512, 8)
        self.d26 = nn.Sequential(*d26_layers)

        d13_layers = [Downsampling(512, 1024)]     # 26 -> 13
        d13_layers += self._stage(1024, 4)
        self.d13 = nn.Sequential(*d13_layers)

        # ---- coarsest head (13x13 for a 416 input) ----
        self.convset_13 = nn.Sequential(
            ConvSet(1024, 512)
        )
        self.detection_13 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, det_channels, 1, 1, 0)
        )
        self.conv_13 = nn.Sequential(
            Conv2d(512, 256, 1, 1, 0)
        )
        self.up_to_26 = nn.Sequential(
            Upsampling()
        )

        # ---- middle head (26x26 for a 416 input) ----
        self.convset_26 = nn.Sequential(
            ConvSet(768, 512)  # after concat: 256 (up-sampled) + 512 (d26) = 768
        )
        self.detection_26 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, det_channels, 1, 1, 0)
        )
        self.conv_26 = nn.Sequential(
            Conv2d(512, 256, 1, 1, 0)
        )
        self.up_to_52 = nn.Sequential(
            Upsampling()
        )

        # ---- finest head (52x52 for a 416 input) ----
        self.convset_52 = nn.Sequential(
            ConvSet(512, 512)  # after concat: 256 (up-sampled) + 256 (d52) = 512
        )
        self.detection_52 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1),
            nn.Conv2d(512, det_channels, 1, 1, 0)
        )

    def forward(self, x):
        """Run the backbone and the three heads.

        Args:
            x: input batch with 3 channels (the first layer is
                ``Conv2d(3, 32, ...)``).

        Returns:
            Tuple ``(out_13, out_26, out_52)``: the head outputs at 1/32,
            1/16 and 1/8 of the input resolution, each with ``det_channels``
            channels.
        """
        x_52 = self.d52(x)
        x_26 = self.d26(x_52)
        x_13 = self.d13(x_26)

        # Coarsest scale.
        x_13_ = self.convset_13(x_13)
        out_13 = self.detection_13(x_13_)
        y_13_ = self.conv_13(x_13_)
        y_26 = self.up_to_26(y_13_)

        # Middle scale: fuse the up-sampled coarse features with the backbone map.
        y_26_cat = torch.cat((y_26, x_26), dim=1)
        x_26_ = self.convset_26(y_26_cat)
        out_26 = self.detection_26(x_26_)
        y_26_ = self.conv_26(x_26_)
        y_52 = self.up_to_52(y_26_)

        # Finest scale.
        y_52_cat = torch.cat((y_52, x_52), dim=1)
        x_52_ = self.convset_52(y_52_cat)
        out_52 = self.detection_52(x_52_)

        return out_13, out_26, out_52
if __name__ == '__main__':
    # Micro-benchmark: time 15 forward passes of the network on one 128x128 image.
    trunk = MainNet()
    trunk.eval()

    # Use half precision on the GPU when one is present; otherwise fall back
    # to float32 on the CPU instead of crashing.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        trunk.cuda().half()
        # torch.randn gives defined values; the legacy HalfTensor(sizes)
        # constructor returns uninitialised memory.
        x = torch.randn(1, 3, 128, 128, device='cuda', dtype=torch.half)
    else:
        x = torch.randn(1, 3, 128, 128)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # Warm-up pass, so one-off initialisation is not timed.
        y_13, y_26, y_52 = trunk(x)
        for _ in range(15):
            # CUDA kernels are launched asynchronously; synchronise so the
            # clock brackets the actual computation, not just the launch.
            if use_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            trunk(x)
            if use_cuda:
                torch.cuda.synchronize()
            end_time = time.perf_counter()
            print(end_time - start_time)
            print("===================================")
用时如下,单次前向推理耗时约 0.04 s 多:
2. (改进版)mobilenetv1.py
辅助理解 https://blog.csdn.net/MOU_IT/article/details/84955532
import torch
import time
class MobileLayer(torch.nn.Module):
    """Depthwise-separable convolution block with an optional identity shortcut.

    The block is a 3x3 depthwise convolution (``groups=in_channels``) followed
    by a 1x1 pointwise convolution; each convolution is followed by
    BatchNorm2d and LeakyReLU(0.1).  When ``stride == 1`` and the input and
    output channel counts are equal, the input is added to the result.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of the depthwise convolution (default 1).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(MobileLayer, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        # (kernel, stride, padding) conventions used here:
        #   1x1 -> (1, 1, 0) keeps the spatial size;
        #   3x3 -> (3, 1, 1) keeps it, (3, 2, 1) halves it.
        self.sub_module = torch.nn.Sequential(
            # Depthwise convolution: one 3x3 filter per input channel.
            torch.nn.Conv2d(in_channels, in_channels, 3, stride, 1,
                            groups=in_channels, bias=False),
            torch.nn.BatchNorm2d(in_channels),
            torch.nn.LeakyReLU(0.1, True),
            # Pointwise convolution: 1x1 mixing across channels.
            torch.nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.LeakyReLU(0.1, True)
        )

    def forward(self, x):
        """Apply the block, adding ``x`` back when the shapes allow a shortcut."""
        out = self.sub_module(x)
        # The shortcut is only possible when the block changes neither the
        # resolution nor the channel count.
        if self.stride == 1 and self.in_channels == self.out_channels:
            return out + x
        return out
class UpsampleLayer(torch.nn.Module):
    """Parameter-free layer that enlarges its input by two with bilinear interpolation."""

    def __init__(self):
        super(UpsampleLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` interpolated with ``scale_factor=2`` in 'bilinear' mode."""
        # align_corners=False is the value PyTorch uses when the argument is
        # omitted, so results are unchanged; stating it explicitly avoids the
        # "default upsampling behavior" warning issued by older releases.
        return torch.nn.functional.interpolate(
            x, scale_factor=2, mode='bilinear', align_corners=False)
class ConvolutionalSet(torch.nn.Module):
    """Two ``MobileLayer`` blocks in sequence.

    The first maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``.  Both use the default stride of 1.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super(ConvolutionalSet, self).__init__()
        project = MobileLayer(in_channels, out_channels)
        refine = MobileLayer(out_channels, out_channels)
        self.sub_module = torch.nn.Sequential(project, refine)

    def forward(self, x):
        """Run ``x`` through both layers."""
        out = self.sub_module(x)
        return out
class MainNet(torch.nn.Module):
    """YOLOv3-style network built from depthwise-separable ``MobileLayer`` blocks.

    Three backbone stages (``trunk_52``, ``trunk_26``, ``trunk_13``) reduce
    the input resolution by 8, 16 and 32.  Each of the three heads ends in a
    3x3 convolution with 21 output channels.  Size comments such as
    ``416 -> 208`` assume a 416x416 input.

    NOTE(review): the meaning of the 21 head channels is not visible here --
    presumably anchors x (box + objectness + classes); confirm against the
    label / loss code.

    The attribute spelling ``detetion_*`` is kept as-is because it determines
    the parameter names in saved state dicts.
    """

    def __init__(self):
        super(MainNet, self).__init__()
        Seq = torch.nn.Sequential
        Conv = torch.nn.Conv2d

        # Backbone.  (Additional repeated MobileLayer blocks had been tried in
        # each stage and were left commented out in the original.)
        self.trunk_52 = Seq(
            Conv(3, 32, 3, 1, 1),      # 416
            Conv(32, 64, 1, 2, 0),     # 416 -> 208
            MobileLayer(64, 64),       # stride 1, equal channels: uses the shortcut
            MobileLayer(64, 128, 2),   # 208 -> 104
            MobileLayer(128, 128),
            MobileLayer(128, 256, 2),  # 104 -> 52
        )
        self.trunk_26 = Seq(
            MobileLayer(256, 256),
            MobileLayer(256, 512, 2),  # 52 -> 26
        )
        self.trunk_13 = Seq(
            MobileLayer(512, 512),
            MobileLayer(512, 1024, 2),  # 26 -> 13
        )

        # Coarsest head.
        self.convset_13 = Seq(ConvolutionalSet(1024, 512))
        self.detetion_13 = Seq(Conv(512, 21, 3, 1, 1))

        # Middle head: 256 up-sampled channels + 512 from trunk_26 = 768.
        self.up_26 = Seq(MobileLayer(512, 256), UpsampleLayer())
        self.convset_26 = Seq(ConvolutionalSet(768, 256))
        self.detetion_26 = Seq(Conv(256, 21, 3, 1, 1))

        # Finest head: 128 up-sampled channels + 256 from trunk_52 = 384.
        self.up_52 = Seq(MobileLayer(256, 128), UpsampleLayer())
        self.convset_52 = Seq(ConvolutionalSet(384, 128))
        self.detetion_52 = Seq(
            MobileLayer(128, 128),
            Conv(128, 21, 3, 1, 1),
        )

    def forward(self, x):
        """Run the backbone and the three heads.

        Args:
            x: input batch with 3 channels (the first layer is
                ``Conv2d(3, 32, ...)``).

        Returns:
            Tuple of the three head outputs, ordered coarsest first
            (1/32, 1/16, 1/8 of the input resolution).
        """
        feat_52 = self.trunk_52(x)
        feat_26 = self.trunk_26(feat_52)
        feat_13 = self.trunk_13(feat_26)

        # Coarsest scale.
        neck_13 = self.convset_13(feat_13)
        head_13 = self.detetion_13(neck_13)

        # Middle scale: up-sample the coarse neck and fuse with trunk_26 output.
        fused_26 = torch.cat((self.up_26(neck_13), feat_26), dim=1)
        neck_26 = self.convset_26(fused_26)
        head_26 = self.detetion_26(neck_26)

        # Finest scale: up-sample the middle neck and fuse with trunk_52 output.
        fused_52 = torch.cat((self.up_52(neck_26), feat_52), dim=1)
        neck_52 = self.convset_52(fused_52)
        head_52 = self.detetion_52(neck_52)

        return head_13, head_26, head_52
if __name__ == '__main__':
    # Micro-benchmark: time 15 forward passes of the network on one 128x128 image.
    trunk = MainNet()
    trunk.eval()

    # Use half precision on the GPU when one is present; otherwise fall back
    # to float32 on the CPU instead of crashing.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        trunk.cuda().half()
        # torch.randn gives defined values; the legacy HalfTensor(sizes)
        # constructor returns uninitialised memory.
        x = torch.randn(1, 3, 128, 128, device='cuda', dtype=torch.half)
    else:
        x = torch.randn(1, 3, 128, 128)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # Warm-up pass, so one-off initialisation is not timed.
        y_13, y_26, y_52 = trunk(x)
        for _ in range(15):
            # CUDA kernels are launched asynchronously; synchronise so the
            # clock brackets the actual computation, not just the launch.
            if use_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            trunk(x)
            if use_cuda:
                torch.cuda.synchronize()
            end_time = time.perf_counter()
            print(end_time - start_time)
            print("===================================")
用时如下,单次前向推理耗时将近 0.02 s: