1.使用ResNet50作为主干特征提取网络,最终会获得一个16x16x2048的有效特征层
2.对获取到的特征进行上采样,进行分类预测和回归预测
ResNet50
resnet基本结构
其中的CONV表示卷积层,Batch Norm表示Batch 归一化层,ID BLOCK表示Identity块,由多个层构成,具体见第二个图。Conv BLOCK表示卷积块,由多个层构成。为了使得model结构更加清晰,才提取出了conv block 和id block两个‘块’,分别把它们封装成函数。 如果不了解batch norm,可以暂时略过这部分的内容,可以把它看作是一个特殊的层,它不会改变数据的维度。这将不影响对resnet实现的理解。
CenterNet模型
CenterNet模型使用resnet50作为主干网络对图片进行特征计算,并使用高斯核(热力图),focal loss等算法对resnet50模型输出的特征进行有监督的训练,使模型可以正确预测所识别物体的分类、尺寸和偏移数据。
centernet模型采用关键点估计方法来找目标中心点,之后,在中心点位置回归出偏移、尺寸、位置、方向 CenterNet模型损失 CenterNet模型损失由3部分组成 关键点损失,尺寸损失,偏移量损失 尺寸损失,偏移量损失计算:直接对预测值和标签值做差值运算,再对绝对值求平均值 关键点
损失:通过高斯函数和focal loss计算关键点
高斯核函数: 两个矩形面积重叠率IOU / 两矩形框总面积
IOU = (h-r)*(w-r) / (2wh - (h-r)*(w-r))
模型优化和处理
1.问题:损失值为0 1. 重点关注样本的训练数据和标签,以及模型的输出信息。出现none值往往是由于这几种数据出现none值;
2.问题:学习率过大0.01,损失值为0。 一般,越复杂的模型对学习率的依赖越严重,学习率设置为0.0004左右
模型优化: 使用更好的网络res2net模型提取特征。 res2net对resnet残差块中的3X3卷积进行了扩充,复制多个通道,逐步卷积和融合
代码如下:
from __future__ import absolute_import, division, print_function
import math
import pdb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torch.autograd import Variable
from torchvision.models.utils import load_state_dict_from_url
# Pretrained-checkpoint locations keyed by architecture name.
# NOTE(review): these look like local file paths, yet resnet50() below
# passes them to load_state_dict_from_url, which expects URLs — confirm
# how the pretrained weights are actually meant to be loaded.
model_urls = {
'resnet18': './models/resnet18-5c106cde.pth',
'resnet34': './models/resnet34-333f7ec4.pth',
'resnet50': './models/resnet50-19c8e357.pth',
'resnet101': './models/resnet101-5d3b4d8f.pth',
'resnet152': './models/resnet152-b121ed2d.pth',
}
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand (x4).

    NOTE(review): the spatial stride is applied on conv1 (the "caffe style"
    variant) rather than on conv2 as in torchvision's reference ResNet —
    preserved as-is from the original.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Submodule creation order is kept identical so state_dict keys and
        # RNG-driven weight initialization match the original block.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Identity shortcut, or a projected one when shapes differ.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        return self.relu(y + shortcut)
#-----------------------------------------------------------------#
#   使用ResNet50作为主干特征提取网络,最终会获得一个
# 16x16x2048的有效特征层
#-----------------------------------------------------------------#
class ResNet(nn.Module):
    """Generic ResNet built from `block` (e.g. Bottleneck).

    Classic layout: 7x7 stride-2 conv -> maxpool -> four residual stages ->
    avgpool -> fc. For a 512x512x3 input with Bottleneck [3,4,6,3] the stage
    outputs are 256x256x64 -> 128x128x64 -> 128x128x256 -> 64x64x512 ->
    32x32x1024 -> 16x16x2048.
    """

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        # Stem: 512,512,3 -> 256,256,64, then pool down to 128,128,64.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)
        # Four residual stages; the last three each halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Classification tail (discarded by the CenterNet feature extractor).
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # He-style init for convs; identity-like init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks at width `planes`."""
        # Project the shortcut whenever the resolution or width changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
def resnet50(pretrain = True):
    """Build a ResNet-50 and return only its feature-extraction layers.

    The classification tail (avgpool/fc) is dropped, so the returned
    nn.Sequential maps a 512x512x3 image to a 16x16x2048 feature map.

    NOTE(review): model_urls['resnet50'] looks like a local file path, but
    load_state_dict_from_url expects a URL — verify pretrained loading.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrain:
        model.load_state_dict(load_state_dict_from_url(model_urls['resnet50']))
    # Keep only the backbone: the stem plus the four residual stages.
    extractor = nn.Sequential(
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3, model.layer4,
    )
    return extractor
class resnet50_Decoder(nn.Module):
    """Upsampling decoder: 16,16,2048 -> 32,32,256 -> 64,64,128 -> 128,128,64.

    Three ConvTranspose2d(stride=2) + BatchNorm + ReLU stages, each doubling
    the spatial size of the feature map.
    """

    def __init__(self, inplanes, bn_momentum=0.1):
        super(resnet50_Decoder, self).__init__()
        self.bn_momentum = bn_momentum
        self.inplanes = inplanes
        self.deconv_with_bias = False
        self.deconv_layers = self._make_deconv_layer(
            num_layers=3,
            num_filters=[256, 128, 64],
            num_kernels=[4, 4, 4],
        )

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        """Stack `num_layers` deconv+BN+ReLU stages."""
        # kernel 4 / stride 2 / padding 1 / output_padding 0 exactly doubles
        # the input's height and width at every stage.
        stages = []
        for out_ch, kernel in zip(num_filters[:num_layers], num_kernels[:num_layers]):
            stages.append(nn.ConvTranspose2d(
                in_channels=self.inplanes,
                out_channels=out_ch,
                kernel_size=kernel,
                stride=2,
                padding=1,
                output_padding=0,
                bias=self.deconv_with_bias))
            stages.append(nn.BatchNorm2d(out_ch, momentum=self.bn_momentum))
            stages.append(nn.ReLU(inplace=True))
            self.inplanes = out_ch
        return nn.Sequential(*stages)

    def forward(self, x):
        return self.deconv_layers(x)
class resnet50_Head(nn.Module):
    """CenterNet prediction heads on the 128x128x64 decoder output.

    Three parallel 3x3-conv + BN + ReLU + 1x1-conv branches:
      cls_head: 128,128,64 -> 128,128,num_classes  (heatmap, sigmoid)
      wh_head:  128,128,64 -> 128,128,2            (box width/height)
      reg_head: 128,128,64 -> 128,128,2            (center offset)

    The input is expected to carry 64 channels (the decoder output width);
    `channel` only controls the hidden width of each branch.
    """

    def __init__(self, num_classes=80, channel=64, bn_momentum=0.1):
        super(resnet50_Head, self).__init__()
        # Heatmap branch.
        self.cls_head = nn.Sequential(
            nn.Conv2d(64, channel, kernel_size=3, padding=1, bias=False),
            # Fixed: the BatchNorm width must match the conv output
            # (`channel`); the hard-coded 64 broke any channel != 64.
            nn.BatchNorm2d(channel, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel, num_classes, kernel_size=1, stride=1, padding=0))
        # Width/height branch.
        self.wh_head = nn.Sequential(
            nn.Conv2d(64, channel, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channel, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel, 2, kernel_size=1, stride=1, padding=0))
        # Center-offset branch.
        self.reg_head = nn.Sequential(
            nn.Conv2d(64, channel, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channel, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel, 2, kernel_size=1, stride=1, padding=0))

    def forward(self, x):
        # Heatmap is squashed in place to (0, 1); wh/offset are raw values.
        hm = self.cls_head(x).sigmoid_()
        wh = self.wh_head(x)
        offset = self.reg_head(x)
        return hm, wh, offset
import numpy as np
import torch
import torch.nn as nn
#-------------------------#
# 卷积+标准化+激活函数
#-------------------------#
class conv2d(nn.Module):
    """Conv -> (optional) BatchNorm -> ReLU convenience block.

    `k` is the square kernel size; "same" padding keeps the spatial size at
    stride 1. The conv carries a bias only when batch norm is disabled.
    """

    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
        super(conv2d, self).__init__()
        pad = (k - 1) // 2
        self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad),
                              stride=(stride, stride), bias=not with_bn)
        self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))
#-------------------------#
# 残差结构
#-------------------------#
class residual(nn.Module):
    """Two 3x3 convs with BN plus a skip connection.

    The skip is a 1x1 projection whenever the stride or channel count
    changes, otherwise an identity. `k` is accepted for API symmetry with
    conv2d but the kernels are fixed at 3x3.
    """

    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
        super(residual, self).__init__()
        self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1),
                               stride=(stride, stride), bias=False)
        self.bn1 = nn.BatchNorm2d(out_dim)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(out_dim)
        needs_projection = stride != 1 or inp_dim != out_dim
        self.skip = nn.Sequential(
            nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
            nn.BatchNorm2d(out_dim)
        ) if needs_projection else nn.Sequential()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        main = self.bn2(self.conv2(self.relu1(self.bn1(self.conv1(x)))))
        return self.relu(main + self.skip(x))
def make_layer(k, inp_dim, out_dim, modules, **kwargs):
    """Stack `modules` residual blocks; only the first changes the width."""
    blocks = [residual(k, inp_dim, out_dim, **kwargs)]
    blocks.extend(residual(k, out_dim, out_dim, **kwargs) for _ in range(modules - 1))
    return nn.Sequential(*blocks)
def make_hg_layer(k, inp_dim, out_dim, modules, **kwargs):
    """Like make_layer, but the first block downsamples with stride 2.

    NOTE(review): extra **kwargs are accepted but not forwarded to the
    residual blocks — preserved as-is from the original.
    """
    blocks = [residual(k, inp_dim, out_dim, stride=2)]
    blocks.extend(residual(k, out_dim, out_dim) for _ in range(modules - 1))
    return nn.Sequential(*blocks)
def make_layer_revr(k, inp_dim, out_dim, modules, **kwargs):
    """Stack `modules` residual blocks; only the last changes the width."""
    blocks = [residual(k, inp_dim, inp_dim, **kwargs) for _ in range(modules - 1)]
    blocks.append(residual(k, inp_dim, out_dim, **kwargs))
    return nn.Sequential(*blocks)
class kp_module(nn.Module):
    """One level of the recursive hourglass (U-shaped) keypoint module.

    Each level keeps a high-resolution branch (up1), downsamples into the
    next level (low1 -> low2 -> low3), and merges the upsampled result back
    with an element-wise sum.
    """

    def __init__(self, n, dims, modules, **kwargs):
        super(kp_module, self).__init__()
        self.n = n
        curr_mod, next_mod = modules[0], modules[1]
        curr_dim, next_dim = dims[0], dims[1]
        # High-resolution branch: residual blocks at the current scale,
        # kept for fusion with the upsampled path.
        self.up1 = make_layer(3, curr_dim, curr_dim, curr_mod, **kwargs)
        # Downsample (stride-2 residual stack) into the next scale.
        self.low1 = make_hg_layer(3, curr_dim, next_dim, curr_mod, **kwargs)
        # Recurse until n == 1, then bottom out with a plain residual stack.
        if self.n > 1:
            self.low2 = kp_module(n - 1, dims[1:], modules[1:], **kwargs)
        else:
            self.low2 = make_layer(3, next_dim, next_dim, next_mod, **kwargs)
        # Bring the lower level back to the current width ...
        self.low3 = make_layer_revr(3, next_dim, curr_dim, curr_mod, **kwargs)
        # ... and back up to the current resolution.
        self.up2 = nn.Upsample(scale_factor=2)

    def forward(self, x):
        high = self.up1(x)
        low = self.up2(self.low3(self.low2(self.low1(x))))
        return high + low
import torch.nn as nn
#基本残差块
class HgResBolck(nn.Module):
    """Pre-activation bottleneck residual block for the hourglass network.

    BN -> 1x1 conv (halve width) -> BN -> 3x3 conv -> BN -> 1x1 conv
    (restore width), with an identity skip, or a 1x1 projection when the
    channel count changes.

    Fixes over the original:
      * nn.ReLU(inplanes=True) was a typo that raised TypeError on
        construction — the keyword is `inplace`.
      * conv_2 had no padding, so the 3x3 conv shrank the feature map and
        the residual addition always failed; padding=1 keeps the size.

    NOTE(review): both conv_1 and conv_2 apply `stride` while the skip path
    never strides, so stride != 1 still cannot work here — confirm callers
    only use the default stride=1.
    """

    def __init__(self, inplanes, outplanes, stride=1):
        super(HgResBolck, self).__init__()
        self.inplanes = inplanes
        self.outplanes = outplanes
        midplanes = outplanes // 2
        self.bn_1 = nn.BatchNorm2d(inplanes)
        self.conv_1 = nn.Conv2d(inplanes, midplanes, kernel_size=1, stride=stride)
        self.bn_2 = nn.BatchNorm2d(midplanes)
        # padding=1 keeps H,W so the residual sum below is well-defined.
        self.conv_2 = nn.Conv2d(midplanes, midplanes, kernel_size=3, stride=stride, padding=1)
        self.bn_3 = nn.BatchNorm2d(midplanes)
        self.conv_3 = nn.Conv2d(midplanes, outplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        if inplanes != outplanes:
            self.conv_skip = nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1)

    def forward(self, x):
        residual = x
        out = self.bn_1(x)
        out = self.conv_1(out)
        out = self.relu(out)
        out = self.bn_2(out)
        out = self.conv_2(out)
        out = self.relu(out)
        out = self.bn_3(out)
        out = self.conv_3(out)
        out = self.relu(out)
        # Project the skip only when the channel count changes.
        if self.inplanes != self.outplanes:
            residual = self.conv_skip(residual)
        out += residual
        return out
#单个Hourglass Module
class Hourglass(nn.Module):
    """A single hourglass module built recursively over `depth` scales.

    At every depth three residual stacks are kept (skip branch, down path,
    up path); the innermost depth gets a fourth stack for the middle.
    """

    def __init__(self, depth, nFeat, nModules, resBlocks):
        super(Hourglass, self).__init__()
        self.depth = depth
        self.nFeat = nFeat
        # Number of residual modules per location (attribute name kept
        # verbatim from the original, including its spelling).
        self.nModeules = nModules
        self.resBlocks = resBlocks
        self.hg = self._make_hourglass()
        self.downsample = nn.MaxPool2d(kernel_size=2, stride=2)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

    def _make_residual(self, n):
        """A sequential stack of n constant-width residual blocks."""
        return nn.Sequential(*[self.resBlocks(self.nFeat, self.nFeat) for _ in range(n)])

    def _make_hourglass(self):
        levels = []
        for depth_id in range(self.depth):
            # [skip, down-path, up-path]; the innermost level carries one
            # extra stack for the middle of the U.
            branch_count = 4 if depth_id == self.depth - 1 else 3
            levels.append(nn.ModuleList(
                [self._make_residual(self.nModeules) for _ in range(branch_count)]))
        return nn.ModuleList(levels)

    def _hourglass_forward(self, depth_id, x):
        up_1 = self.hg[depth_id][0](x)
        low_1 = self.hg[depth_id][1](self.downsample(x))
        if depth_id == self.depth - 1:
            low_2 = self.hg[depth_id][3](low_1)
        else:
            low_2 = self._hourglass_forward(depth_id + 1, low_1)
        up_2 = self.upsample(self.hg[depth_id][2](low_2))
        return up_1 + up_2

    def forward(self, x):
        return self._hourglass_forward(0, x)
class HourglassNet(nn.Module):
    """Stacked hourglass network with intermediate supervision.

    Each stack produces a score map; between stacks the features and the
    re-projected scores are summed back into the running feature map.

    Fix over the original: __init__ never called self._make_head(), so the
    stem layers used by forward() (conv_1, bn_1, relu, res_1, pool, res_2,
    res_3) were never created and forward() raised AttributeError.
    """

    def __init__(self, nStacks, nModules, nFeat, nClasses, resBlock=HgResBolck, inplanes=3):
        super(HourglassNet, self).__init__()
        self.nStacks = nStacks
        self.nModules = nModules
        self.nFeat = nFeat
        self.nClasses = nClasses
        self.resBlock = resBlock
        self.inplanes = inplanes
        # Build the stem that forward() relies on (was missing).
        self._make_head()
        hg, res, fc, score, fc_, score_ = [], [], [], [], [], []
        for i in range(nStacks):
            hg.append(Hourglass(depth=4, nFeat=nFeat, nModules=nModules, resBlocks=resBlock))
            res.append(self._make_residual(nModules))
            fc.append(self._make_fc(nFeat, nFeat))
            score.append(nn.Conv2d(nFeat, nClasses, kernel_size=1))
            if i < (nStacks - 1):
                # Projections used to fold features/scores back in
                # between consecutive stacks.
                fc_.append(nn.Conv2d(nFeat, nFeat, kernel_size=1))
                score_.append(nn.Conv2d(nClasses, nFeat, kernel_size=1))
        self.hg = nn.ModuleList(hg)
        self.res = nn.ModuleList(res)
        self.fc = nn.ModuleList(fc)
        self.score = nn.ModuleList(score)
        self.fc_ = nn.ModuleList(fc_)
        self.score_ = nn.ModuleList(score_)

    def _make_head(self):
        """Stem: 7x7 stride-2 conv -> res -> pool -> res -> res to nFeat."""
        self.conv_1 = nn.Conv2d(self.inplanes, 64, kernel_size=7, stride=2, padding=3)
        self.bn_1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.res_1 = self.resBlock(64, 128)
        self.pool = nn.MaxPool2d(2, 2)
        self.res_2 = self.resBlock(128, 128)
        self.res_3 = self.resBlock(128, self.nFeat)

    def _make_residual(self, n):
        """A stack of n constant-width residual blocks."""
        return nn.Sequential(*[self.resBlock(self.nFeat, self.nFeat) for _ in range(n)])

    def _make_fc(self, inplanes, outplanes):
        """1x1 conv + BN + ReLU feature projection."""
        return nn.Sequential(
            nn.Conv2d(inplanes, outplanes, kernel_size=1),
            nn.BatchNorm2d(outplanes),
            nn.ReLU(True))

    def forward(self, x):
        # Stem.
        x = self.conv_1(x)
        x = self.bn_1(x)
        x = self.relu(x)
        x = self.res_1(x)
        x = self.pool(x)
        x = self.res_2(x)
        x = self.res_3(x)
        out = []
        for i in range(self.nStacks):
            y = self.hg[i](x)
            y = self.res[i](y)
            y = self.fc[i](y)
            score = self.score[i](y)
            out.append(score)
            if i < (self.nStacks - 1):
                fc_ = self.fc_[i](y)
                score_ = self.score_[i](score)
                # Intermediate supervision: feed features + scores forward.
                x = x + fc_ + score_
        return out
import torch
import torch.nn.functional as F
def focal_loss(pred, target):
    """CenterNet heatmap focal loss.

    pred:   (B, C, H, W) heatmap predictions, expected in (0, 1).
    target: (B, H, W, C) ground-truth Gaussian heatmaps; a cell equal to 1
            is a positive (object center), everything else is a negative.
    """
    # Align the prediction layout with the (B, H, W, C) target.
    heat = pred.permute(0, 2, 3, 1)
    # Positives are the exact centers; every other cell is a negative.
    pos_mask = target.eq(1).float()
    neg_mask = target.lt(1).float()
    # Negatives near a center get a smaller weight via (1 - target)^4.
    neg_weights = torch.pow(1 - target, 4)
    # Clamp away from 0/1 so the logs below stay finite.
    heat = torch.clamp(heat, 1e-6, 1 - 1e-6)
    # Focal terms: hard examples get larger weight, easy ones smaller.
    pos_term = torch.log(heat) * torch.pow(1 - heat, 2) * pos_mask
    neg_term = torch.log(1 - heat) * torch.pow(heat, 2) * neg_weights * neg_mask
    # Normalize by the number of positives (fall back when there are none).
    num_pos = pos_mask.float().sum()
    pos_sum = pos_term.sum()
    neg_sum = neg_term.sum()
    if num_pos == 0:
        return -neg_sum
    return -(pos_sum + neg_sum) / num_pos
def reg_l1_loss(pred, target, mask):
    """Masked L1 loss for the size / offset regression heads.

    pred:   (B, 2, H, W) raw regression output.
    target: (B, H, W, 2) regression targets.
    mask:   (B, H, W) 1 at cells that carry a ground-truth box, else 0.
    """
    # Match the (B, H, W, 2) target layout, then zero both sides outside the
    # mask so only annotated cells contribute.
    aligned = pred.permute(0, 2, 3, 1)
    cell_mask = torch.unsqueeze(mask, -1).repeat(1, 1, 1, 2)
    total = F.l1_loss(aligned * cell_mask, target * cell_mask, reduction='sum')
    # Average over the number of positive cells (epsilon avoids /0).
    return total / (mask.sum() + 1e-4)
import torch
import torch.nn.functional as F
from torch import nn
from nets.hourglass import *
from nets.resnet50 import resnet50, resnet50_Decoder, resnet50_Head
class CenterNet_Resnet50(nn.Module):
    """CenterNet with a ResNet-50 backbone.

    512,512,3 -> backbone -> 16,16,2048 -> decoder -> 128,128,64 -> heads
    (heatmap num_classes / wh 2 / offset 2, each at 128x128).
    """

    def __init__(self, num_classes = 20, pretrain = False):
        super(CenterNet_Resnet50, self).__init__()
        self.backbone = resnet50(pretrain=pretrain)
        self.decoder = resnet50_Decoder(2048)
        self.head = resnet50_Head(channel=64, num_classes=num_classes)

    def freeze_backbone(self):
        """Stop gradient flow through the feature extractor."""
        for param in self.backbone.parameters():
            param.requires_grad = False

    def unfreeze_backbone(self):
        """Re-enable gradient flow through the feature extractor."""
        for param in self.backbone.parameters():
            param.requires_grad = True

    def forward(self, x):
        return self.head(self.decoder(self.backbone(x)))
class CenterNet_HourglassNet(nn.Module):
    """CenterNet with a stacked-hourglass backbone.

    `heads` maps head names to output channel counts (e.g. {'hm': C,
    'wh': 2, 'reg': 2}); one head module per stack is registered under each
    head's name. Heads whose name contains 'hm' get their final conv bias
    initialized to -2.19 so the initial (post-sigmoid) heatmap is low.
    """

    def __init__(self, heads, num_stacks=2, n=5, cnv_dim=256, dims=[256, 256, 384, 384, 384, 512], modules = [2, 2, 2, 2, 2, 4]):
        super(CenterNet_HourglassNet, self).__init__()
        self.nstack = num_stacks
        self.heads = heads
        curr_dim = dims[0]
        # Stem: stride-2 7x7 conv then a stride-2 residual (1/4 resolution).
        self.pre = nn.Sequential(
            conv2d(7, 3, 128, stride=2),
            residual(3, 128, 256, stride=2)
        )
        # One hourglass module plus one post-hourglass conv per stack.
        self.kps = nn.ModuleList([
            kp_module(n, dims, modules) for _ in range(num_stacks)
        ])
        self.cnvs = nn.ModuleList([
            conv2d(3, curr_dim, cnv_dim) for _ in range(num_stacks)
        ])
        # Inter-stack fusion layers (one fewer than the number of stacks).
        self.inters = nn.ModuleList([
            residual(3, curr_dim, curr_dim) for _ in range(num_stacks - 1)
        ])
        self.inters_ = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
                nn.BatchNorm2d(curr_dim)
            ) for _ in range(num_stacks - 1)
        ])
        self.cnvs_ = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
                nn.BatchNorm2d(curr_dim)
            ) for _ in range(num_stacks - 1)
        ])
        # Register one prediction branch per head name, per stack.
        for head in heads.keys():
            branch = nn.ModuleList([
                nn.Sequential(
                    conv2d(3, cnv_dim, curr_dim, with_bn=False),
                    nn.Conv2d(curr_dim, heads[head], (1, 1))
                ) for _ in range(num_stacks)
            ])
            setattr(self, head, branch)
            if 'hm' in head:
                # Bias the heatmap logits towards "background" at init.
                for seq in getattr(self, head):
                    seq[-1].bias.data.fill_(-2.19)
        self.relu = nn.ReLU(inplace=True)

    def freeze_backbone(self):
        """Freeze the stem and all hourglass stacks."""
        for module in (self.pre, self.kps):
            for param in module.parameters():
                param.requires_grad = False

    def unfreeze_backbone(self):
        """Unfreeze the stem and all hourglass stacks."""
        for module in (self.pre, self.kps):
            for param in module.parameters():
                param.requires_grad = True

    def forward(self, image):
        inter = self.pre(image)
        outs = []
        for ind in range(self.nstack):
            cnv = self.cnvs[ind](self.kps[ind](inter))
            if ind < self.nstack - 1:
                # Fuse the previous features with this stack's output before
                # feeding the next stack.
                inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
                inter = self.inters[ind](self.relu(inter))
            # One dict of raw head outputs per stack (intermediate supervision).
            outs.append({head: getattr(self, head)[ind](cnv) for head in self.heads})
        return outs