Introduction
HRNet is a classic model in the field of human pose estimation (HPE). Taking HRNet-w32 as an example, this article walks through a PyTorch implementation of the model.
Model structure

The model is built by stacking several stages, which involve upsampling, downsampling, and transition operations. Each is explained below.
Upsampling
HRNet's upsampling is implemented with nearest-neighbor interpolation: a 1×1 convolution first adjusts the number of channels, and nearest-neighbor interpolation then restores the higher resolution, as shown in the code below:

nn.Sequential(
    nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
    nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
    nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by a factor of 2^(j-i)
)
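
To see the shapes concretely, here is a minimal sketch (not from the original post) that instantiates this branch with assumed values c = 32, output branch i = 0, and input branch j = 1:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
c, i, j = 32, 0, 1  # assumed values for illustration
up = nn.Sequential(
    nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
    nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
    nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")
)
x = torch.randn(1, 64, 32, 24)  # a dummy branch-1 feature map
print(up(x).shape)              # torch.Size([1, 32, 64, 48])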
Downsampling
Downsampling differs slightly from upsampling: several convolutional layers first reduce the spatial resolution of the feature map, and a final convolutional layer halves the resolution once more while also adjusting the number of channels, as shown in the code below:

ops = []
for _ in range(i - j - 1):  # intermediate conv layers
    ops.append(
        nn.Sequential(
            nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True)
        )
    )
ops.append(  # final conv layer
    nn.Sequential(
        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
)
self.fuse_layers[-1].append(nn.Sequential(*ops))
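
As a quick sanity check, here is a minimal sketch (with assumed values, not from the original post) tracing the shapes for c = 32, input branch j = 0, and output branch i = 2, where the loop contributes one intermediate conv and the final conv adjusts the channels:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
c, i, j = 32, 2, 0  # assumed values for illustration
down = nn.Sequential(
    nn.Sequential(  # the single intermediate conv (i - j - 1 = 1)
        nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    ),
    nn.Sequential(  # the final conv, which also adjusts the channels
        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
)
x = torch.randn(1, 32, 64, 48)  # a dummy branch-0 feature map
print(down(x).shape)            # torch.Size([1, 128, 16, 12])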
Transition
A transition passes the outputs of one stage on to the next while adding a new branch via downsampling. Take transition2 as an example:

self.transition2 = nn.ModuleList([
    nn.Identity(),
    nn.Identity(),
    nn.Sequential(
        nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
])
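
The following minimal sketch (standalone, with assumed shapes, not from the original post) shows how transition2 turns the two stage-2 outputs into three stage-3 inputs; the first two branches pass through unchanged and the new branch is derived from the lowest-resolution branch only:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
base_channel = 32
transition2 = nn.ModuleList([
    nn.Identity(),
    nn.Identity(),
    nn.Sequential(
        nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
])
x = [torch.randn(1, 32, 64, 48), torch.randn(1, 64, 32, 24)]  # dummy stage-2 outputs
x = [transition2[0](x[0]), transition2[1](x[1]), transition2[2](x[-1])]
print([t.shape for t in x])
# [torch.Size([1, 32, 64, 48]), torch.Size([1, 64, 32, 24]), torch.Size([1, 128, 16, 12])]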
Stage
A stage's main job is to fuse feature maps of different resolutions; each stage stacks several such modules. For the i-th output branch, every input branch j contributes in one of three ways:
- when i == j, the j-th input is passed through unchanged (identity);
- when i < j, the input must be upsampled;
- when i > j, the input must be downsampled.
See the code below for the details and explanations:
class StageModule(nn.Module):
    def __init__(self, input_branches, output_branches, c):
        """
        Builds the module for one stage, fusing features across different scales.
        :param input_branches: number of input branches
        :param output_branches: number of output branches
        :param c: number of channels of the first branch
        """
        super(StageModule, self).__init__()
        self.input_branches = input_branches
        self.out_branches = output_branches
        self.branches = nn.ModuleList()  # holds the BasicBlock stacks
        for i in range(self.input_branches):
            w = c * (2 ** i)
            branch = nn.Sequential(
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w)
            )
            self.branches.append(branch)
        self.fuse_layers = nn.ModuleList()  # fuses feature maps of different scales
        for i in range(self.out_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.input_branches):
                if i == j:  # no resampling needed
                    self.fuse_layers[-1].append(nn.Identity())  # returns x unchanged
                elif i < j:  # upsample: first reduce the channels, then nearest-neighbor interpolation
                    self.fuse_layers[-1].append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by 2^(j-i)
                        )
                    )
                else:  # downsample: i-j-1 conv layers reduce the resolution, then a final conv also adjusts channels
                    ops = []
                    for _ in range(i - j - 1):  # intermediate conv layers
                        ops.append(
                            nn.Sequential(
                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
                                nn.ReLU(inplace=True)
                            )
                        )
                    ops.append(  # final conv layer
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.ReLU(inplace=True)
                        )
                    )
                    self.fuse_layers[-1].append(nn.Sequential(*ops))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):  # x is a list holding input_branches tensors
        # first pass each input through its BasicBlock stack
        x = [self.branches[i](x[i]) for i in range(self.input_branches)]  # equivalent to x = [branch(xi) for branch, xi in zip(self.branches, x)]
        # then fuse information across scales
        x_fused = []
        for i in range(len(self.fuse_layers)):
            x_fused.append(
                self.relu(
                    sum([self.fuse_layers[i][j](x[j]) for j in range(self.input_branches)])
                )
            )
        return x_fused
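
A quick shape check, as a minimal sketch: assuming StageModule and BasicBlock from the model.py listing below are in scope, a three-branch module should preserve all three input shapes:

import torch

m = StageModule(input_branches=3, output_branches=3, c=32)
x = [torch.randn(1, 32, 64, 48),
     torch.randn(1, 64, 32, 24),
     torch.randn(1, 128, 16, 12)]
out = m(x)
print([t.shape for t in out])
# [torch.Size([1, 32, 64, 48]), torch.Size([1, 64, 32, 24]), torch.Size([1, 128, 16, 12])]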
model.py
The complete model code is given below; at the harder-to-follow spots I have added my own notes:
import torch.nn as nn
BN_MOMENTUM = 0.1
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # the second conv always uses stride 1, so the residual shapes match
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion,
                                  momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
class StageModule(nn.Module):
    def __init__(self, input_branches, output_branches, c):
        """
        Builds the module for one stage, fusing features across different scales.
        :param input_branches: number of input branches
        :param output_branches: number of output branches
        :param c: number of channels of the first branch
        """
        super(StageModule, self).__init__()
        self.input_branches = input_branches
        self.out_branches = output_branches
        self.branches = nn.ModuleList()  # holds the BasicBlock stacks
        for i in range(self.input_branches):
            w = c * (2 ** i)
            branch = nn.Sequential(
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w)
            )
            self.branches.append(branch)
        self.fuse_layers = nn.ModuleList()  # fuses feature maps of different scales
        for i in range(self.out_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.input_branches):
                if i == j:  # no resampling needed
                    self.fuse_layers[-1].append(nn.Identity())  # returns x unchanged
                elif i < j:  # upsample: first reduce the channels, then nearest-neighbor interpolation
                    self.fuse_layers[-1].append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by 2^(j-i)
                        )
                    )
                else:  # downsample: i-j-1 conv layers reduce the resolution, then a final conv also adjusts channels
                    ops = []
                    for _ in range(i - j - 1):  # intermediate conv layers
                        ops.append(
                            nn.Sequential(
                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
                                nn.ReLU(inplace=True)
                            )
                        )
                    ops.append(  # final conv layer
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.ReLU(inplace=True)
                        )
                    )
                    self.fuse_layers[-1].append(nn.Sequential(*ops))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):  # x is a list holding input_branches tensors
        # first pass each input through its BasicBlock stack
        x = [self.branches[i](x[i]) for i in range(self.input_branches)]  # equivalent to x = [branch(xi) for branch, xi in zip(self.branches, x)]
        # then fuse information across scales
        x_fused = []
        for i in range(len(self.fuse_layers)):
            x_fused.append(
                self.relu(
                    sum([self.fuse_layers[i][j](x[j]) for j in range(self.input_branches)])
                )
            )
        return x_fused
class HighResolutionNet(nn.Module):
    def __init__(self, base_channel: int = 32, num_joints: int = 17):
        super(HighResolutionNet, self).__init__()
        # stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        downsample = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(256, momentum=BN_MOMENTUM)
        )
        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, downsample=downsample),
            Bottleneck(256, 64),
            Bottleneck(256, 64),
            Bottleneck(256, 64)
        )
        self.transition1 = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(256, base_channel, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(base_channel, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            ),
            nn.Sequential(
                nn.Sequential(  # the extra Sequential keeps parameter names compatible with the weights released by the original repo
                    nn.Conv2d(256, base_channel * 2, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 2, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])
        # stage2
        self.stage2 = nn.Sequential(
            StageModule(input_branches=2, output_branches=2, c=base_channel)
        )
        # transition2
        self.transition2 = nn.ModuleList([
            nn.Identity(),
            nn.Identity(),
            nn.Sequential(
                nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            )
        ])
        # stage3
        self.stage3 = nn.Sequential(
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
        )
        # transition3
        self.transition3 = nn.ModuleList([
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Sequential(
                nn.Sequential(
                    nn.Conv2d(base_channel * 4, base_channel * 8, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 8, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])
        # stage4
        self.stage4 = nn.Sequential(
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=1, c=base_channel),  # the last module fuses everything into the highest-resolution branch
        )
        # final layer
        self.final_layer = nn.Conv2d(base_channel, num_joints, kernel_size=1, stride=1)
    def forward(self, x):
        """
        :param x: assume an input of shape (1, 3, 256, 192)
        :return:
        """
        # stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)  # x.shape (1, 64, 64, 48)
        # stage1
        x = self.layer1(x)  # x.shape (1, 256, 64, 48)
        # stage2
        x = [trans(x) for trans in self.transition1]  # x becomes a list: [(1, 32, 64, 48), (1, 64, 32, 24)]
        x = self.stage2(x)
        # stage3
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # the new branch derives from the lowest-resolution branch only
        x = self.stage3(x)
        # stage4
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1]),
        ]  # the new branch derives from the lowest-resolution branch only
        x = self.stage4(x)
        x = self.final_layer(x[0])  # x.shape (1, 17, 64, 48)
        return x
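
Finally, a minimal sketch (not in the original post) to sanity-check the whole network: a (1, 3, 256, 192) input should come out as 17 heatmaps of size 64×48:

import torch

model = HighResolutionNet(base_channel=32, num_joints=17)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 192))
print(out.shape)  # torch.Size([1, 17, 64, 48])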
References
This article draws on: HRNet网络简介
If any of this infringes on your rights, please let me know and I will remove it immediately!