Introduction
HRNet is a classic model in the field of human pose estimation (HPE). Taking HRNet-w32 as an example, this article walks through a PyTorch implementation of the model.
Model structure

The model is built by stacking several stages, which involve upsampling, downsampling, and transition operations. Each is explained below.
Upsampling
HRNet's upsampling is implemented with nearest-neighbor interpolation: a 1×1 convolution first adjusts the number of channels, and nearest-neighbor interpolation then restores the higher resolution, as shown in the code below:

nn.Sequential(
    nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
    nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
    nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by a factor of 2^(j-i)
)
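
To see the shapes concretely, here is a minimal sketch (not from the original post) that instantiates this branch with assumed values c = 32, output branch i = 0, and input branch j = 1:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
c, i, j = 32, 0, 1  # assumed values for illustration
up = nn.Sequential(
    nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
    nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
    nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")
)
x = torch.randn(1, 64, 32, 24)  # a dummy branch-1 feature map
print(up(x).shape)              # torch.Size([1, 32, 64, 48])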
Downsampling
Downsampling differs slightly from upsampling: several convolutional layers first reduce the spatial resolution of the feature map, and a final convolutional layer halves the resolution once more while also adjusting the number of channels, as shown in the code below:

ops = []
for _ in range(i - j - 1):  # intermediate conv layers
    ops.append(
        nn.Sequential(
            nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True)
        )
    )
ops.append(  # final conv layer
    nn.Sequential(
        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
)
self.fuse_layers[-1].append(nn.Sequential(*ops))
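
As a quick sanity check, here is a minimal sketch (with assumed values, not from the original post) tracing the shapes for c = 32, input branch j = 0, and output branch i = 2, where the loop contributes one intermediate conv and the final conv adjusts the channels:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
c, i, j = 32, 2, 0  # assumed values for illustration
down = nn.Sequential(
    nn.Sequential(  # the single intermediate conv (i - j - 1 = 1)
        nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    ),
    nn.Sequential(  # the final conv, which also adjusts the channels
        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
)
x = torch.randn(1, 32, 64, 48)  # a dummy branch-0 feature map
print(down(x).shape)            # torch.Size([1, 128, 16, 12])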
Transition
A transition passes the outputs of one stage on to the next while adding a new branch via downsampling. Take transition2 as an example:

self.transition2 = nn.ModuleList([
    nn.Identity(),
    nn.Identity(),
    nn.Sequential(
        nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
])
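
The following minimal sketch (standalone, with assumed shapes, not from the original post) shows how transition2 turns the two stage-2 outputs into three stage-3 inputs; the first two branches pass through unchanged and the new branch is derived from the lowest-resolution branch only:

import torch
import torch.nn as nn

BN_MOMENTUM = 0.1
base_channel = 32
transition2 = nn.ModuleList([
    nn.Identity(),
    nn.Identity(),
    nn.Sequential(
        nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=True)
    )
])
x = [torch.randn(1, 32, 64, 48), torch.randn(1, 64, 32, 24)]  # dummy stage-2 outputs
x = [transition2[0](x[0]), transition2[1](x[1]), transition2[2](x[-1])]
print([t.shape for t in x])
# [torch.Size([1, 32, 64, 48]), torch.Size([1, 64, 32, 24]), torch.Size([1, 128, 16, 12])]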
Stage
A stage's main job is to fuse feature maps of different resolutions; each stage stacks several such modules. For the i-th output branch, every input branch j contributes in one of three ways:
- when i == j, the j-th input is passed through unchanged (identity);
- when i < j, the input must be upsampled;
- when i > j, the input must be downsampled.
See the code below for the details and explanations:
class StageModule(nn.Module):
    def __init__(self, input_branches, output_branches, c):
        """
        Builds the module for one stage, fusing features across different scales.
        :param input_branches: number of input branches
        :param output_branches: number of output branches
        :param c: number of channels of the first branch
        """
        super(StageModule, self).__init__()
        self.input_branches = input_branches
        self.out_branches = output_branches
        self.branches = nn.ModuleList()  # holds the BasicBlock stacks
        for i in range(self.input_branches):
            w = c * (2 ** i)
            branch = nn.Sequential(
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w)
            )
            self.branches.append(branch)
        self.fuse_layers = nn.ModuleList()  # fuses feature maps of different scales
        for i in range(self.out_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.input_branches):
                if i == j:  # no resampling needed
                    self.fuse_layers[-1].append(nn.Identity())  # returns x unchanged
                elif i < j:  # upsample: first reduce the channels, then nearest-neighbor interpolation
                    self.fuse_layers[-1].append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by 2^(j-i)
                        )
                    )
                else:  # downsample: i-j-1 conv layers reduce the resolution, then a final conv also adjusts channels
                    ops = []
                    for _ in range(i - j - 1):  # intermediate conv layers
                        ops.append(
                            nn.Sequential(
                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
                                nn.ReLU(inplace=True)
                            )
                        )
                    ops.append(  # final conv layer
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.ReLU(inplace=True)
                        )
                    )
                    self.fuse_layers[-1].append(nn.Sequential(*ops))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):  # x is a list holding input_branches tensors
        # first pass each input through its BasicBlock stack
        x = [self.branches[i](x[i]) for i in range(self.input_branches)]  # equivalent to x = [branch(xi) for branch, xi in zip(self.branches, x)]
        # then fuse information across scales
        x_fused = []
        for i in range(len(self.fuse_layers)):
            x_fused.append(
                self.relu(
                    sum([self.fuse_layers[i][j](x[j]) for j in range(self.input_branches)])
                )
            )
        return x_fused
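
A quick shape check, as a minimal sketch: assuming StageModule and BasicBlock from the model.py listing below are in scope, a three-branch module should preserve all three input shapes:

import torch

m = StageModule(input_branches=3, output_branches=3, c=32)
x = [torch.randn(1, 32, 64, 48),
     torch.randn(1, 64, 32, 24),
     torch.randn(1, 128, 16, 12)]
out = m(x)
print([t.shape for t in out])
# [torch.Size([1, 32, 64, 48]), torch.Size([1, 64, 32, 24]), torch.Size([1, 128, 16, 12])]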
model.py
The complete model code is given below; at the harder-to-follow spots I have added my own notes:
import torch.nn as nn
BN_MOMENTUM = 0.1
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # the second conv always uses stride 1, so the residual shapes match
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion,
                                  momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
class StageModule(nn.Module):
    def __init__(self, input_branches, output_branches, c):
        """
        Builds the module for one stage, fusing features across different scales.
        :param input_branches: number of input branches
        :param output_branches: number of output branches
        :param c: number of channels of the first branch
        """
        super(StageModule, self).__init__()
        self.input_branches = input_branches
        self.out_branches = output_branches
        self.branches = nn.ModuleList()  # holds the BasicBlock stacks
        for i in range(self.input_branches):
            w = c * (2 ** i)
            branch = nn.Sequential(
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w),
                BasicBlock(w, w)
            )
            self.branches.append(branch)
        self.fuse_layers = nn.ModuleList()  # fuses feature maps of different scales
        for i in range(self.out_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.input_branches):
                if i == j:  # no resampling needed
                    self.fuse_layers[-1].append(nn.Identity())  # returns x unchanged
                elif i < j:  # upsample: first reduce the channels, then nearest-neighbor interpolation
                    self.fuse_layers[-1].append(
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.Upsample(scale_factor=2.0 ** (j - i), mode="nearest")  # enlarge by 2^(j-i)
                        )
                    )
                else:  # downsample: i-j-1 conv layers reduce the resolution, then a final conv also adjusts channels
                    ops = []
                    for _ in range(i - j - 1):  # intermediate conv layers
                        ops.append(
                            nn.Sequential(
                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1),
                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),
                                nn.ReLU(inplace=True)
                            )
                        )
                    ops.append(  # final conv layer
                        nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1),
                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),
                            nn.ReLU(inplace=True)
                        )
                    )
                    self.fuse_layers[-1].append(nn.Sequential(*ops))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):  # x is a list holding input_branches tensors
        # first pass each input through its BasicBlock stack
        x = [self.branches[i](x[i]) for i in range(self.input_branches)]  # equivalent to x = [branch(xi) for branch, xi in zip(self.branches, x)]
        # then fuse information across scales
        x_fused = []
        for i in range(len(self.fuse_layers)):
            x_fused.append(
                self.relu(
                    sum([self.fuse_layers[i][j](x[j]) for j in range(self.input_branches)])
                )
            )
        return x_fused
class HighResolutionNet(nn.Module):
    def __init__(self, base_channel: int = 32, num_joints: int = 17):
        super(HighResolutionNet, self).__init__()
        # stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        downsample = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(256, momentum=BN_MOMENTUM)
        )
        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, downsample=downsample),
            Bottleneck(256, 64),
            Bottleneck(256, 64),
            Bottleneck(256, 64)
        )
        self.transition1 = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(256, base_channel, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(base_channel, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            ),
            nn.Sequential(
                nn.Sequential(  # the extra Sequential keeps parameter names compatible with the weights released by the original repo
                    nn.Conv2d(256, base_channel * 2, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 2, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])
        # stage2
        self.stage2 = nn.Sequential(
            StageModule(input_branches=2, output_branches=2, c=base_channel)
        )
        # transition2
        self.transition2 = nn.ModuleList([
            nn.Identity(),
            nn.Identity(),
            nn.Sequential(
                nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            )
        ])
        # stage3
        self.stage3 = nn.Sequential(
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
            StageModule(input_branches=3, output_branches=3, c=base_channel),
        )
        # transition3
        self.transition3 = nn.ModuleList([
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Identity(),  # None, - Used in place of "None" because it is callable
            nn.Sequential(
                nn.Sequential(
                    nn.Conv2d(base_channel * 4, base_channel * 8, kernel_size=3, stride=2, padding=1, bias=False),
                    nn.BatchNorm2d(base_channel * 8, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True)
                )
            )
        ])
        # stage4
        self.stage4 = nn.Sequential(
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=4, c=base_channel),
            StageModule(input_branches=4, output_branches=1, c=base_channel),  # the last module fuses everything into the highest-resolution branch
        )
        # final layer
        self.final_layer = nn.Conv2d(base_channel, num_joints, kernel_size=1, stride=1)
    def forward(self, x):
        """
        :param x: assume an input of shape (1, 3, 256, 192)
        :return:
        """
        # stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)  # x.shape (1, 64, 64, 48)
        # stage1
        x = self.layer1(x)  # x.shape (1, 256, 64, 48)
        # stage2
        x = [trans(x) for trans in self.transition1]  # x becomes a list: [(1, 32, 64, 48), (1, 64, 32, 24)]
        x = self.stage2(x)
        # stage3
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # the new branch derives from the lowest-resolution branch only
        x = self.stage3(x)
        # stage4
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1]),
        ]  # the new branch derives from the lowest-resolution branch only
        x = self.stage4(x)
        x = self.final_layer(x[0])  # x.shape (1, 17, 64, 48)
        return x
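
Finally, a minimal sketch (not in the original post) to sanity-check the whole network: a (1, 3, 256, 192) input should come out as 17 heatmaps of size 64×48:

import torch

model = HighResolutionNet(base_channel=32, num_joints=17)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 192))
print(out.shape)  # torch.Size([1, 17, 64, 48])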
References
This article draws on: HRNet网络简介
If any of this infringes on your rights, please let me know and I will remove it immediately!