GitHub repository used:
https://github.com/yuanyuanli85/Stacked_Hourglass_Network_Keras/tree/master/src/net
You can use it as a reference when reproducing the network.
Code
from keras.models import *
from keras.layers import *
from keras.optimizers import Adam, RMSprop
from keras.losses import mean_squared_error
import keras.backend as K
def create_hourglass_network(num_classes, num_stacks, num_channels, inres, outres, bottleneck):
input = Input(shape=(inres[0], inres[1], 3))
# initial feature maps extracted by the front module
front_features = create_front_module(input, num_channels, bottleneck)
head_next_stage = front_features
outputs = []
for i in range(num_stacks):
# each stack also receives part of its information directly from the incoming feature maps
head_next_stage, head_to_loss = hourglass_module(head_next_stage, num_classes, num_channels, bottleneck, i)
outputs.append(head_to_loss)
model = Model(inputs=input, outputs=outputs)
rms = RMSprop(lr=5e-4)
model.compile(optimizer=rms, loss=mean_squared_error, metrics=["accuracy"])
return model
def hourglass_module(bottom, num_classes, num_channels, bottleneck, hgid):
'''
In the first hourglass, bottom is the initial feature maps;
in later hourglasses it is the output of the previous hourglass.
:param bottom: input feature maps of this hourglass
:param num_classes: number of output heatmaps (keypoint classes)
:param num_channels: number of feature channels inside the hourglass
:param bottleneck: bottleneck_block or bottleneck_mobile
:param hgid: hourglass index, used to compose layer names
:return: (head_next_stage, head_parts)
'''
# create left features: f1, f2, f4, and f8
left_features = create_left_half_blocks(bottom, bottleneck, hgid, num_channels)
# create right features, connect with left features
rf1 = create_right_half_blocks(left_features, bottleneck, hgid, num_channels)
# add 1x1 conv with two heads, head_next_stage is sent to next stage
# head_parts is used for intermediate supervision
head_next_stage, head_parts = create_heads(bottom, rf1, num_classes, hgid, num_channels)
# the output of this hourglass is head_next_stage
return head_next_stage, head_parts
def bottleneck_block(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
# (// rather than / so the channel count stays an int under Python 3)
_x = Conv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def bottleneck_mobile(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = SeparableConv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 separable conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
_x = SeparableConv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = SeparableConv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = SeparableConv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def create_front_module(input, num_channels, bottleneck):
# front module, input to 1/4 resolution
# 1 7x7 conv + maxpooling
# 3 residual blocks
_x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same', activation='relu', name='front_conv_1x1_x1')(
input)
_x = BatchNormalization()(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x2')
_x = bottleneck(_x, num_channels, 'front_residual_x3')
return _x
def create_left_half_blocks(bottom, bottleneck, hglayer, num_channels):
# create left half blocks for hourglass module
# f1, f2, f4, f8 : 1, 1/2, 1/4, 1/8 resolution
hgname = 'hg' + str(hglayer)
f1 = bottleneck(bottom, num_channels, hgname + '_l1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f1)
f2 = bottleneck(_x, num_channels, hgname + '_l2')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f2)
f4 = bottleneck(_x, num_channels, hgname + '_l4')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f4)
f8 = bottleneck(_x, num_channels, hgname + '_l8')
return (f1, f2, f4, f8)
def connect_left_to_right(left, right, bottleneck, name, num_channels):
'''
Connect a left feature map to the corresponding right feature map.
:param left: left (skip) feature map
:param right: right, lower-resolution feature map
:param name: layer name prefix
:return: fused feature map
'''
# left -> 1 bottleneck
# right -> upsampling
# Add -> left + right
_xleft = bottleneck(left, num_channels, name + '_connect')
_xright = UpSampling2D()(right)
add = Add()([_xleft, _xright])
out = bottleneck(add, num_channels, name + '_connect_conv')
return out
def bottom_layer(lf8, bottleneck, hgid, num_channels):
# blocks in lowest resolution
# 1 skip bottleneck + 3 chained bottleneck blocks, merged by Add
lf8_connect = bottleneck(lf8, num_channels, str(hgid) + "_lf8")
_x = bottleneck(lf8, num_channels, str(hgid) + "_lf8_x1")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x2")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x3")
rf8 = Add()([_x, lf8_connect])
return rf8
def create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels):
lf1, lf2, lf4, lf8 = leftfeatures
rf8 = bottom_layer(lf8, bottleneck, hglayer, num_channels)
rf4 = connect_left_to_right(lf4, rf8, bottleneck, 'hg' + str(hglayer) + '_rf4', num_channels)
rf2 = connect_left_to_right(lf2, rf4, bottleneck, 'hg' + str(hglayer) + '_rf2', num_channels)
rf1 = connect_left_to_right(lf1, rf2, bottleneck, 'hg' + str(hglayer) + '_rf1', num_channels)
return rf1
def create_heads(prelayerfeatures, rf1, num_classes, hgid, num_channels):
# two heads: one goes to the next stage, one is used for intermediate supervision
head = Conv2D(num_channels, kernel_size=(1, 1), activation='relu', padding='same', name=str(hgid) + '_conv_1x1_x1')(
rf1)
head = BatchNormalization()(head)
# the intermediate-supervision head uses 'linear' activation
head_parts = Conv2D(num_classes, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_parts')(head)
# use linear activation
head = Conv2D(num_channels, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_x2')(head)
head_m = Conv2D(num_channels, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_x3')(head_parts)
head_next_stage = Add()([head, head_m, prelayerfeatures])
return head_next_stage, head_parts
def euclidean_loss(x, y):
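# Euclidean (L2) distance between the two tensors; note that K.sum also
# sums over the batch dimension. Defined as an alternative loss here;
# model.compile above actually uses mean_squared_error.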
return K.sqrt(K.sum(K.square(x - y)))
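As a quick smoke test, the whole network can be instantiated like this (a sketch; the MPII-style settings of 16 keypoints, 256×256 input, 64×64 output and 256 channels are assumptions, not values fixed by the code above):

model = create_hourglass_network(num_classes=16, num_stacks=2, num_channels=256,
                                 inres=(256, 256), outres=(64, 64),
                                 bottleneck=bottleneck_block)
print(len(model.outputs))  # 2 -> one heatmap output per stack, each (None, 64, 64, 16)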
Network architecture analysis
A note before we start
"Stacked hourglass" originally means a network built by stacking multiple hourglass structures; "stack" does not refer to anything inside an hourglass. In the analysis below, however, we treat one hourglass as consisting of the inside of a stack plus the part outside the stack, since this makes it easier to see how each piece of code corresponds to the architecture. If anything is ambiguous or unclear, feel free to leave a comment for discussion.
The structure inside a stack
create_left_half_blocks(bottom, bottleneck, hglayer, num_channels)
- bottom is the input to the whole left_half_blocks, i.e. the input of f1 in each stack
- bottleneck selects whether the network is built with bottleneck_block() or bottleneck_mobile()
- hglayer is a parameter used to compose layer names
- num_channels is the number of channels this block finally outputs

f1, f2, f4, f8 are four bottleneck blocks. Internally they use 1×1 convolutions to adjust the channel dimension, which is kept constant throughout; in other words, each of f1, f2, f4, f8 is internally a residual structure with the same number of channels. Between consecutive f layers, however, there is one pooling operation, so by f8 the spatial resolution has dropped to 1/8.
- Finally, create_left_half_blocks returns the output feature maps of all four bottleneck blocks (a shape check follows the code below).
def bottleneck_block(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
_x = Conv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def create_left_half_blocks(bottom, bottleneck, hglayer, num_channels):
# create left half blocks for hourglass module
# f1, f2, f4, f8 : 1, 1/2, 1/4, 1/8 resolution
hgname = 'hg' + str(hglayer)
f1 = bottleneck(bottom, num_channels, hgname + '_l1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f1)
f2 = bottleneck(_x, num_channels, hgname + '_l2')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f2)
f4 = bottleneck(_x, num_channels, hgname + '_l4')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f4)
f8 = bottleneck(_x, num_channels, hgname + '_l8')
return (f1, f2, f4, f8)
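A minimal shape check of the left half (a sketch; the 64×64, 256-channel input tensor is an assumption, matching a 256×256 image after the front module):

x = Input(shape=(64, 64, 256))
f1, f2, f4, f8 = create_left_half_blocks(x, bottleneck_block, 0, 256)
# f1: (None, 64, 64, 256), f2: (None, 32, 32, 256),
# f4: (None, 16, 16, 256), f8: (None, 8, 8, 256)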
bottom_layer(lf8, bottleneck, hgid, num_channels)
- lf8 is the output of f8
- hgid is, again, a parameter used to compose layer names
- the bottom layer consists of the following steps:
  - the output of f8 enters both lf8_connect and _x
  - _x then passes through two more bottleneck blocks
  - finally, _x and lf8_connect are added together to give rf8 (see the sketch after the code)
def bottom_layer(lf8, bottleneck, hgid, num_channels):
# blocks in lowest resolution
# 1 skip bottleneck + 3 chained bottleneck blocks, merged by Add
lf8_connect = bottleneck(lf8, num_channels, str(hgid) + "_lf8")
_x = bottleneck(lf8, num_channels, str(hgid) + "_lf8_x1")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x2")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x3")
rf8 = Add()([_x, lf8_connect])
return rf8
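At the lowest resolution nothing changes shape; only more processing is applied (a sketch; the 8×8, 256-channel input is an assumption):

x8 = Input(shape=(8, 8, 256))  # stands in for the lf8 tensor
rf8 = bottom_layer(x8, bottleneck_block, 0, 256)
# rf8 keeps the same shape: (None, 8, 8, 256)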
create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels)
- leftfeatures refers to the left-side feature maps (f1, f2, f4; the processing of f8 is handled in bottom_layer)
- Let's walk through the left-to-right fusion for f1 in detail; f2, f4, f8 work in exactly the same way:
  - first, the output lf1 of f1 is convolved by one bottleneck block
  - the output of rf2 goes through an UpSampling2D operation and is then fused (added) with the feature coming from the left
  - these two steps are exactly what the function connect_left_to_right does
- Note that in create_right_half_blocks the generation must start from the rf8 feature and proceed from the inner layers to the outer layers, i.e.:
def connect_left_to_right(left, right, bottleneck, name, num_channels):
'''
Connect a left feature map to the corresponding right feature map.
:param left: left (skip) feature map
:param right: right, lower-resolution feature map
:param name: layer name prefix
:return: fused feature map
'''
# left -> 1 bottleneck
# right -> upsampling
# Add -> left + right
_xleft = bottleneck(left, num_channels, name + '_connect')
_xright = UpSampling2D()(right)
add = Add()([_xleft, _xright])
out = bottleneck(add, num_channels, name + '_connect_conv')
return out
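# Note: the Add above only works because UpSampling2D (default size 2x2)
# brings the right branch back to the left branch's resolution.
# A sketch (the 16x16 / 8x8 shapes are assumptions, standing in for lf4 / rf8):
#   left = Input(shape=(16, 16, 256))
#   right = Input(shape=(8, 8, 256))
#   out = connect_left_to_right(left, right, bottleneck_block, 'demo', 256)
#   # out: (None, 16, 16, 256)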
def create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels):
lf1, lf2, lf4, lf8 = leftfeatures
rf8 = bottom_layer(lf8, bottleneck, hglayer, num_channels)
rf4 = connect_left_to_right(lf4, rf8, bottleneck, 'hg' + str(hglayer) + '_rf4', num_channels)
rf2 = connect_left_to_right(lf2, rf4, bottleneck, 'hg' + str(hglayer) + '_rf2', num_channels)
rf1 = connect_left_to_right(lf1, rf2, bottleneck, 'hg' + str(hglayer) + '_rf1', num_channels)
return rf1
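Putting the two halves together, the right half exactly undoes the downsampling of the left half (a sketch; the 64×64, 256-channel tensor is an assumption):

x = Input(shape=(64, 64, 256))
lf = create_left_half_blocks(x, bottleneck_block, 0, 256)
rf1 = create_right_half_blocks(lf, bottleneck_block, 0, 256)
# rf1 is back at the input resolution: (None, 64, 64, 256)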
- The part completed above corresponds to the portion drawn with green lines in the original figure
- Next we describe what happens before entering a stack and after leaving a stack
Processing between two stacks
create_heads(prelayerfeatures, rf1, num_classes, hgid, num_channels)
- prelayerfeatures is the final feature map produced by the previous stack; for the first stack, prelayerfeatures is the feature map produced from the input by create_front_module()
- head is a plain convolutional layer (no longer a bottleneck), with relu activation
- head_parts is a convolutional layer with "linear" activation
- finally, head and head_parts each pass through one more 1×1 convolution to adjust the channel dimension, giving head and head_m; head, head_m and prelayerfeatures are then summed to form the input features of the next stack
- head_parts is the feature map that will ultimately be used for intermediate supervision (a shape sketch follows)
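The two heads have different roles and shapes (a sketch; the 64×64, 256-channel tensors and 16 classes are assumptions):

prev = Input(shape=(64, 64, 256))  # stands in for prelayerfeatures
rf1 = Input(shape=(64, 64, 256))   # stands in for the hourglass output
nxt, parts = create_heads(prev, rf1, 16, 0, 256)
# nxt:   (None, 64, 64, 256) -> fed to the next stack
# parts: (None, 64, 64, 16)  -> heatmaps for intermediate supervision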
create_front_module(input, num_channels, bottleneck)
- an ordinary convolutional front end used to extract the initial features; the stride-2 7×7 convolution and the max pooling together bring the input down to 1/4 resolution
def create_front_module(input, num_channels, bottleneck):
# front module, input to 1/4 resolution
# 1 7x7 conv + maxpooling
# 3 residual blocks
_x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same', activation='relu', name='front_conv_1x1_x1')(
input)
_x = BatchNormalization()(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x2')
_x = bottleneck(_x, num_channels, 'front_residual_x3')
return _x
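A shape check of the front module (a sketch; the 256×256 RGB input and 256 channels are assumptions):

img = Input(shape=(256, 256, 3))
feats = create_front_module(img, 256, bottleneck_block)
# 7x7 stride-2 conv (1/2) then max pooling (1/4): feats is (None, 64, 64, 256)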
Reviewing the whole pipeline inside one hourglass
def hourglass_module(bottom, num_classes, num_channels, bottleneck, hgid):
'''
In the first hourglass, bottom is the initial feature maps;
in later hourglasses it is the output of the previous hourglass.
:param bottom: input feature maps of this hourglass
:param num_classes: number of output heatmaps (keypoint classes)
:param num_channels: number of feature channels inside the hourglass
:param bottleneck: bottleneck_block or bottleneck_mobile
:param hgid: hourglass index, used to compose layer names
:return: (head_next_stage, head_parts)
'''
# create left features: f1, f2, f4, and f8
left_features = create_left_half_blocks(bottom, bottleneck, hgid, num_channels)
# create right features, connect with left features
rf1 = create_right_half_blocks(left_features, bottleneck, hgid, num_channels)
# add 1x1 conv with two heads, head_next_stage is sent to next stage
# head_parts is used for intermediate supervision
head_next_stage, head_parts = create_heads(bottom, rf1, num_classes, hgid, num_channels)
# the output of this hourglass is head_next_stage
return head_next_stage, head_parts
- Clearly, one hourglass module has to include both the inside of a stack and the part outside the stack.
- First, the feature map bottom coming from the previous hourglass is fed through create_left_half_blocks to obtain left_features
- Then, inside the stack, create_right_half_blocks produces the stack's final output feature rf1
- Finally, create_heads() integrates the features and returns the head_parts feature map used for intermediate supervision together with head_next_stage, the overall output of this hourglass (a sketch follows)
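Calling one module end to end (a sketch, under the same assumed sizes as above):

x = Input(shape=(64, 64, 256))
nxt, parts = hourglass_module(x, 16, 256, bottleneck_block, 0)
# nxt: (None, 64, 64, 256), parts: (None, 64, 64, 16)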
Stacking multiple hourglasses with a for loop to form the stacked hourglass network
def create_hourglass_network(num_classes, num_stacks, num_channels, inres, outres, bottleneck):
input = Input(shape=(inres[0], inres[1], 3))
# initial feature maps extracted by the front module
front_features = create_front_module(input, num_channels, bottleneck)
head_next_stage = front_features
outputs = []
for i in range(num_stacks):
# each stack also receives part of its information directly from the incoming feature maps
head_next_stage, head_to_loss = hourglass_module(head_next_stage, num_classes, num_channels, bottleneck, i)
outputs.append(head_to_loss)
model = Model(inputs=input, outputs=outputs)
rms = RMSprop(lr=5e-4)
model.compile(optimizer=rms, loss=mean_squared_error, metrics=["accuracy"])
return model
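Because the model has one heatmap output per stack, the ground-truth heatmaps are replicated for every stack during training, which is exactly how the intermediate supervision is wired in (a sketch; train_images and train_heatmaps are hypothetical arrays of shape (N, 256, 256, 3) and (N, 64, 64, 16)):

num_stacks = 2
model = create_hourglass_network(16, num_stacks, 256, (256, 256), (64, 64), bottleneck_block)
# the same targets supervise every stack's head_parts output
model.fit(train_images, [train_heatmaps] * num_stacks, batch_size=8, epochs=1)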