GitHub repository used:
https://github.com/yuanyuanli85/Stacked_Hourglass_Network_Keras/tree/master/src/net
You can use it as a reference when reproducing the network.
Code
from keras.models import *
from keras.layers import *
from keras.optimizers import Adam, RMSprop
from keras.losses import mean_squared_error
import keras.backend as K
def create_hourglass_network(num_classes, num_stacks, num_channels, inres, outres, bottleneck):
input = Input(shape=(inres[0], inres[1], 3))
# initial feature maps extracted by the front module
front_features = create_front_module(input, num_channels, bottleneck)
head_next_stage = front_features
outputs = []
for i in range(num_stacks):
# each stack also receives part of its information directly from the incoming feature maps
head_next_stage, head_to_loss = hourglass_module(head_next_stage, num_classes, num_channels, bottleneck, i)
outputs.append(head_to_loss)
model = Model(inputs=input, outputs=outputs)
rms = RMSprop(lr=5e-4)
model.compile(optimizer=rms, loss=mean_squared_error, metrics=["accuracy"])
return model
def hourglass_module(bottom, num_classes, num_channels, bottleneck, hgid):
'''
In the first hourglass, bottom is the initial feature maps;
in later hourglasses it is the output of the previous hourglass.
:param bottom: input feature maps of this hourglass
:param num_classes: number of output heatmaps (keypoint classes)
:param num_channels: number of feature channels inside the hourglass
:param bottleneck: bottleneck_block or bottleneck_mobile
:param hgid: hourglass index, used to compose layer names
:return: (head_next_stage, head_parts)
'''
# create left features: f1, f2, f4, and f8
left_features = create_left_half_blocks(bottom, bottleneck, hgid, num_channels)
# create right features, connect with left features
rf1 = create_right_half_blocks(left_features, bottleneck, hgid, num_channels)
# add 1x1 conv with two heads, head_next_stage is sent to next stage
# head_parts is used for intermediate supervision
head_next_stage, head_parts = create_heads(bottom, rf1, num_classes, hgid, num_channels)
# the output of this hourglass is head_next_stage
return head_next_stage, head_parts
def bottleneck_block(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
# (// rather than / so the channel count stays an int under Python 3)
_x = Conv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def bottleneck_mobile(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = SeparableConv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 separable conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
_x = SeparableConv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = SeparableConv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = SeparableConv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def create_front_module(input, num_channels, bottleneck):
# front module, input to 1/4 resolution
# 1 7x7 conv + maxpooling
# 3 residual blocks
_x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same', activation='relu', name='front_conv_1x1_x1')(
input)
_x = BatchNormalization()(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x2')
_x = bottleneck(_x, num_channels, 'front_residual_x3')
return _x
def create_left_half_blocks(bottom, bottleneck, hglayer, num_channels):
# create left half blocks for hourglass module
# f1, f2, f4, f8 : 1, 1/2, 1/4, 1/8 resolution
hgname = 'hg' + str(hglayer)
f1 = bottleneck(bottom, num_channels, hgname + '_l1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f1)
f2 = bottleneck(_x, num_channels, hgname + '_l2')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f2)
f4 = bottleneck(_x, num_channels, hgname + '_l4')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f4)
f8 = bottleneck(_x, num_channels, hgname + '_l8')
return (f1, f2, f4, f8)
def connect_left_to_right(left, right, bottleneck, name, num_channels):
'''
Connect a left feature map to the corresponding right feature map.
:param left: left (skip) feature map
:param right: right, lower-resolution feature map
:param name: layer name prefix
:return: fused feature map
'''
# left -> 1 bottleneck
# right -> upsampling
# Add -> left + right
_xleft = bottleneck(left, num_channels, name + '_connect')
_xright = UpSampling2D()(right)
add = Add()([_xleft, _xright])
out = bottleneck(add, num_channels, name + '_connect_conv')
return out
def bottom_layer(lf8, bottleneck, hgid, num_channels):
# blocks in lowest resolution
# 1 skip bottleneck + 3 chained bottleneck blocks, merged by Add
lf8_connect = bottleneck(lf8, num_channels, str(hgid) + "_lf8")
_x = bottleneck(lf8, num_channels, str(hgid) + "_lf8_x1")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x2")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x3")
rf8 = Add()([_x, lf8_connect])
return rf8
def create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels):
lf1, lf2, lf4, lf8 = leftfeatures
rf8 = bottom_layer(lf8, bottleneck, hglayer, num_channels)
rf4 = connect_left_to_right(lf4, rf8, bottleneck, 'hg' + str(hglayer) + '_rf4', num_channels)
rf2 = connect_left_to_right(lf2, rf4, bottleneck, 'hg' + str(hglayer) + '_rf2', num_channels)
rf1 = connect_left_to_right(lf1, rf2, bottleneck, 'hg' + str(hglayer) + '_rf1', num_channels)
return rf1
def create_heads(prelayerfeatures, rf1, num_classes, hgid, num_channels):
# two heads: one goes to the next stage, one is used for intermediate supervision
head = Conv2D(num_channels, kernel_size=(1, 1), activation='relu', padding='same', name=str(hgid) + '_conv_1x1_x1')(
rf1)
head = BatchNormalization()(head)
# the intermediate-supervision head uses 'linear' activation
head_parts = Conv2D(num_classes, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_parts')(head)
# use linear activation
head = Conv2D(num_channels, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_x2')(head)
head_m = Conv2D(num_channels, kernel_size=(1, 1), activation='linear', padding='same',
name=str(hgid) + '_conv_1x1_x3')(head_parts)
head_next_stage = Add()([head, head_m, prelayerfeatures])
return head_next_stage, head_parts
def euclidean_loss(x, y):
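# Euclidean (L2) distance between the two tensors; note that K.sum also
# sums over the batch dimension. Defined as an alternative loss here;
# model.compile above actually uses mean_squared_error.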
return K.sqrt(K.sum(K.square(x - y)))
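As a quick smoke test, the whole network can be instantiated like this (a sketch; the MPII-style settings of 16 keypoints, 256×256 input, 64×64 output and 256 channels are assumptions, not values fixed by the code above):

model = create_hourglass_network(num_classes=16, num_stacks=2, num_channels=256,
                                 inres=(256, 256), outres=(64, 64),
                                 bottleneck=bottleneck_block)
print(len(model.outputs))  # 2 -> one heatmap output per stack, each (None, 64, 64, 16)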
Network architecture analysis
A note before we start
"Stacked hourglass" originally means a network built by stacking multiple hourglass structures; "stack" does not refer to anything inside an hourglass. In the analysis below, however, we treat one hourglass as consisting of the inside of a stack plus the part outside the stack, since this makes it easier to see how each piece of code corresponds to the architecture. If anything is ambiguous or unclear, feel free to leave a comment for discussion.
The structure inside a stack
create_left_half_blocks(bottom, bottleneck, hglayer, num_channels)
- bottom is the input to the whole left_half_blocks, i.e. the input of f1 in each stack
- bottleneck selects whether the network is built with bottleneck_block() or bottleneck_mobile()
- hglayer is a parameter used to compose layer names
- num_channels is the number of channels this block finally outputs

f1, f2, f4, f8 are four bottleneck blocks. Internally they use 1×1 convolutions to adjust the channel dimension, which is kept constant throughout; in other words, each of f1, f2, f4, f8 is internally a residual structure with the same number of channels. Between consecutive f layers, however, there is one pooling operation, so by f8 the spatial resolution has dropped to 1/8.
- Finally, create_left_half_blocks returns the output feature maps of all four bottleneck blocks (a shape check follows the code below).
def bottleneck_block(bottom, num_out_channels, block_name):
# skip layer
if K.int_shape(bottom)[-1] == num_out_channels:
_skip = bottom
else:
_skip = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + 'skip')(bottom)
# residual: 3 conv blocks, [num_out_channels/2 -> num_out_channels/2 -> num_out_channels]
_x = Conv2D(num_out_channels // 2, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x1')(bottom)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels // 2, kernel_size=(3, 3), activation='relu', padding='same',
name=block_name + '_conv_3x3_x2')(_x)
_x = BatchNormalization()(_x)
_x = Conv2D(num_out_channels, kernel_size=(1, 1), activation='relu', padding='same',
name=block_name + '_conv_1x1_x3')(_x)
_x = BatchNormalization()(_x)
_x = Add(name=block_name + '_residual')([_skip, _x])
return _x
def create_left_half_blocks(bottom, bottleneck, hglayer, num_channels):
# create left half blocks for hourglass module
# f1, f2, f4, f8 : 1, 1/2, 1/4, 1/8 resolution
hgname = 'hg' + str(hglayer)
f1 = bottleneck(bottom, num_channels, hgname + '_l1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f1)
f2 = bottleneck(_x, num_channels, hgname + '_l2')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f2)
f4 = bottleneck(_x, num_channels, hgname + '_l4')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(f4)
f8 = bottleneck(_x, num_channels, hgname + '_l8')
return (f1, f2, f4, f8)
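A minimal shape check of the left half (a sketch; the 64×64, 256-channel input tensor is an assumption, matching a 256×256 image after the front module):

x = Input(shape=(64, 64, 256))
f1, f2, f4, f8 = create_left_half_blocks(x, bottleneck_block, 0, 256)
# f1: (None, 64, 64, 256), f2: (None, 32, 32, 256),
# f4: (None, 16, 16, 256), f8: (None, 8, 8, 256)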
bottom_layer(lf8, bottleneck, hgid, num_channels)
- lf8 is the output of f8
- hgid is, again, a parameter used to compose layer names
- the bottom layer consists of the following steps:
  - the output of f8 enters both lf8_connect and _x
  - _x then passes through two more bottleneck blocks
  - finally, _x and lf8_connect are added together to give rf8 (see the sketch after the code)
def bottom_layer(lf8, bottleneck, hgid, num_channels):
# blocks in lowest resolution
# 1 skip bottleneck + 3 chained bottleneck blocks, merged by Add
lf8_connect = bottleneck(lf8, num_channels, str(hgid) + "_lf8")
_x = bottleneck(lf8, num_channels, str(hgid) + "_lf8_x1")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x2")
_x = bottleneck(_x, num_channels, str(hgid) + "_lf8_x3")
rf8 = Add()([_x, lf8_connect])
return rf8
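At the lowest resolution nothing changes shape; only more processing is applied (a sketch; the 8×8, 256-channel input is an assumption):

x8 = Input(shape=(8, 8, 256))  # stands in for the lf8 tensor
rf8 = bottom_layer(x8, bottleneck_block, 0, 256)
# rf8 keeps the same shape: (None, 8, 8, 256)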
create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels)
- leftfeatures refers to the left-side feature maps (f1, f2, f4; the processing of f8 is handled in bottom_layer)
- Let's walk through the left-to-right fusion for f1 in detail; f2, f4, f8 work in exactly the same way:
  - first, the output lf1 of f1 is convolved by one bottleneck block
  - the output of rf2 goes through an UpSampling2D operation and is then fused (added) with the feature coming from the left
  - these two steps are exactly what the function connect_left_to_right does
- Note that in create_right_half_blocks the generation must start from the rf8 feature and proceed from the inner layers to the outer layers, i.e.:
def connect_left_to_right(left, right, bottleneck, name, num_channels):
'''
Connect a left feature map to the corresponding right feature map.
:param left: left (skip) feature map
:param right: right, lower-resolution feature map
:param name: layer name prefix
:return: fused feature map
'''
# left -> 1 bottleneck
# right -> upsampling
# Add -> left + right
_xleft = bottleneck(left, num_channels, name + '_connect')
_xright = UpSampling2D()(right)
add = Add()([_xleft, _xright])
out = bottleneck(add, num_channels, name + '_connect_conv')
return out
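# Note: the Add above only works because UpSampling2D (default size 2x2)
# brings the right branch back to the left branch's resolution.
# A sketch (the 16x16 / 8x8 shapes are assumptions, standing in for lf4 / rf8):
#   left = Input(shape=(16, 16, 256))
#   right = Input(shape=(8, 8, 256))
#   out = connect_left_to_right(left, right, bottleneck_block, 'demo', 256)
#   # out: (None, 16, 16, 256)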
def create_right_half_blocks(leftfeatures, bottleneck, hglayer, num_channels):
lf1, lf2, lf4, lf8 = leftfeatures
rf8 = bottom_layer(lf8, bottleneck, hglayer, num_channels)
rf4 = connect_left_to_right(lf4, rf8, bottleneck, 'hg' + str(hglayer) + '_rf4', num_channels)
rf2 = connect_left_to_right(lf2, rf4, bottleneck, 'hg' + str(hglayer) + '_rf2', num_channels)
rf1 = connect_left_to_right(lf1, rf2, bottleneck, 'hg' + str(hglayer) + '_rf1', num_channels)
return rf1
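Putting the two halves together, the right half exactly undoes the downsampling of the left half (a sketch; the 64×64, 256-channel tensor is an assumption):

x = Input(shape=(64, 64, 256))
lf = create_left_half_blocks(x, bottleneck_block, 0, 256)
rf1 = create_right_half_blocks(lf, bottleneck_block, 0, 256)
# rf1 is back at the input resolution: (None, 64, 64, 256)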
- The part completed above corresponds to the portion drawn with green lines in the original figure
- Next we describe what happens before entering a stack and after leaving a stack
Processing between two stacks
create_heads(prelayerfeatures, rf1, num_classes, hgid, num_channels)
- prelayerfeatures is the final feature map produced by the previous stack; for the first stack, prelayerfeatures is the feature map produced from the input by create_front_module()
- head is a plain convolutional layer (no longer a bottleneck), with relu activation
- head_parts is a convolutional layer with "linear" activation
- finally, head and head_parts each pass through one more 1×1 convolution to adjust the channel dimension, giving head and head_m; head, head_m and prelayerfeatures are then summed to form the input features of the next stack
- head_parts is the feature map that will ultimately be used for intermediate supervision (a shape sketch follows)
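The two heads have different roles and shapes (a sketch; the 64×64, 256-channel tensors and 16 classes are assumptions):

prev = Input(shape=(64, 64, 256))  # stands in for prelayerfeatures
rf1 = Input(shape=(64, 64, 256))   # stands in for the hourglass output
nxt, parts = create_heads(prev, rf1, 16, 0, 256)
# nxt:   (None, 64, 64, 256) -> fed to the next stack
# parts: (None, 64, 64, 16)  -> heatmaps for intermediate supervision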
create_front_module(input, num_channels, bottleneck)
- an ordinary convolutional front end used to extract the initial features; the stride-2 7×7 convolution and the max pooling together bring the input down to 1/4 resolution
def create_front_module(input, num_channels, bottleneck):
# front module, input to 1/4 resolution
# 1 7x7 conv + maxpooling
# 3 residual blocks
_x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same', activation='relu', name='front_conv_1x1_x1')(
input)
_x = BatchNormalization()(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x1')
_x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(_x)
_x = bottleneck(_x, num_channels // 2, 'front_residual_x2')
_x = bottleneck(_x, num_channels, 'front_residual_x3')
return _x
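A shape check of the front module (a sketch; the 256×256 RGB input and 256 channels are assumptions):

img = Input(shape=(256, 256, 3))
feats = create_front_module(img, 256, bottleneck_block)
# 7x7 stride-2 conv (1/2) then max pooling (1/4): feats is (None, 64, 64, 256)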
Reviewing the whole pipeline inside one hourglass
def hourglass_module(bottom, num_classes, num_channels, bottleneck, hgid):
'''
In the first hourglass, bottom is the initial feature maps;
in later hourglasses it is the output of the previous hourglass.
:param bottom: input feature maps of this hourglass
:param num_classes: number of output heatmaps (keypoint classes)
:param num_channels: number of feature channels inside the hourglass
:param bottleneck: bottleneck_block or bottleneck_mobile
:param hgid: hourglass index, used to compose layer names
:return: (head_next_stage, head_parts)
'''
# create left features: f1, f2, f4, and f8
left_features = create_left_half_blocks(bottom, bottleneck, hgid, num_channels)
# create right features, connect with left features
rf1 = create_right_half_blocks(left_features, bottleneck, hgid, num_channels)
# add 1x1 conv with two heads, head_next_stage is sent to next stage
# head_parts is used for intermediate supervision
head_next_stage, head_parts = create_heads(bottom, rf1, num_classes, hgid, num_channels)
# the output of this hourglass is head_next_stage
return head_next_stage, head_parts
- Clearly, one hourglass module has to include both the inside of a stack and the part outside the stack.
- First, the feature map bottom coming from the previous hourglass is fed through create_left_half_blocks to obtain left_features
- Then, inside the stack, create_right_half_blocks produces the stack's final output feature rf1
- Finally, create_heads() integrates the features and returns the head_parts feature map used for intermediate supervision together with head_next_stage, the overall output of this hourglass (a sketch follows)
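Calling one module end to end (a sketch, under the same assumed sizes as above):

x = Input(shape=(64, 64, 256))
nxt, parts = hourglass_module(x, 16, 256, bottleneck_block, 0)
# nxt: (None, 64, 64, 256), parts: (None, 64, 64, 16)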
Stacking multiple hourglasses with a for loop to form the stacked hourglass network
def create_hourglass_network(num_classes, num_stacks, num_channels, inres, outres, bottleneck):
input = Input(shape=(inres[0], inres[1], 3))
# initial feature maps extracted by the front module
front_features = create_front_module(input, num_channels, bottleneck)
head_next_stage = front_features
outputs = []
for i in range(num_stacks):
# each stack also receives part of its information directly from the incoming feature maps
head_next_stage, head_to_loss = hourglass_module(head_next_stage, num_classes, num_channels, bottleneck, i)
outputs.append(head_to_loss)
model = Model(inputs=input, outputs=outputs)
rms = RMSprop(lr=5e-4)
model.compile(optimizer=rms, loss=mean_squared_error, metrics=["accuracy"])
return model
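Because the model has one heatmap output per stack, the ground-truth heatmaps are replicated for every stack during training, which is exactly how the intermediate supervision is wired in (a sketch; train_images and train_heatmaps are hypothetical arrays of shape (N, 256, 256, 3) and (N, 64, 64, 16)):

num_stacks = 2
model = create_hourglass_network(16, num_stacks, 256, (256, 256), (64, 64), bottleneck_block)
# the same targets supervise every stack's head_parts output
model.fit(train_images, [train_heatmaps] * num_stacks, batch_size=8, epochs=1)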