#https://blog.csdn.net/hzhj2007/article/details/79870936
#https://link.zhihu.com/?target=https%3A//medium.com/mlreview/a-guide-to-receptive-field-arithmetic-for-convolutional-neural-networks-e0f514068807
#https://zhuanlan.zhihu.com/p/56940729
# Network definitions for the receptive-field calculators below.
# Each layer spec is [kernel_size, stride, padding, dilation]; 'name' lists
# the matching layer labels in the same forward order as 'net'.
net_struct = {
'alexnet': {'net': [[11, 4, 0, 1], [3, 2, 0, 1], [5, 1, 2, 1], [3, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 2, 0, 1]],
'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5']},
'vgg16': {'net': [[3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[2, 2, 0, 1]],
'name': ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2',
'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3',
'pool5']},
'zf-5': {'net': [[7, 2, 3, 1], [3, 2, 1, 1], [5, 2, 2, 1], [3, 2, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1]],
'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5']},
# ResNet-50 bottleneck stacks; the 1x1 downsample branches are left
# commented out alongside the block they belong to.
'resnet50': {
'net': [[7, 2, 3, 1], [3, 2, 1, 1],
[1, 1, 0, 1], [3, 1, 1, 1], [1, 1, 0, 1], #[1, 1, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], #[1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], # [1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], # [1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
],
'name': ['conv1', 'maxpool',
'layer1_0_conv1', 'layer1_0_conv2', 'layer1_0_conv3', #'layer1_0_downsample',
'layer1_1_conv1', 'layer1_1_conv2', 'layer1_1_conv3',
'layer1_2_conv1', 'layer1_2_conv2', 'layer1_2_conv3',
'layer2_0_conv1', 'layer2_0_conv2', 'layer2_0_conv3', #'layer2_0_downsample',
'layer2_1_conv1', 'layer2_1_conv2', 'layer2_1_conv3',
'layer2_2_conv1', 'layer2_2_conv2', 'layer2_2_conv3',
'layer2_3_conv1', 'layer2_3_conv2', 'layer2_3_conv3',
'layer3_0_conv1', 'layer3_0_conv2', 'layer3_0_conv3', #'layer3_0_downsample',
'layer3_1_conv1', 'layer3_1_conv2', 'layer3_1_conv3',
'layer3_2_conv1', 'layer3_2_conv2', 'layer3_2_conv3',
'layer3_3_conv1', 'layer3_3_conv2', 'layer3_3_conv3',
'layer3_4_conv1', 'layer3_4_conv2', 'layer3_4_conv3',
'layer3_5_conv1', 'layer3_5_conv2', 'layer3_5_conv3',
'layer4_0_conv1', 'layer4_0_conv2', 'layer4_0_conv3', #'layer4_0_downsample',
'layer4_1_conv1', 'layer4_1_conv2', 'layer4_1_conv3',
'layer4_2_conv1', 'layer4_2_conv2', 'layer4_2_conv3',
]},
# NOTE(review): this 'resnet101' entry duplicates the 'zf-5' specs above --
# looks like a placeholder; verify before relying on it.
'resnet101': {'net': [[7, 2, 3, 1], [3, 2, 1, 1], [5, 2, 2, 1], [3, 2, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1]],
'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5']}}
imsize = 512
# When True, the formulas honour the dilation factor stored as the 4th entry
# of every layer spec; with dilation == 1 both branches compute identical
# results (all specs above use dilation == 1).
open_dilation = True

if open_dilation:
    def outFromIn(isz, net, layernum):
        """Return (output size, cumulative stride) after the first `layernum` layers.

        `isz` is the square input resolution and `net` a list of
        [kernel, stride, pad, dilation] specs.
        """
        totstride = 1
        insize = isz
        for layer in range(layernum):
            fsize, stride, pad, dilation = net[layer]
            # Standard conv arithmetic with the effective (dilated) kernel
            # extent d*(k-1)+1; floor division keeps sizes integral.
            outsize = (insize + 2 * pad - (dilation * (fsize - 1) + 1)) // stride + 1
            insize = outsize
            totstride = totstride * stride
        return outsize, totstride

    def inFromOut(net, layernum):
        """Receptive field of one unit of layer `layernum`, walked top-down."""
        RF = 1
        for layer in reversed(range(layernum)):
            fsize, stride, pad, dilation = net[layer]
            # RF_in = (RF_out - 1) * stride + effective kernel extent
            RF = (RF - 1) * stride + dilation * (fsize - 1) + 1
        return RF

    def topToBottom(net, layernum, top_layer):
        """Receptive field of layer `top_layer` projected down onto layer `layernum`."""
        RF = 1
        for layer in reversed(range(layernum, top_layer)):  # current layer down to the bottom
            fsize, stride, pad, dilation = net[layer]
            RF = (RF - 1) * stride + dilation * (fsize - 1) + 1
        return RF

    def bottomToTop(net, layernum):
        """Receptive field of layer `layernum` (1-based), accumulated bottom-up."""
        # Seed with the first layer's effective kernel extent (d*(k-1)+1).
        r_in = net[0][3] * (net[0][0] - 1) + 1
        if layernum == 1:  # layer numbering starts at 1
            return r_in
        j_in = net[0][1]
        for layer in range(1, layernum):
            fsize, stride, pad, dilation = net[layer]
            j_out = j_in * stride
            r_out = r_in + dilation * (fsize - 1) * j_in
            r_in = r_out
            j_in = j_out
        return r_out
else:
    def outFromIn(isz, net, layernum):
        """Return (output size, cumulative stride); dilation field is ignored."""
        totstride = 1
        insize = isz
        for layer in range(layernum):
            fsize, stride, pad, _ = net[layer]
            outsize = (insize + 2 * pad - fsize) // stride + 1
            insize = outsize
            totstride = totstride * stride
        return outsize, totstride

    def inFromOut(net, layernum):
        """Receptive field of one unit of layer `layernum`, walked top-down."""
        RF = 1
        for layer in reversed(range(layernum)):
            fsize, stride, pad, _ = net[layer]
            # RF_in = (RF_out - 1) * stride + kernel size
            RF = (RF - 1) * stride + fsize
        return RF

    def topToBottom(net, layernum, top_layer):
        """Receptive field of layer `top_layer` projected down onto layer `layernum`."""
        RF = 1
        for layer in reversed(range(layernum, top_layer)):  # current layer down to the bottom
            fsize, stride, pad, _ = net[layer]
            RF = (RF - 1) * stride + fsize
        return RF

    def bottomToTop(net, layernum):
        """Receptive field of layer `layernum` (1-based), accumulated bottom-up."""
        if layernum == 1:  # layer numbering starts at 1
            return net[0][0]
        r_in = net[0][0]
        j_in = net[0][1]
        for layer in range(1, layernum):
            fsize, stride, pad, _ = net[layer]
            j_out = j_in * stride
            r_out = r_in + (fsize - 1) * j_in
            r_in = r_out
            j_in = j_out
        return r_out
if __name__ == '__main__':
    print("layer output sizes given image = %dx%d" % (imsize, imsize))
    net = 'vgg16'
    # Hoist the repeated dict lookups out of the loop.
    layers = net_struct[net]['net']
    names = net_struct[net]['name']
    for idx, lname in enumerate(names):
        depth = idx + 1
        out_size, tot_stride = outFromIn(imsize, layers, depth)
        rf_top_down = inFromOut(layers, depth)
        rf_bottom_up = bottomToTop(layers, depth)
        print("Layer Name = %s, Output size = %3d, Stride = % 3d, RF size: topToDown = %3d, downToTop = %3d" % (
            lname, out_size, tot_stride, rf_top_down, rf_bottom_up))
# for net in net_struct.keys(): # 自下向上(底层至当前层)公式3
# print('************net structrue name is %s**************' % net)
# for i in range(len(net_struct[net]['net'])):
# p = outFromIn(imsize, net_struct[net]['net'], i + 1)
# rf_uToD = inFromOut(net_struct[net]['net'], i + 1)
# rf_dToU = bottomToTop(net_struct[net]['net'], i + 1)
# print("Layer Name = %s, Output size = %3d, Stride = % 3d, RF size: topToDown = %3d, downToTop = %3d" % (
# net_struct[net]['name'][i], p[0], p[1], rf_uToD, rf_dToU))
#
# for net in net_struct.keys(): # 自上向下(当前层至底层)公式5
# print('************net structrue name is %s**************' % net)
# for top_layer in reversed(range(len(net_struct[net]['net']))):
# for layernum in reversed(range(top_layer + 1)):
# rf_uToD = topToBottom(net_struct[net]['net'], layernum, top_layer + 1)
# print("layernum %3d, Layer Name = %s, RF size: topToDown = %3d" % (
# layernum, net_struct[net]['name'][layernum], rf_uToD))
# layer output sizes given image = 227x227
# Layer Name = conv1_1, Output size = 228, Stride = 1, RF size: downToTop = 3
# Layer Name = conv1_2, Output size = 229, Stride = 1, RF size: downToTop = 5
# Layer Name = pool1, Output size = 115, Stride = 2, RF size: downToTop = 6
# Layer Name = conv2_1, Output size = 116, Stride = 2, RF size: downToTop = 10
# Layer Name = conv2_2, Output size = 117, Stride = 2, RF size: downToTop = 14
# Layer Name = pool2, Output size = 59, Stride = 4, RF size: downToTop = 16
# Layer Name = conv3_1, Output size = 60, Stride = 4, RF size: downToTop = 24
# Layer Name = conv3_2, Output size = 61, Stride = 4, RF size: downToTop = 32
# Layer Name = conv3_3, Output size = 62, Stride = 4, RF size: downToTop = 40
# Layer Name = pool3, Output size = 31, Stride = 8, RF size: downToTop = 44
# Layer Name = conv4_1, Output size = 32, Stride = 8, RF size: downToTop = 60
# Layer Name = conv4_2, Output size = 33, Stride = 8, RF size: downToTop = 76
# Layer Name = conv4_3, Output size = 34, Stride = 8, RF size: downToTop = 92
# Layer Name = pool4, Output size = 17, Stride = 16, RF size: downToTop = 100
# Layer Name = conv5_1, Output size = 18, Stride = 16, RF size: downToTop = 132
# Layer Name = conv5_2, Output size = 19, Stride = 16, RF size: downToTop = 164
# Layer Name = conv5_3, Output size = 20, Stride = 16, RF size: downToTop = 196
# Layer Name = pool5, Output size = 10, Stride = 32, RF size: downToTop = 212
# Sample run output (commented out so the file stays valid Python):
# layer output sizes given image = 224x224
# Layer Name = conv1_1, Output size = 225, Stride = 1, RF size: topToDown = 2, downToTop = 3
# Layer Name = conv1_2, Output size = 226, Stride = 1, RF size: topToDown = 3, downToTop = 5
# Layer Name = pool1, Output size = 113, Stride = 2, RF size: topToDown = 3, downToTop = 6
# Layer Name = conv2_1, Output size = 114, Stride = 2, RF size: topToDown = 5, downToTop = 10
# Layer Name = conv2_2, Output size = 115, Stride = 2, RF size: topToDown = 7, downToTop = 14
# Layer Name = pool2, Output size = 58, Stride = 4, RF size: topToDown = 7, downToTop = 16
# Layer Name = conv3_1, Output size = 59, Stride = 4, RF size: topToDown = 11, downToTop = 24
# Layer Name = conv3_2, Output size = 60, Stride = 4, RF size: topToDown = 15, downToTop = 32
# Layer Name = conv3_3, Output size = 61, Stride = 4, RF size: topToDown = 19, downToTop = 40
# Layer Name = pool3, Output size = 31, Stride = 8, RF size: topToDown = 19, downToTop = 44
# Layer Name = conv4_1, Output size = 32, Stride = 8, RF size: topToDown = 27, downToTop = 60
# Layer Name = conv4_2, Output size = 33, Stride = 8, RF size: topToDown = 35, downToTop = 76
# Layer Name = conv4_3, Output size = 34, Stride = 8, RF size: topToDown = 43, downToTop = 92
# Layer Name = pool4, Output size = 17, Stride = 16, RF size: topToDown = 43, downToTop = 100
# Layer Name = conv5_1, Output size = 18, Stride = 16, RF size: topToDown = 59, downToTop = 132
# Layer Name = conv5_2, Output size = 19, Stride = 16, RF size: topToDown = 75, downToTop = 164
# Layer Name = conv5_3, Output size = 20, Stride = 16, RF size: topToDown = 91, downToTop = 196
# Layer Name = pool5, Output size = 10, Stride = 32, RF size: topToDown = 91, downToTop = 212
# [filter size, stride, padding]
# Assume the two dimensions are the same
# Each kernel requires the following parameters:
# - k_i: kernel size
# - s_i: stride
# - p_i: padding (if padding is uneven, right padding will higher than left padding; "SAME" option in tensorflow)
#
# Each layer i requires the following parameters to be fully represented:
# - n_i: number of feature (data layer has n_1 = imagesize )
# - j_i: distance (projected to image pixel distance) between center of two adjacent features
# - r_i: receptive field of a feature in layer i
# - start_i: position of the first feature's receptive field in layer i (idx start from 0, negative means the center fall into padding)
import math
# convnet = [[11, 4, 0], [3, 2, 0], [5, 1, 2], [3, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [3, 2, 0], [6, 1, 0],
# [1, 1, 0]]
# layer_names = ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5', 'fc6-conv', 'fc7-conv']
# ResNet-50 layer specs ([kernel, stride, pad, dilation]) for the
# position-aware receptive-field walk below; the 1x1 downsample branches are
# left commented out alongside the block they belong to.
convnet = [[7, 2, 3, 1], [3, 2, 1, 1],
[1, 1, 0, 1], [3, 1, 1, 1], [1, 1, 0, 1], # [1, 1, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], # [1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], # [1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[1, 1, 0, 1], [3, 2, 1, 1], [1, 1, 0, 1], # [1, 2, 0, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
[3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1],
]
# Layer labels, in the same forward order as `convnet`.
layer_names = ['conv1', 'maxpool',
'layer1_0_conv1', 'layer1_0_conv2', 'layer1_0_conv3', # 'layer1_0_downsample',
'layer1_1_conv1', 'layer1_1_conv2', 'layer1_1_conv3',
'layer1_2_conv1', 'layer1_2_conv2', 'layer1_2_conv3',
'layer2_0_conv1', 'layer2_0_conv2', 'layer2_0_conv3', # 'layer2_0_downsample',
'layer2_1_conv1', 'layer2_1_conv2', 'layer2_1_conv3',
'layer2_2_conv1', 'layer2_2_conv2', 'layer2_2_conv3',
'layer2_3_conv1', 'layer2_3_conv2', 'layer2_3_conv3',
'layer3_0_conv1', 'layer3_0_conv2', 'layer3_0_conv3', # 'layer3_0_downsample',
'layer3_1_conv1', 'layer3_1_conv2', 'layer3_1_conv3',
'layer3_2_conv1', 'layer3_2_conv2', 'layer3_2_conv3',
'layer3_3_conv1', 'layer3_3_conv2', 'layer3_3_conv3',
'layer3_4_conv1', 'layer3_4_conv2', 'layer3_4_conv3',
'layer3_5_conv1', 'layer3_5_conv2', 'layer3_5_conv3',
'layer4_0_conv1', 'layer4_0_conv2', 'layer4_0_conv3', # 'layer4_0_downsample',
'layer4_1_conv1', 'layer4_1_conv2', 'layer4_1_conv3',
'layer4_2_conv1', 'layer4_2_conv2', 'layer4_2_conv3',
]
# Alternative VGG-16 specs kept for reference:
# convnet = [[3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1],
# [3, 1, 1, 1], [3, 1, 1, 1],
# [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1], [3, 1, 1, 1], [2, 2, 0, 1], [3, 1, 1, 1], [3, 1, 1, 1],
# [3, 1, 1, 1],
# [2, 2, 0, 1]]
# layer_names = ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2',
# 'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3',
# 'pool5']
# Input resolution for the summary below (overrides the earlier 512).
imsize = 224
def outFromIn(conv, layerIn):
    """Propagate one layer of the (n, jump, receptive size, start) recurrence.

    `conv` is [kernel, stride, pad] or [kernel, stride, pad, dilation]
    (dilation defaults to 1 so older 3-element specs keep working);
    `layerIn` is [n_in, j_in, r_in, start_in] as described above.
    Returns the tuple (n_out, j_out, r_out, start_out).
    """
    n_in, j_in, r_in, start_in = layerIn
    k = conv[0]
    s = conv[1]
    p = conv[2]
    d = conv[3] if len(conv) > 3 else 1
    # Effective kernel extent under dilation; equals k when d == 1.
    k_eff = d * (k - 1) + 1
    n_out = math.floor((n_in - k_eff + 2 * p) / s) + 1
    # Padding actually consumed ("SAME"-style: right pad >= left pad).
    actualP = (n_out - 1) * s - n_in + k_eff
    pR = math.ceil(actualP / 2)
    pL = math.floor(actualP / 2)
    j_out = j_in * s
    r_out = r_in + (k_eff - 1) * j_in
    start_out = start_in + ((k_eff - 1) / 2 - pL) * j_in
    return n_out, j_out, r_out, start_out
def printLayer(layer, layer_name):
    """Pretty-print one layer's (n features, jump, receptive size, start) record."""
    n, jump, rf, start = layer
    print(layer_name + ":")
    print("\t n features: %s \n \t jump: %s \n \t receptive size: %s \t start: %s " % (n, jump, rf, start))
layerInfos = []

if __name__ == '__main__':
    # The data layer itself: n_0 = image size, jump 1, RF 1, start 0.5.
    layer = [imsize, 1, 1, 0.5]
    print("-------Net summary------")
    printLayer(layer, "input image")
    for spec, lname in zip(convnet, layer_names):
        layer = outFromIn(spec, layer)
        layerInfos.append(layer)
        printLayer(layer, lname)
# Sample run output (commented out so the file stays valid Python):
# -------Net summary------
# input image:
# n features: 224
# jump: 1
# receptive size: 1 start: 0.5
# conv1_1:
# n features: 224
# jump: 1
# receptive size: 3 start: 0.5
# conv1_2:
# n features: 224
# jump: 1
# receptive size: 5 start: 0.5
# pool1:
# n features: 112
# jump: 2
# receptive size: 6 start: 1.0
# conv2_1:
# n features: 112
# jump: 2
# receptive size: 10 start: 1.0
# conv2_2:
# n features: 112
# jump: 2
# receptive size: 14 start: 1.0
# pool2:
# n features: 56
# jump: 4
# receptive size: 16 start: 2.0
# conv3_1:
# n features: 56
# jump: 4
# receptive size: 24 start: 2.0
# conv3_2:
# n features: 56
# jump: 4
# receptive size: 32 start: 2.0
# conv3_3:
# n features: 56
# jump: 4
# receptive size: 40 start: 2.0
# pool3:
# n features: 28
# jump: 8
# receptive size: 44 start: 4.0
# conv4_1:
# n features: 28
# jump: 8
# receptive size: 60 start: 4.0
# conv4_2:
# n features: 28
# jump: 8
# receptive size: 76 start: 4.0
# conv4_3:
# n features: 28
# jump: 8
# receptive size: 92 start: 4.0
# pool4:
# n features: 14
# jump: 16
# receptive size: 100 start: 8.0
# conv5_1:
# n features: 14
# jump: 16
# receptive size: 132 start: 8.0
# conv5_2:
# n features: 14
# jump: 16
# receptive size: 164 start: 8.0
# conv5_3:
# n features: 14
# jump: 16
# receptive size: 196 start: 8.0
# pool5:
# n features: 7
# jump: 32
# receptive size: 212 start: 16.0