主要参考晓雷:原始图片中的ROI如何映射到feature map?
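写得非常好。从《卷积神经网络物体检测之感受野大小计算 - machineLearning - 博客园》粘了代码之后,做了一些改动,目的是为了实现:原图上的框图坐标映射到网络各个层时相应的坐标,以及特征图上的框图坐标反映射到各个层时相应的坐标。主要用到这个公式:$p_i = s_i \cdot p_{i+1} + \left(\frac{k_i - 1}{2} - \mathrm{padding}_i\right)$(其中 $p_i$ 是第 $i$ 层输入上的坐标,$p_{i+1}$ 是该层输出上的坐标,$k_i$、$s_i$、$\mathrm{padding}_i$ 分别是该层的卷积核大小、步长和填充),以及它的反推:$p_{i+1} = \left(p_i - \left(\frac{k_i - 1}{2} - \mathrm{padding}_i\right)\right) / s_i$。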
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
def _describe(*layers):
    """Build one network entry from ``(name, (fsize, stride, pad))`` pairs.

    Keeping each layer name next to its parameters makes it hard for the two
    parallel lists to drift out of step. The result has the same shape the
    rest of the script expects: ``'net'`` holds ``[fsize, stride, pad]``
    lists and ``'name'`` holds the layer names, both in layer order.
    """
    return {
        'net': [list(spec) for _name, spec in layers],
        'name': [name for name, _spec in layers],
    }


# Per-network layer tables. Every layer is described by its kernel size,
# stride and padding, listed from the input (bottom) to the output (top).
net_struct = {
    'alexnet': _describe(
        ('conv1', (11, 4, 0)),
        ('pool1', (3, 2, 0)),
        ('conv2', (5, 1, 2)),
        ('pool2', (3, 2, 0)),
        ('conv3', (3, 1, 1)),
        ('conv4', (3, 1, 1)),
        ('conv5', (3, 1, 1)),
        ('pool5', (3, 2, 0)),
    ),
    'zf-5': _describe(
        ('conv1', (7, 2, 3)),
        ('pool1', (3, 2, 1)),
        ('conv2', (5, 2, 2)),
        ('pool2', (3, 2, 1)),
        ('conv3', (3, 1, 1)),
        ('conv4', (3, 1, 1)),
        ('conv5', (3, 1, 1)),
    ),
    'vgg16': _describe(
        ('conv1_1', (3, 1, 1)),
        ('conv1_2', (3, 1, 1)),
        ('pool1', (2, 2, 0)),
        ('conv2_1', (3, 1, 1)),
        ('conv2_2', (3, 1, 1)),
        ('pool2', (2, 2, 0)),
        ('conv3_1', (3, 1, 1)),
        ('conv3_2', (3, 1, 1)),
        ('conv3_3', (3, 1, 1)),
        ('pool3', (2, 2, 0)),
        ('conv4_1', (3, 1, 1)),
        ('conv4_2', (3, 1, 1)),
        ('conv4_3', (3, 1, 1)),
        ('pool4', (2, 2, 0)),
        ('conv5_1', (3, 1, 1)),
        ('conv5_2', (3, 1, 1)),
        ('conv5_3', (3, 1, 1)),
        ('pool5', (2, 2, 0)),
    ),
}

# Side length of the (square) input image used by the demo script.
imsize = 224
def outFromIn(isz, net, layernum):
    """Return the output size and cumulative stride after ``layernum`` layers.

    Args:
        isz: Side length of the input fed to the first layer.
        net: Sequence of ``[fsize, stride, pad]`` triples, one per layer,
            ordered from the input towards the output.
        layernum: How many leading layers of ``net`` to apply.

    Returns:
        A tuple ``(outsize, totstride)``: the side length of the output of
        layer ``layernum`` and the product of the strides of the applied
        layers. When ``layernum`` is zero or negative no layer is applied
        and ``(isz, 1)`` is returned.

    Raises:
        IndexError: If ``layernum`` exceeds ``len(net)``.
    """
    # Start from the input so that applying zero layers is well defined.
    outsize = isz
    totstride = 1
    for layer in range(layernum):
        fsize, stride, pad = net[layer]
        # Floor division keeps the size an integer on Python 2 and Python 3.
        outsize = (outsize - fsize + 2 * pad) // stride + 1
        totstride *= stride
    return outsize, totstride
def inFromOut(net, layernum):
    """Return the receptive-field size of one output unit of layer ``layernum``.

    The field is grown from a single unit at the output of layer
    ``layernum`` back to the network input, one layer at a time.

    Args:
        net: Sequence of ``[fsize, stride, pad]`` triples, one per layer,
            ordered from the input towards the output.
        layernum: Number of leading layers of ``net`` to walk back through.

    Returns:
        The receptive-field side length measured on the network input. With
        ``layernum`` zero or negative no layer is traversed and 1 is returned.

    Raises:
        IndexError: If ``layernum`` exceeds ``len(net)``.
    """
    RF = 1
    for layer in reversed(range(layernum)):
        # Padding shifts the field but does not change its size.
        fsize, stride, _ = net[layer]
        RF = (RF - 1) * stride + fsize
    return RF
def back_forth_rf(net, mm, nn):
    """Map box coordinates through every layer of ``net`` in both directions.

    Each coordinate is moved across a layer with
    ``p_in = stride * p_out + ((fsize - 1) // 2 - pad)`` when going towards
    the image, and with the inverse
    ``p_out = (p_in - ((fsize - 1) // 2 - pad)) // stride`` when going
    towards the top feature map. The same formula is applied to every
    coordinate of the box.

    Args:
        net: Sequence of ``[fsize, stride, pad]`` triples, one per layer,
            ordered from the input (bottom) to the output (top).
        mm: Box coordinates given on the output of the last layer.
        nn: Box coordinates given on the network input.

    Returns:
        A tuple ``(mmm, nnn)`` of lists with one entry per layer, ordered
        from bottom to top. ``mmm[i]`` is ``mm`` mapped down to the input of
        layer ``i``; ``nnn[i]`` is ``nn`` mapped up to the output of layer
        ``i``.

    The result lists are created on every call and ``mm`` / ``nn`` are left
    unmodified, so the function does not depend on, or leak state through,
    module-level variables.
    """
    # Top feature map -> image: undo the layers from the last to the first.
    box = list(mm)
    mmm = []
    for fsize, stride, pad in reversed(net):
        offset = (fsize - 1) // 2 - pad
        # A fresh list per layer, so earlier snapshots are never overwritten.
        box = [stride * coord + offset for coord in box]
        mmm.append(box)
    # Collected from top to bottom; flip so the entries run bottom to top.
    mmm.reverse()

    # Image -> top feature map: apply the layers in their natural order.
    box = list(nn)
    nnn = []
    for fsize, stride, pad in net:
        offset = (fsize - 1) // 2 - pad
        # Floor division keeps coordinates integral on Python 2 and Python 3.
        box = [(coord - offset) // stride for coord in box]
        nnn.append(box)
    return mmm, nnn
if __name__ == '__main__':
    # Demo: for every known network print, per layer, the output size, the
    # cumulative stride, the receptive-field size and two sample boxes mapped
    # through the layers. print(...) with a single pre-formatted string works
    # on both Python 2 and Python 3.
    print("layer output sizes given image = %dx%d" % (imsize, imsize))
    for net in net_struct:
        print('************net structrue name is %s**************' % net)
        layers = net_struct[net]['net']
        names = net_struct[net]['name']

        # Reset the module-level accumulators for each network so that
        # results from a previous network cannot leak into this one.
        mmm = []  # boxes mapped from the top feature map down to the image
        nnn = []  # boxes mapped from the image up to the top feature map

        # Sample box on the top feature map, picked arbitrarily to fit the
        # size of that network's last layer.
        if net == 'zf-5':
            mm = [14, 14, 12, 13]
        elif net == 'vgg16':
            mm = [7, 7, 6, 2]
        else:
            mm = [5, 5, 2, 3]
        # Sample box on the input image, picked arbitrarily.
        nn = [96, 96, 56, 65]

        depths = range(1, len(layers) + 1)
        ppp = [outFromIn(imsize, layers, depth) for depth in depths]  # (outsize, totstride)
        rff = [inFromOut(layers, depth) for depth in depths]  # receptive-field sizes
        q = back_forth_rf(layers, mm, nn)

        for name, (outsize, totstride), rf, forth, back in zip(names, ppp, rff, q[0], q[1]):
            print("Layer Name = %s, Output size = %3d, Stride = % 3d, RF size = %3d, forth RF=%3s, back RF=%3s" % (
                name, outsize, totstride, rf, forth, back))
运行结果如下:
D:\Anaconda\python.exe E:/CNN/感受野遗留问题/RF.py
layer output sizes given image = 224x224
************net structrue name is vgg16**************
Layer Name = conv1_1, Output size = 224, Stride = 1, RF size = 3, forth RF=[224, 224, 192, 64], back RF=[96, 96, 56, 65]
Layer Name = conv1_2, Output size = 224, Stride = 1, RF size = 5, forth RF=[224, 224, 192, 64], back RF=[96, 96, 56, 65]
Layer Name = pool1, Output size = 112, Stride = 2, RF size = 6, forth RF=[224, 224, 192, 64], back RF=[48, 48, 28, 32]
Layer Name = conv2_1, Output size = 112, Stride = 2, RF size = 10, forth RF=[112, 112, 96, 32], back RF=[48, 48, 28, 32]
Layer Name = conv2_2, Output size = 112, Stride = 2, RF size = 14, forth RF=[112, 112, 96, 32], back RF=[48, 48, 28, 32]
Layer Name = pool2, Output size = 56, Stride = 4, RF size = 16, forth RF=[112, 112, 96, 32], back RF=[24, 24, 14, 16]
Layer Name = conv3_1, Output size = 56, Stride = 4, RF size = 24, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = conv3_2, Output size = 56, Stride = 4, RF size = 32, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = conv3_3, Output size = 56, Stride = 4, RF size = 40, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = pool3, Output size = 28, Stride = 8, RF size = 44, forth RF=[56, 56, 48, 16], back RF=[12, 12, 7, 8]
Layer Name = conv4_1, Output size = 28, Stride = 8, RF size = 60, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = conv4_2, Output size = 28, Stride = 8, RF size = 76, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = conv4_3, Output size = 28, Stride = 8, RF size = 92, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = pool4, Output size = 14, Stride = 16, RF size = 100, forth RF=[28, 28, 24, 8], back RF=[6, 6, 3, 4]
Layer Name = conv5_1, Output size = 14, Stride = 16, RF size = 132, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = conv5_2, Output size = 14, Stride = 16, RF size = 164, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = conv5_3, Output size = 14, Stride = 16, RF size = 196, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = pool5, Output size = 7, Stride = 32, RF size = 212, forth RF=[14, 14, 12, 4], back RF=[3, 3, 1, 2]
************net structrue name is zf-5**************
Layer Name = conv1, Output size = 112, Stride = 2, RF size = 7, forth RF=[224, 224, 192, 208], back RF=[48, 48, 28, 32]
Layer Name = pool1, Output size = 56, Stride = 4, RF size = 11, forth RF=[112, 112, 96, 104], back RF=[24, 24, 14, 16]
Layer Name = conv2, Output size = 28, Stride = 8, RF size = 27, forth RF=[56, 56, 48, 52], back RF=[12, 12, 7, 8]
Layer Name = pool2, Output size = 14, Stride = 16, RF size = 43, forth RF=[28, 28, 24, 26], back RF=[6, 6, 3, 4]
Layer Name = conv3, Output size = 14, Stride = 16, RF size = 75, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
Layer Name = conv4, Output size = 14, Stride = 16, RF size = 107, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
Layer Name = conv5, Output size = 14, Stride = 16, RF size = 139, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
************net structrue name is alexnet**************
Layer Name = conv1, Output size = 54, Stride = 4, RF size = 11, forth RF=[193, 193, 97, 129], back RF=[22, 22, 12, 15]
Layer Name = pool1, Output size = 26, Stride = 8, RF size = 19, forth RF=[47, 47, 23, 31], back RF=[10, 10, 5, 7]
Layer Name = conv2, Output size = 26, Stride = 8, RF size = 51, forth RF=[23, 23, 11, 15], back RF=[10, 10, 5, 7]
Layer Name = pool2, Output size = 12, Stride = 16, RF size = 67, forth RF=[23, 23, 11, 15], back RF=[4, 4, 2, 3]
Layer Name = conv3, Output size = 12, Stride = 16, RF size = 99, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = conv4, Output size = 12, Stride = 16, RF size = 131, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = conv5, Output size = 12, Stride = 16, RF size = 163, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = pool5, Output size = 5, Stride = 32, RF size = 195, forth RF=[11, 11, 5, 7], back RF=[1, 1, 0, 1]
Process finished with exit code 0
学习总结,不喜勿喷。