原始图片中的ROI如何映射到feature map上?

主要参考晓雷:原始图片中的ROI如何映射到feature map?

写得非常好,从卷积神经网络物体检测之感受野大小计算 - machineLearning - 博客园粘了代码之后,做了一些改动,目的是为了实现:原图上的框图坐标映射到网络各个层时候相应的坐标,以及特征图上的框图坐标反映射到各个层上时候相应的坐标。主要用到这个公式 $p_i = s_i \cdot p_{i+1} + \left(\frac{k_i - 1}{2} - \text{padding}\right)$,以及它的反推 $p_{i+1} = \left(p_i - \left(\frac{k_i - 1}{2} - \text{padding}\right)\right) / s_i$,其中 $s_i$、$k_i$ 分别是第 $i$ 层的 stride 和卷积核大小。

代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
# Layer hyper-parameters for three classic CNN backbones.
# Each entry of 'net' is [kernel_size, stride, padding] for one conv/pool
# layer, listed from the input side (bottom) to the top; 'name' holds the
# matching layer label at the same index.
net_struct = {
    'alexnet': {'net': [[11, 4, 0], [3, 2, 0], [5, 1, 2], [3, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [3, 2, 0]],
                'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5']},
    'zf-5': {'net': [[7, 2, 3], [3, 2, 1], [5, 2, 2], [3, 2, 1], [3, 1, 1], [3, 1, 1], [3, 1, 1]],
             'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5']},
   'vgg16': {'net': [[3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1],
                     [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1],
                     [2, 2, 0]],
             'name': ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2',
                      'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3',
                      'pool5']}}

# Input images are assumed square, imsize x imsize pixels.
imsize = 224

def outFromIn(isz, net, layernum):
    """Propagate a square input of size `isz` through the first `layernum` layers.

    Args:
        isz: input spatial size (pixels, square input assumed).
        net: list of [kernel_size, stride, padding] triples, bottom to top.
        layernum: how many leading layers of `net` to apply.

    Returns:
        (outsize, totstride): the feature-map size after `layernum` layers and
        the cumulative stride relative to the input image.
    """
    # Start from the input size so layernum == 0 is well defined
    # (original code left `outsize` unbound in that case).
    outsize = isz
    totstride = 1
    for fsize, stride, pad in net[:layernum]:
        # Standard conv/pool output-size formula; use floor division so the
        # result stays an int on Python 3 (matches Python 2 `/` semantics).
        outsize = (outsize - fsize + 2 * pad) // stride + 1
        totstride *= stride
    return outsize, totstride

def inFromOut(net, layernum):
    """Return the receptive-field size (in input pixels) of one unit on the
    feature map produced by the first `layernum` layers of `net`.

    Args:
        net: list of [kernel_size, stride, padding] triples, bottom to top.
        layernum: how many leading layers of `net` to account for.
    """
    # Walk from the top layer back toward the input, growing a 1x1 window:
    # each layer scales the window by its stride and adds its kernel extent.
    receptive = 1
    for fsize, stride, _pad in reversed(net[:layernum]):
        receptive = (receptive - 1) * stride + fsize
    return receptive

def back_forth_rf(net, mm, nn):
    """Map box coordinates between the top feature map and every layer.

    Uses p_i = s_i * p_{i+1} + ((k_i - 1) / 2 - pad) and its inverse.

    Args:
        net: list of [kernel_size, stride, padding] triples, bottom to top.
        mm: box coordinates on the TOP feature map (4 ints).
        nn: box coordinates on the ORIGINAL image (4 ints).

    Returns:
        (mmm, nnn): two lists, one snapshot of the box per layer, ordered
        bottom (input side) to top.  `mmm` maps `mm` down toward the image;
        `nnn` maps `nn` up toward the top feature map.

    Original version read/appended module-level globals `mmm`/`nnn` and
    mutated `mm`/`nn` in place; fixed to use locals and leave inputs intact.
    """
    fmap_box = list(mm)   # work on copies so callers' lists are untouched
    img_box = list(nn)
    mmm = []
    nnn = []
    # Feature map -> image: walk layers top to bottom.
    for fsize, stride, pad in reversed(net):
        # Floor division keeps int coords on Python 3 (matches py2 `/`).
        offset = (fsize - 1) // 2 - pad
        fmap_box = [stride * c + offset for c in fmap_box]
        mmm.append(list(fmap_box))
    # Collected top-to-bottom; reverse so the result runs bottom-to-top.
    mmm.reverse()
    # Image -> feature map: walk layers bottom to top.
    for fsize, stride, pad in net:
        offset = (fsize - 1) // 2 - pad
        img_box = [(c - offset) // stride for c in img_box]
        nnn.append(list(img_box))
    return mmm, nnn

if __name__ == '__main__':
    # Print, for each network: per-layer output size, cumulative stride,
    # receptive-field size, and the two box mappings (feature map -> image
    # and image -> feature map).
    # Uses print() calls so the script runs on both Python 2 and 3.
    print("layer output sizes given image = %dx%d" % (imsize, imsize))
    for net in net_struct:
        print('************net structrue name is %s**************' % net)
        layers = net_struct[net]['net']
        ppp = []  # (outsize, totstride) per layer
        mmm = []  # feature-map box mapped to each layer (filled by back_forth_rf)
        nnn = []  # image box mapped to each layer (filled by back_forth_rf)
        rff = []  # receptive-field size per layer
        # Arbitrary box on the top feature map, sized to fit each net's top layer.
        if net == 'zf-5':
            mm = [14, 14, 12, 13]
        elif net == 'vgg16':
            mm = [7, 7, 6, 2]
        else:
            mm = [5, 5, 2, 3]
        nn = [96, 96, 56, 65]  # arbitrary box on the 224x224 input image
        for i in range(len(layers)):
            ppp.append(outFromIn(imsize, layers, i + 1))
            rff.append(inFromOut(layers, i + 1))
        q = back_forth_rf(layers, mm, nn)
        for i in range(len(layers)):
            print("Layer Name = %s, Output size = %3d, Stride = % 3d, RF size = %3d, forth RF=%3s, back RF=%3s" % (
                net_struct[net]['name'][i], ppp[i][0], ppp[i][1], rff[i], q[0][i], q[1][i]))

运行结果如下:

D:\Anaconda\python.exe E:/CNN/感受野遗留问题/RF.py
layer output sizes given image = 224x224
************net structrue name is vgg16**************
Layer Name = conv1_1, Output size = 224, Stride =   1, RF size =   3, forth RF=[224, 224, 192, 64], back RF=[96, 96, 56, 65]
Layer Name = conv1_2, Output size = 224, Stride =   1, RF size =   5, forth RF=[224, 224, 192, 64], back RF=[96, 96, 56, 65]
Layer Name = pool1, Output size = 112, Stride =   2, RF size =   6, forth RF=[224, 224, 192, 64], back RF=[48, 48, 28, 32]
Layer Name = conv2_1, Output size = 112, Stride =   2, RF size =  10, forth RF=[112, 112, 96, 32], back RF=[48, 48, 28, 32]
Layer Name = conv2_2, Output size = 112, Stride =   2, RF size =  14, forth RF=[112, 112, 96, 32], back RF=[48, 48, 28, 32]
Layer Name = pool2, Output size =  56, Stride =   4, RF size =  16, forth RF=[112, 112, 96, 32], back RF=[24, 24, 14, 16]
Layer Name = conv3_1, Output size =  56, Stride =   4, RF size =  24, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = conv3_2, Output size =  56, Stride =   4, RF size =  32, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = conv3_3, Output size =  56, Stride =   4, RF size =  40, forth RF=[56, 56, 48, 16], back RF=[24, 24, 14, 16]
Layer Name = pool3, Output size =  28, Stride =   8, RF size =  44, forth RF=[56, 56, 48, 16], back RF=[12, 12, 7, 8]
Layer Name = conv4_1, Output size =  28, Stride =   8, RF size =  60, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = conv4_2, Output size =  28, Stride =   8, RF size =  76, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = conv4_3, Output size =  28, Stride =   8, RF size =  92, forth RF=[28, 28, 24, 8], back RF=[12, 12, 7, 8]
Layer Name = pool4, Output size =  14, Stride =  16, RF size = 100, forth RF=[28, 28, 24, 8], back RF=[6, 6, 3, 4]
Layer Name = conv5_1, Output size =  14, Stride =  16, RF size = 132, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = conv5_2, Output size =  14, Stride =  16, RF size = 164, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = conv5_3, Output size =  14, Stride =  16, RF size = 196, forth RF=[14, 14, 12, 4], back RF=[6, 6, 3, 4]
Layer Name = pool5, Output size =   7, Stride =  32, RF size = 212, forth RF=[14, 14, 12, 4], back RF=[3, 3, 1, 2]
************net structrue name is zf-5**************
Layer Name = conv1, Output size = 112, Stride =   2, RF size =   7, forth RF=[224, 224, 192, 208], back RF=[48, 48, 28, 32]
Layer Name = pool1, Output size =  56, Stride =   4, RF size =  11, forth RF=[112, 112, 96, 104], back RF=[24, 24, 14, 16]
Layer Name = conv2, Output size =  28, Stride =   8, RF size =  27, forth RF=[56, 56, 48, 52], back RF=[12, 12, 7, 8]
Layer Name = pool2, Output size =  14, Stride =  16, RF size =  43, forth RF=[28, 28, 24, 26], back RF=[6, 6, 3, 4]
Layer Name = conv3, Output size =  14, Stride =  16, RF size =  75, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
Layer Name = conv4, Output size =  14, Stride =  16, RF size = 107, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
Layer Name = conv5, Output size =  14, Stride =  16, RF size = 139, forth RF=[14, 14, 12, 13], back RF=[6, 6, 3, 4]
************net structrue name is alexnet**************
Layer Name = conv1, Output size =  54, Stride =   4, RF size =  11, forth RF=[193, 193, 97, 129], back RF=[22, 22, 12, 15]
Layer Name = pool1, Output size =  26, Stride =   8, RF size =  19, forth RF=[47, 47, 23, 31], back RF=[10, 10, 5, 7]
Layer Name = conv2, Output size =  26, Stride =   8, RF size =  51, forth RF=[23, 23, 11, 15], back RF=[10, 10, 5, 7]
Layer Name = pool2, Output size =  12, Stride =  16, RF size =  67, forth RF=[23, 23, 11, 15], back RF=[4, 4, 2, 3]
Layer Name = conv3, Output size =  12, Stride =  16, RF size =  99, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = conv4, Output size =  12, Stride =  16, RF size = 131, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = conv5, Output size =  12, Stride =  16, RF size = 163, forth RF=[11, 11, 5, 7], back RF=[4, 4, 2, 3]
Layer Name = pool5, Output size =   5, Stride =  32, RF size = 195, forth RF=[11, 11, 5, 7], back RF=[1, 1, 0, 1]

Process finished with exit code 0
学习总结,不喜勿喷。


评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值