Fast RCNN中的ROI POOL的python实现

最新推荐文章于 2023-06-08 09:54:25 发布

譕訫_

最新推荐文章于 2023-06-08 09:54:25 发布

阅读量530

点赞数 1

分类专栏：论文　文章标签：计算机视觉、算法、深度学习、卷积

本文链接：https://blog.csdn.net/weixin_41963310/article/details/109289925

版权

论文专栏收录该内容

22 篇文章 0 订阅

订阅专栏

Fast RCNN中的ROI池化层的实现

import cv2
import numpy as np
from keras.applications.imagenet_utils import preprocess_input
import keras
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from Fast_RCNN import test
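'''
Inputs to the ROI pooling layer: the feature map and the candidate boxes from
Selective Search. Selective Search reports each box as [x, y, w, h], where
(x, y) is the top-left corner and (x + w, y + h) is the bottom-right corner.
Note that ROIPOOL.get_region slices the feature map with corner coordinates
(x_left, y_top, x_right, y_bottom), so boxes have to be expressed in that form.
Output: one pooled tensor per ROI, stacked as (batch, h, w, channel), where
h and w are fixed by pool_size.
'''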

class ROIPOOL():
    """ROI max-pooling layer (Fast R-CNN style) implemented with NumPy.

    Each region of interest is cropped from a feature map and max-pooled
    into a fixed grid of ``pool_size`` cells, independently per channel.
    """

    def __init__(self, pool_size=(7, 7)):
        # (rows, cols) of the pooled grid; fixes the spatial size of every output.
        self.poolsize = pool_size

    def pool(self, region):
        """Max-pool one cropped region down to the configured grid.

        Args:
            region: array of shape (height, width, channels).

        Returns:
            Float array of shape (pool_rows, pool_cols, channels). A region
            with zero height or width yields all zeros.
        """
        h_pool, w_pool = self.poolsize
        region_height, region_width, region_channel = region.shape

        pool = np.zeros((h_pool, w_pool, region_channel))

        # Nothing to pool from: keep the all-zero output instead of calling
        # max() on an empty slice (which raises).
        if region_height == 0 or region_width == 0:
            return pool

        for i in range(h_pool):
            # Sub-window i spans [floor(i*h/H), ceil((i+1)*h/H)). Rounding the
            # start down and the end up guarantees at least one row per bin,
            # even when the region is smaller than the pooling grid. Integer
            # arithmetic keeps the bounds exact (no float rounding drift).
            y_top = (i * region_height) // h_pool
            y_bottom = -(-((i + 1) * region_height) // h_pool)
            for j in range(w_pool):
                x_left = (j * region_width) // w_pool
                x_right = -(-((j + 1) * region_width) // w_pool)

                # Max over the spatial axes, one value per channel.
                window = region[y_top:y_bottom, x_left:x_right, :]
                pool[i, j, :] = np.max(window, axis=(0, 1))
        return pool

    def get_region(self, feature_map, roi_dimensions):
        """Crop one ROI out of the feature map.

        Args:
            feature_map: array of shape (height, width, channels), or
                (1, height, width, channels) with a leading batch axis.
            roi_dimensions: corner coordinates
                (x_left, y_top, x_right, y_bottom) in feature-map cells.
                Boxes given as (x, y, w, h) must be converted by the caller.

        Returns:
            The slice feature_map[y_top:y_bottom, x_left:x_right, :]. It is
            empty along an axis whose end coordinate is not greater than its
            start coordinate.

        Raises:
            ValueError: if feature_map is not 3-D or 4-D with batch size 1.
        """
        x_left, y_top, x_right, y_bottom = (int(v) for v in roi_dimensions)

        fmap = np.asarray(feature_map)
        # Drop only the batch axis. np.squeeze would also remove a singleton
        # height, width or channel axis and break the 3-axis slice below.
        if fmap.ndim == 4:
            if fmap.shape[0] != 1:
                raise ValueError(
                    'feature_map must contain exactly one image, got batch of %d'
                    % fmap.shape[0])
            fmap = fmap[0]
        elif fmap.ndim != 3:
            raise ValueError(
                'feature_map must be 3-D or 4-D, got %d-D' % fmap.ndim)

        # Negative indices would wrap around in NumPy slicing; clamp to the edge.
        x_left, y_top = max(x_left, 0), max(y_top, 0)
        x_right, y_bottom = max(x_right, 0), max(y_bottom, 0)

        return fmap[y_top:y_bottom, x_left:x_right, :]

    def get_pooled_rois(self, feature_map, roi_batch):
        """Crop and pool every ROI in roi_batch.

        Args:
            feature_map: feature map accepted by get_region.
            roi_batch: iterable of (x_left, y_top, x_right, y_bottom) boxes.

        Returns:
            Array of shape (num_rois, pool_rows, pool_cols, channels).
        """
        pooled = [
            self.pool(self.get_region(feature_map, region_dim))
            for region_dim in roi_batch
        ]
        if not pooled:
            # Keep a well-formed 4-D result for an empty batch.
            h_pool, w_pool = self.poolsize
            channels = np.asarray(feature_map).shape[-1]
            return np.zeros((0, h_pool, w_pool, channels))
        return np.array(pooled)


# Demo script: run Selective Search on an image, compute a feature map, map the
# proposals onto the feature map, ROI-pool them and plot one example.

# Use OpenCV's built-in Selective Search to generate region proposals.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

img = cv2.imread('01.jpg')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("could not read image '01.jpg'")
print('imgshape', img.shape)

# Register the image to segment.
ss.setBaseImage(img)
# Switch to the fast search mode.
ss.switchToSelectiveSearchFast()
# Each proposal is a box (x, y, w, h) in image pixels.
ssresult = ss.process()
# Copy of the image kept for drawing.
imout = img.copy()

info = []

# Optional visualisation of the first 2000 proposals on the image (disabled).
# The original author noted that only ~500 proposals were returned here.
# for e, result in enumerate(ssresult):
#     if e < 2000:
#         x, y, w, h = result
#         cv2.rectangle(imout, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
# plt.figure()
# plt.imshow(imout)
# plt.show()

# Compute the feature map. `test` is the author's small convolutional network.
img_batch = np.expand_dims(img, axis=0)
img_batch = preprocess_input(img_batch)
model = test.model_process(input_shape=img.shape)
feature_map = model.predict(img_batch)
print('feature_map shape', feature_map.shape)
# The original author recorded (1, 16, 16, 512) here; the actual shape depends
# on the image size and on the layers enabled in test.model_process.

# Image-to-feature-map scale factors. Height and width are scaled separately
# so that non-square images (or non-square strides) map correctly:
#   feature-map coordinate = image coordinate / scale
s = img.shape[0] / feature_map.shape[1]    # vertical scale
s_x = img.shape[1] / feature_map.shape[2]  # horizontal scale
print(s)

# Convert every (x, y, w, h) proposal in image pixels into corner coordinates
# (x_left, y_top, x_right, y_bottom) in feature-map cells, which is the format
# ROIPOOL.get_region slices with. The true (float) scale is used, the start is
# rounded down and the end rounded up so no proposal collapses to zero size,
# and ssresult itself is left unmodified.
fm_height, fm_width = feature_map.shape[1], feature_map.shape[2]
for x, y, w, h in ssresult:
    x_left = int(np.floor(x / s_x))
    y_top = int(np.floor(y / s))
    x_right = min(int(np.ceil((x + w) / s_x)), fm_width)
    y_bottom = min(int(np.ceil((y + h) / s)), fm_height)
    info.append((x_left, y_top, x_right, y_bottom))

# Apply the ROI pooling layer.
roi = ROIPOOL()
roi_result = roi.get_pooled_rois(feature_map, info)

# print(roi_result.shape)
# (number of ROIs, pool rows, pool cols, channels)

# Show one proposal on the feature map together with its pooled result.
# Many proposals are produced; only the seventh is displayed, hence n = 6.
n = 6
_, ax = plt.subplots(2)
ax[0].imshow(feature_map[0, ..., 3])
x_left, y_top, x_right, y_bottom = info[n]
ax[0].add_patch(patches.Rectangle((x_left, y_top), x_right - x_left, y_bottom - y_top, edgecolor='r', facecolor='none', linewidth=1))
ax[1].imshow(roi_result[n, ..., 0])
plt.show()

论文中关于ROI的计算

在这里插入图片描述

图1

在这里插入图片描述

feature map 和 ROI池化层处理结果

test代码即卷积网络代码

import keras
import numpy as np
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Input
from keras.models import Model
import matplotlib.pyplot as plt

def model_process(include_top=True, weights='imagenet', input_tensor=None, input_shape=None):
    """Build a small convolutional feature extractor.

    The network is two 3x3, 64-filter ReLU convolutions followed by one 2x2
    max-pooling layer. Deeper VGG-style blocks (128/256/512/512 filters, each
    followed by 2x2 max-pooling) were present in the original listing but
    disabled, so they are not part of the model.

    Args:
        include_top: accepted for signature compatibility; not used.
        weights: accepted for signature compatibility; not used.
        input_tensor: optional tensor to use as the model input. If it is not
            already a Keras tensor it is wrapped in an Input layer.
        input_shape: shape passed to Input when a new input layer is created.

    Returns:
        A keras Model mapping the input to the pooled feature map.
    """
    # Resolve the input layer.
    if input_tensor is not None and keras.backend.is_keras_tensor(input_tensor):
        img_input = input_tensor
    elif input_tensor is not None:
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = Input(shape=input_shape)

    # Both convolutions share the same configuration.
    conv_config = dict(
        filters=64,
        kernel_size=(3, 3),
        padding='same',
        activation='relu',
        data_format='channels_last',
        kernel_initializer='uniform',
    )

    features = img_input
    for _ in range(2):
        features = Conv2D(**conv_config)(features)
    features = MaxPooling2D((2, 2))(features)

    return Model(img_input, features)