# Implementation of the ROI pooling layer from Fast R-CNN
import cv2
import numpy as np
from keras.applications.imagenet_utils import preprocess_input
import keras
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from Fast_RCNN import test
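'''
The ROI pooling layer takes a feature map and a list of candidate boxes.
Each box is given in feature-map coordinates as
[x_left, y_top, x_right, y_bottom]: (x_left, y_top) is the top-left corner and
(x_right, y_bottom) is the bottom-right corner.
It outputs one pooled tensor per box, stacked into an array of shape
(number of boxes, h_pool, w_pool, channel), where h_pool and w_pool are set by pool_size.
'''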
class ROIPOOL():
    """NumPy implementation of the Fast R-CNN ROI max-pooling layer.

    Every region of interest is cropped out of a feature map and max-pooled
    to a fixed ``pool_size`` grid, so regions of different sizes all produce
    an output of the same spatial size.

    ROIs are given in feature-map coordinates as
    ``(x_left, y_top, x_right, y_bottom)``; the right/bottom edges are
    exclusive (they are used as slice end points).
    """

    def __init__(self, pool_size=(7, 7)):
        """Store the pooled output size as ``(pooled_height, pooled_width)``."""
        # Directly determines the spatial size of every ROI output.
        self.poolsize = pool_size

    @staticmethod
    def _bin_edges(length, bins):
        """Split ``range(length)`` into ``bins`` windows as (start, end) pairs.

        Start is floor(k * length / bins) and end is
        ceil((k + 1) * length / bins), computed with integer arithmetic so no
        floating-point rounding can shift an edge.  For ``length >= 1`` every
        window contains at least one element (neighbouring windows may
        overlap when ``length`` is not a multiple of ``bins``).
        """
        return [
            ((k * length) // bins, -(-((k + 1) * length) // bins))
            for k in range(bins)
        ]

    def pool(self, region):
        """Max-pool one cropped region down to the configured grid.

        Args:
            region: array of shape (height, width, channels).

        Returns:
            Array of shape (h_pool, w_pool, channels).  An empty region
            (zero height or width) yields an all-zero result.
        """
        region = np.asarray(region)
        h_pool, w_pool = self.poolsize
        region_height, region_width, region_channel = region.shape
        pooled = np.zeros((h_pool, w_pool, region_channel))

        # Nothing to take a maximum over; keep the zero-filled output.
        if region_height == 0 or region_width == 0:
            return pooled

        row_edges = self._bin_edges(region_height, h_pool)
        col_edges = self._bin_edges(region_width, w_pool)
        for i, (y_top, y_bottom) in enumerate(row_edges):
            for j, (x_left, x_right) in enumerate(col_edges):
                # Per-channel maximum over the sub-window.
                window = region[y_top:y_bottom, x_left:x_right, :]
                pooled[i, j, :] = window.max(axis=(0, 1))
        return pooled

    def get_region(self, feature_map, roi_dimensions):
        """Crop the ROI out of the feature map.

        Args:
            feature_map: array of shape (height, width, channels), optionally
                with a leading batch axis of size 1.
            roi_dimensions: ``(x_left, y_top, x_right, y_bottom)`` in
                feature-map coordinates; values are truncated to int.

        Returns:
            The slice ``feature_map[y_top:y_bottom, x_left:x_right, :]``.
            It is empty along an axis when the end does not exceed the start.

        Raises:
            ValueError: if the feature map is not 3-D after dropping leading
                singleton axes.
        """
        x_left, y_top, x_right, y_bottom = (int(v) for v in roi_dimensions)

        fmap = np.asarray(feature_map)
        # Only strip leading singleton (batch) axes.  A blanket np.squeeze
        # would also remove a spatial or channel axis that happens to be 1.
        while fmap.ndim > 3 and fmap.shape[0] == 1:
            fmap = fmap[0]
        if fmap.ndim != 3:
            raise ValueError(
                "feature_map must have shape (H, W, C) or (1, H, W, C), got "
                + str(np.shape(feature_map))
            )

        # Negative indices would wrap around to the far side of the map.
        x_left, y_top = max(x_left, 0), max(y_top, 0)
        x_right, y_bottom = max(x_right, 0), max(y_bottom, 0)
        return fmap[y_top:y_bottom, x_left:x_right, :]

    def get_pooled_rois(self, feature_map, roi_batch):
        """Crop and pool every ROI in ``roi_batch``.

        Returns:
            Array of shape (number of ROIs, h_pool, w_pool, channels).
        """
        pooled = [
            self.pool(self.get_region(feature_map, region_dim))
            for region_dim in roi_batch
        ]
        if not pooled:
            # Keep a well-formed 4-D shape even when there are no ROIs.
            h_pool, w_pool = self.poolsize
            channels = np.asarray(feature_map).shape[-1]
            return np.zeros((0, h_pool, w_pool, channels))
        return np.array(pooled)
# Generate region proposals with OpenCV's built-in Selective Search.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
img = cv2.imread('01.jpg')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("cv2.imread could not read '01.jpg'")
print('imgshape', img.shape)
# Image the search runs on.
ss.setBaseImage(img)
# Use the fast search mode.
ss.switchToSelectiveSearchFast()
# Each row of ssresult is one candidate box as (x, y, w, h) in image pixels.
ssresult = ss.process()
# Copy of img, kept for drawing proposals on.
imout = img.copy()
info = []

# Optional debugging aid (disabled): draw the first 2000 proposals on imout.
# for e, (x, y, w, h) in enumerate(ssresult[:2000]):
#     cv2.rectangle(imout, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
# plt.figure()
# plt.imshow(imout)
# plt.show()

# Compute the feature map with the small conv net from the `test` module.
img_batch = np.expand_dims(img, axis=0)
img_batch = preprocess_input(img_batch)
model = test.model_process(input_shape=img.shape)
feature_map = model.predict(img_batch)
print('feature_map shape', feature_map.shape)

# Map each proposal from image pixels onto the feature map:
#   feature-map coordinate = image coordinate / (image size / feature-map size)
# Height and width get their own ratio, and the exact (non-truncated) ratio
# is used so that a ratio below 1 cannot turn into a division by zero.
fm_height, fm_width = feature_map.shape[1], feature_map.shape[2]
s = img.shape[0] / fm_height  # vertical scale
s_x = img.shape[1] / fm_width  # horizontal scale
print(s)

# Convert (x, y, w, h) boxes into the (x_left, y_top, x_right, y_bottom)
# corners that ROIPOOL.get_region and the plot below expect.  Left/top are
# floored and right/bottom are ceiled so the box never shrinks, then clamped
# to the feature-map bounds.
for x, y, w, h in ssresult:
    x_left = int(np.floor(x / s_x))
    y_top = int(np.floor(y / s))
    x_right = min(int(np.ceil((x + w) / s_x)), fm_width)
    y_bottom = min(int(np.ceil((y + h) / s)), fm_height)
    info.append((x_left, y_top, x_right, y_bottom))

# Apply the ROI pooling layer; result is (number of ROIs, h_pool, w_pool, channel).
roi = ROIPOOL()
roi_result = roi.get_pooled_rois(feature_map, info)

# Show one proposal on the feature map next to its pooled output.
# By default the 7th proposal (n = 6), or the last one if there are fewer.
if not info:
    raise RuntimeError('Selective Search returned no region proposals')
n = min(6, len(info) - 1)
_, ax = plt.subplots(2)
ax[0].imshow(feature_map[0, ..., 3])
x_left, y_top, x_right, y_bottom = info[n]
ax[0].add_patch(patches.Rectangle((x_left, y_top), x_right - x_left, y_bottom - y_top, edgecolor='r', facecolor='none', linewidth=1))
ax[1].imshow(roi_result[n, ..., 0])
plt.show()
# Figure 1 (image not reproduced here): how the paper computes the ROI sub-windows.
# Source of the `test` module, i.e. the convolutional network used above.
import keras
import numpy as np
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Input
from keras.models import Model
import matplotlib.pyplot as plt
def model_process(include_top=True, weights='imagenet', input_tensor=None, input_shape=None):
    """Build a small convolutional feature extractor.

    The network is two 3x3, 64-filter ReLU convolutions followed by one 2x2
    max-pooling layer.  The deeper VGG-style stages (128/256/512/512 filters,
    each ending in a 2x2 max-pool) that used to follow are disabled.

    Args:
        include_top: accepted for signature compatibility; not used.
        weights: accepted for signature compatibility; not used.
        input_tensor: optional tensor to use as the model input.
        input_shape: input shape passed to ``Input`` when a new input layer
            is created.

    Returns:
        A Keras ``Model`` mapping the input to the pooled feature map.
    """
    # Pick the input layer: a fresh one, the given Keras tensor as-is, or a
    # new Input wrapping a non-Keras tensor.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif keras.backend.is_keras_tensor(input_tensor):
        img_input = input_tensor
    else:
        img_input = Input(tensor=input_tensor, shape=input_shape)

    # Settings shared by both convolution layers.
    conv_settings = dict(
        filters=64,
        kernel_size=(3, 3),
        padding='same',
        activation='relu',
        data_format='channels_last',
        kernel_initializer='uniform',
    )

    features = img_input
    for _ in range(2):
        features = Conv2D(**conv_settings)(features)
    features = MaxPooling2D((2, 2))(features)

    return Model(img_input, features)