ROI池化层:使用tensorflow的crop_and_resize函数作为替代实现,crop_and_resize需要归一化坐标表示的框以及每个框对应的图片索引。卷积特征图先被裁剪并缩放为固定大小(14,14),然后经最大池化得到(7x7)。注意:下面的演示代码中crop_size取的是[640, 1024],因此其实际输出尺寸并不是7x7。
import tensorflow as tf
import numpy as np
import cv2
#featureMaps指的是共享卷积层输出的特征图
# rois指的是需要被pool的框的坐标,shape[M, 5],后一维中5个值中的第一个值指的是该框在某一个训练batch中的图片索引
# im_dims指的是图片的尺度,shape[N, 2],N指的是batch_size,这里固定为1
def roi_pool(featureMaps, rois, im_dims, crop_size=(640, 1024)):
    """ROI pooling built from ``tf.image.crop_and_resize`` plus a 2x2 max-pool.

    Regions of Interest (ROIs) from the Region Proposal Network (RPN) are
    formatted as (image_id, x1, y1, x2, y2) in pixel coordinates.
    Note: since mini-batches are sampled from a single image, image_id = 0.

    Args:
        featureMaps: 4-D input ``[batch, height, width, depth]`` that the
            boxes are cropped from.
        rois: ``[M, 5]`` array/tensor; column 0 is the batch index of the
            image each box belongs to, columns 1-4 are (x1, y1, x2, y2).
        im_dims: ``[N, 2]`` image sizes as (height, width). The per-box
            division below relies on broadcasting, so N is expected to be 1
            (or equal to M).
        crop_size: (crop_height, crop_width) every box is resized to before
            max-pooling. Defaults to the previously hard-coded (640, 1024);
            pass (14, 14) to get the classic 7x7 ROI-pool output.

    Returns:
        Tensor holding one cropped, resized and 2x2 max-pooled patch per ROI;
        with 'SAME' padding and stride 2 its spatial size is
        ceil(crop_height / 2) x ceil(crop_width / 2).
    """
    with tf.variable_scope('roi_pool'):
        # Batch index of the image each box is cropped from.
        box_ind = tf.cast(rois[:, 0], dtype=tf.int32)

        # Pixel-space boxes (x1, y1, x2, y2), shape [M, 4]. Cast explicitly so
        # integer or float64 ROIs cannot trigger integer division or a dtype
        # mismatch against the float32 normalizer.
        boxes = tf.cast(rois[:, 1:], dtype=tf.float32)

        # Per-coordinate divisor (width, height, width, height) that maps the
        # pixel coordinates into the normalized range crop_and_resize expects.
        # NOTE(review): the TF docs map a normalized value y to
        # y * (image_height - 1), so dividing by the full size is a slight
        # approximation - kept as in the original.
        normalization = tf.cast(
            tf.stack([im_dims[:, 1], im_dims[:, 0], im_dims[:, 1], im_dims[:, 0]], axis=1),
            dtype=tf.float32)
        boxes = tf.divide(boxes, normalization)

        # Reorder (x1, y1, x2, y2) -> (y1, x1, y2, x2) as crop_and_resize requires.
        boxes = tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]], axis=1)

        # Crop every box and resize it to the fixed crop_size.
        pooledFeatures = tf.image.crop_and_resize(
            image=featureMaps,
            boxes=boxes,
            box_ind=box_ind,
            crop_size=tf.constant(list(crop_size), dtype=tf.int32))

        # 2x2 max-pool with stride 2 halves each spatial dimension.
        pooledFeatures = tf.nn.max_pool(
            pooledFeatures, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return pooledFeatures
if __name__ == "__main__":
    # Demo: crop two hard-coded boxes out of an image with roi_pool and show them.
    # Raw strings give the same Windows paths as before without relying on the
    # invalid "\d" escape sequence.
    path = r'D:\document\image1.jpg'
    out_path = r'D:\document\imagex.jpg'

    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or unreadable.
        raise IOError('cannot read image: ' + path)
    print(img.shape)

    # The image itself stands in for the "feature map": a batch of one.
    featureMaps = np.array([img])
    # Each row is (image_id, x1, y1, x2, y2) in pixel coordinates.
    rois = np.array([[0, 70, 270, 600, 510], [0, 0, 0, 740, 180]], np.float32)
    # (height, width) of the image actually loaded, so the box normalization
    # matches the pixels the rectangles are drawn on.
    im_dims = np.array([img.shape[:2]])

    pooledFeatures_result = roi_pool(featureMaps, rois, im_dims)
    with tf.Session() as sess:
        pooledFeatures_result = sess.run(pooledFeatures_result)
    print(pooledFeatures_result.shape)

    # crop_and_resize yields float32; convert back to uint8 for display/saving.
    pooledFeatures = np.array(pooledFeatures_result, np.uint8)

    # Outline every ROI on the source image. OpenCV needs plain ints for points.
    for roi in rois:
        x1, y1, x2, y2 = (int(v) for v in roi[1:])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 3)
    cv2.imshow('img', img)

    # One window per pooled ROI: 'feature1', 'feature2', ...
    for idx, crop in enumerate(pooledFeatures, start=1):
        cv2.imshow('feature{}'.format(idx), crop)
    cv2.imwrite(out_path, pooledFeatures[0])

    cv2.waitKey()
    cv2.destroyAllWindows()
tf.image.crop_and_resize详解
函数原型: tf.image.crop_and_resize(image,boxes, box_ind,crop_size,method='bilinear',extrapolation_value=0,name=None)
功能:将batch中的每一张图像boxes指定部分的图像调整到crop_size的大小
arg:
1.image: A `Tensor`. A 4-D tensor of shape `[batch, image_height, image_width, depth]`
支持的数据类型`uint8`, `uint16`, `int8`, `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
2. boxes: A `Tensor` of type `float32`,shape 为 `[num_boxes, 4]`,每一行为一个框 `[y1, x1, y2, x2]`,坐标需要归一化为 `[y1/H, x1/W, y2/H, x2/W]`
3.box_ind: A `Tensor` of type `int32`. shape 为 `[num_boxes]`,取值范围为 `[0, batch)`
  第 i 个值表示第 i 个框对应的图像在一个batch数据中的序号,故每个值满足 0 <= box_ind[i] < batch
4.crop_size: A `Tensor` of type `int32`.size = [crop_height, crop_width]
5.method: An optional `string` from: `"bilinear", "nearest"`. Defaults to `"bilinear"`.