代码记录：Spatial pyramid pooling Implementation

最新推荐文章于 2023-09-19 16:37:41 发布

MarkJhon

最新推荐文章于 2023-09-19 16:37:41 发布

阅读量136

点赞数

分类专栏： python、算法、机器学习笔记　文章标签： tensorflow、神经网络

本文链接：https://blog.csdn.net/qq_31554953/article/details/110874202

版权

numpy、空间金字塔池化、卷积神经网络、池化操作、特征图

关键词由CSDN通过智能技术生成

python 同时被 3 个专栏收录

37 篇文章 0 订阅

订阅专栏

算法

9 篇文章 0 订阅

订阅专栏

机器学习笔记

3 篇文章 0 订阅

订阅专栏

import numpy as np
import math
import tensorflow as tf

def _max_pool_axis(feature_maps, axis, n_bins):
    """Max-pool ``feature_maps`` along ``axis`` into ``n_bins`` windows.

    Window ``k`` covers indices ``[k * stride, k * stride + window)`` where
    ``window = ceil(size / n_bins)`` and ``stride = floor(size / n_bins)``.
    """
    size = feature_maps.shape[axis]
    window = -(-size // n_bins)  # integer form of ceil(size / n_bins)
    stride = size // n_bins

    index = [slice(None)] * feature_maps.ndim
    pooled = []
    for k in range(n_bins):
        start = k * stride
        index[axis] = slice(start, start + window)
        pooled.append(np.amax(feature_maps[tuple(index)], axis=axis))
    return np.stack(pooled, axis=axis)


def np_spatial_pyramid_pooling(input_feature_maps, spatial_pyramid, dtype=np.float32):
    """Apply spatial pyramid max pooling to a batch of feature maps.

    For each pyramid level ``(n_h, n_w)`` every channel is max-pooled into an
    ``n_h x n_w`` grid; the grids of all levels are flattened row by row and
    concatenated along the last axis.

    Pooling windows use ``ceil(dim / n)`` as window length and
    ``floor(dim / n)`` as stride. When ``dim`` is not a multiple of ``n`` the
    trailing elements can fall outside every window (e.g. ``dim=5, n=3``
    covers indices 0..3 only), and when ``n > dim`` the stride is 0 so every
    window sees only the first element.

    Args:
        input_feature_maps: Array-like of shape ``(N, C, H, W)``.
        spatial_pyramid: Array-like of shape ``(num_levels, 2)`` holding the
            positive integer bin counts ``(n_h, n_w)`` of each level.
        dtype: Kept for backward compatibility. It used to be needed to
            hand-compute byte strides; pooling now relies on the array's own
            layout, so the value is ignored and the result keeps the dtype
            of ``input_feature_maps``.

    Returns:
        ``numpy.ndarray`` of shape ``(N, C, sum(n_h * n_w))``.

    Raises:
        AssertionError: If the inputs do not have the shapes described above
            or a bin count is not positive.
    """
    feature_maps = np.asarray(input_feature_maps)
    pyramid = np.asarray(spatial_pyramid)

    assert feature_maps.ndim == 4, "input_feature_maps must be (N, C, H, W)"
    assert pyramid.ndim == 2, "spatial_pyramid must be 2-D"
    assert pyramid.shape[1] == 2, "each pyramid level must be (n_h, n_w)"
    assert (pyramid > 0).all(), "bin counts must be positive"

    batch_size, num_channels = feature_maps.shape[:2]

    levels = []
    for n_h, n_w in pyramid.tolist():
        # Max is separable: pooling along W and then along H gives the same
        # result as pooling each 2-D window at once.
        pooled = _max_pool_axis(feature_maps, 3, n_w)
        pooled = _max_pool_axis(pooled, 2, n_h)
        levels.append(pooled.reshape(batch_size, num_channels, -1))

    if not levels:
        # np.concatenate rejects an empty list; an empty pyramid yields no bins.
        return np.empty((batch_size, num_channels, 0), dtype=feature_maps.dtype)

    return np.concatenate(levels, axis=2)
def tf_spatial_pyramid_pooling(tf_input_feature_maps, tf_spatial_pyramid, dtype=tf.float32):
    """Run ``np_spatial_pyramid_pooling`` as a TensorFlow op.

    Args:
        tf_input_feature_maps: Tensor of feature maps, passed to the NumPy
            routine as its ``input_feature_maps`` argument.
        tf_spatial_pyramid: Tensor of per-level bin counts, passed as the
            ``spatial_pyramid`` argument.
        dtype: TensorFlow dtype declared for the op's output; it is also
            forwarded (as the matching NumPy dtype) to the NumPy routine.
            NOTE(review): this should match the dtype of
            ``tf_input_feature_maps`` -- confirm against callers.

    Returns:
        The tensor produced by wrapping the NumPy routine.
    """
    np_dtype = tf.as_dtype(dtype).as_numpy_dtype

    def _pool(feature_maps, spatial_pyramid):
        return np_spatial_pyramid_pooling(feature_maps, spatial_pyramid, dtype=np_dtype)

    # tf.py_func is not exposed at the top level in TF 2.x; prefer
    # tf.numpy_function when the installed TensorFlow provides it.
    wrap_numpy = getattr(tf, "numpy_function", None) or tf.py_func
    return wrap_numpy(_pool, [tf_input_feature_maps, tf_spatial_pyramid], dtype)
    
# Demo run: pool a random 4-D batch over a four-level pyramid whose levels
# use 1x1, 2x2, 3x3 and 4x4 bins.
# NOTE(review): the pooling routine reads its input as (N, C, H, W), so this
# tensor is treated as 256 channels of 256x32 maps -- confirm that is intended.
x = tf.random.normal(shape=[8, 256, 256, 32])
spatials = np.asmatrix([[k, k] for k in range(1, 5)])
s = np.asarray(spatials)
print(s.shape)

res = np_spatial_pyramid_pooling(x, s)