代码记录:Spatial pyramid pooling Implementation

import numpy as np
import math
import tensorflow as tf

# input_feature_maps is of the form: N, C, H, W
def np_spatial_pyramid_pooling(input_feature_maps, spatial_pyramid, dtype=np.float32):
    """Max-pool feature maps over every level of a spatial pyramid.

    For each pyramid level ``(n_h, n_w)`` every channel is covered by an
    ``n_h x n_w`` grid of windows and the maximum of each window is kept.
    Along a dimension of size ``d`` split into ``n`` bins the window length
    is ``ceil(d / n)`` and the stride is ``floor(d / n)``.

    NOTE: when ``d`` is not divisible by ``n`` the trailing pixels may not
    fall inside any window (e.g. ``d = 5, n = 3`` only covers pixels 0..3).

    Args:
        input_feature_maps: Array-like of shape ``(N, C, H, W)``.
        spatial_pyramid: Integer array-like of shape ``(num_levels, 2)``;
            row ``i`` holds ``(n_h, n_w)`` for level ``i``.
        dtype: NumPy dtype the feature maps are converted to before pooling;
            it is also the dtype of the returned array.

    Returns:
        Array of shape ``(N, C, sum(n_h * n_w))``. Levels are concatenated
        in pyramid order and the bins of one level are laid out row-major
        (bin row first, then bin column).

    Raises:
        AssertionError: If the feature maps are not 4-D or the pyramid is
            not a 2-D array with exactly two columns.
    """
    # Convert once to a C-contiguous array of the requested dtype so the
    # strides used for the window view always match the real memory layout
    # (hand-computed byte strides break for any other dtype or layout).
    feature_maps = np.ascontiguousarray(input_feature_maps, dtype=dtype)
    pyramid = np.asarray(spatial_pyramid)

    assert feature_maps.ndim == 4, "input_feature_maps must have shape (N, C, H, W)"
    assert pyramid.ndim == 2, "spatial_pyramid must be a 2-D array"
    assert pyramid.shape[1] == 2, "each spatial_pyramid row must be (n_h, n_w)"

    batch_size, num_channels, h, w = feature_maps.shape
    stride_n, stride_c, stride_h, stride_w = feature_maps.strides

    pooled_levels = []
    for n_h, n_w in pyramid:
        n_h, n_w = int(n_h), int(n_w)

        # Window length = ceil(d / n), stride = floor(d / n), in pure
        # integer arithmetic. (n - 1) * stride + length never exceeds d,
        # so the view below stays inside the buffer.
        win_h, step_h = -(-h // n_h), h // n_h
        win_w, step_w = -(-w // n_w), w // n_w

        # Read-only 6-D view: (N, C, bin_row, bin_col, row_in_win, col_in_win).
        windows = np.lib.stride_tricks.as_strided(
            feature_maps,
            shape=(batch_size, num_channels, n_h, n_w, win_h, win_w),
            strides=(
                stride_n,
                stride_c,
                stride_h * step_h,
                stride_w * step_w,
                stride_h,
                stride_w,
            ),
            writeable=False,
        )

        # Reducing both in-window axes at once equals pooling along the
        # width and then along the height.
        pooled = windows.max(axis=(4, 5))
        pooled_levels.append(pooled.reshape(batch_size, num_channels, -1))

    return np.concatenate(pooled_levels, axis=2)
def tf_spatial_pyramid_pooling(tf_input_feature_maps, tf_spatial_pyramid, dtype=tf.float32):
    """Wrap ``np_spatial_pyramid_pooling`` as a TensorFlow op.

    Args:
        tf_input_feature_maps: Tensor holding the feature maps that are
            forwarded to ``np_spatial_pyramid_pooling``.
        tf_spatial_pyramid: Tensor holding the pyramid levels that are
            forwarded to ``np_spatial_pyramid_pooling``.
        dtype: TensorFlow dtype of the op's output; the matching NumPy dtype
            is passed to the NumPy implementation so both sides agree.

    Returns:
        The tensor produced by the wrapped NumPy function.
    """
    np_dtype = tf.as_dtype(dtype).as_numpy_dtype

    def _pool(feature_maps, pyramid):
        # Forward the requested dtype so the NumPy result matches ``Tout``.
        return np_spatial_pyramid_pooling(feature_maps, pyramid, dtype=np_dtype)

    # tf.py_func is not part of the top-level TF 2.x namespace; prefer
    # tf.numpy_function when this TensorFlow build provides it.
    wrap_numpy_fn = getattr(tf, "numpy_function", None) or tf.py_func
    return wrap_numpy_fn(_pool, [tf_input_feature_maps, tf_spatial_pyramid], dtype)
    
# Demo: pool a random batch over a four-level pyramid (1x1, 2x2, 3x3, 4x4).
x = tf.random.normal([8, 256, 256, 32])

# One (n_h, n_w) row per pyramid level, first as a matrix and then as a
# plain ndarray, which is what the pooling function expects.
spatials = np.asmatrix([[level, level] for level in range(1, 5)])
s = np.asarray(spatials)
print(s.shape)

res = np_spatial_pyramid_pooling(x, s)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

MarkJhon

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值