Although the torchvision.transforms module already provides transforms such as "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", and "TenCrop", this post writes down a separate implementation so that the TenCrop logic can be modified freely.
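For reference, the built-in transform can be used roughly as follows (a minimal sketch, assuming torchvision and PIL are installed; the file name and crop size are made-up values):

from PIL import Image
from torchvision import transforms

# TenCrop returns a tuple of 10 PIL crops: 4 corners + center, plus their mirrors
ten_crop = transforms.TenCrop(224)
img = Image.open('frame.jpg')   # hypothetical input frame
crops = ten_crop(img)           # tuple of 10 PIL.Image crops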
The custom implementation is shown below:
import numpy as np
def flow_stack_oversample(flow_stack, crop_dims):
    """
    Perform oversampling (4 corners + center, plus their mirrored versions)
    on an optical-flow stack. Adapted from pyCaffe's oversample function.
    :param flow_stack: (stack_depth x H x W) ndarray of flow frames
    :param crop_dims: (crop_height, crop_width) of the crops
    :return: (10 x stack_depth x crop_height x crop_width) ndarray of crops
    """
    im_shape = np.array(flow_stack.shape[1:])
    stack_depth = flow_stack.shape[0]
    crop_dims = np.array(crop_dims)

    # Top-left corners of the four corner crops and the center crop
    h_indices = (0, im_shape[0] - crop_dims[0])
    w_indices = (0, im_shape[1] - crop_dims[1])
    h_center_offset = (im_shape[0] - crop_dims[0]) // 2
    w_center_offset = (im_shape[1] - crop_dims[1]) // 2

    crop_ix = np.empty((5, 4), dtype=int)
    cnt = 0
    for i in h_indices:
        for j in w_indices:
            crop_ix[cnt, :] = (i, j, i + crop_dims[0], j + crop_dims[1])
            cnt += 1
    crop_ix[4, :] = [h_center_offset, w_center_offset,
                     h_center_offset + crop_dims[0], w_center_offset + crop_dims[1]]
    crop_ix = np.tile(crop_ix, (2, 1))

    crops = np.empty((10, flow_stack.shape[0], crop_dims[0], crop_dims[1]),
                     dtype=flow_stack.dtype)
    for ix in range(10):
        cp = crop_ix[ix]
        crops[ix] = flow_stack[:, cp[0]:cp[2], cp[1]:cp[3]]
    # Mirror the last 5 crops by reversing the width (last) dimension
    crops[5:] = crops[5:, :, :, ::-1]
    # Horizontal flipping reverses the sign of the x-flow; for flow encoded
    # in [0, 255] this means inverting the even-indexed (x-flow) channels
    crops[5:, range(0, stack_depth, 2), ...] = 255 - crops[5:, range(0, stack_depth, 2), ...]
    return crops
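A quick sanity check might look like the following (a minimal sketch; the 10-frame flow stack and 224x224 crop size are made-up values):

import numpy as np

# Hypothetical flow stack: 10 flow frames of size 256x340, encoded in [0, 255]
flow_stack = np.random.randint(0, 256, size=(10, 256, 340)).astype(np.uint8)
crops = flow_stack_oversample(flow_stack, (224, 224))
print(crops.shape)  # (10, 10, 224, 224): 10 crops x 10 flow frames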
def rgb_oversample(image, crop_dims):
    """
    Crop an image into the four corners, the center, and their mirrored versions.
    Adapted from Caffe's oversample function.
    Parameters
    ----------
    image : (H x W x K) ndarray
    crop_dims : (height, width) tuple for the crops.
    Returns
    -------
    crops : (10 x crop_height x crop_width x K) ndarray of crops.
    """
    # Dimensions and center.
    im_shape = np.array(image.shape)
    crop_dims = np.array(crop_dims)
    im_center = im_shape[:2] / 2.0

    # Make crop coordinates: four corners first, then the center crop
    h_indices = (0, im_shape[0] - crop_dims[0])
    w_indices = (0, im_shape[1] - crop_dims[1])
    crops_ix = np.empty((5, 4), dtype=int)
    curr = 0
    for i in h_indices:
        for j in w_indices:
            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
            curr += 1
    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
        -crop_dims / 2.0,
         crop_dims / 2.0
    ])
    crops_ix = np.tile(crops_ix, (2, 1))

    # Extract crops
    crops = np.empty((10, crop_dims[0], crop_dims[1], im_shape[-1]),
                     dtype=np.float32)
    ix = 0
    for crop in crops_ix:
        crops[ix] = image[crop[0]:crop[2], crop[1]:crop[3], :]
        ix += 1
    # Mirror the last 5 crops by flipping the width dimension
    crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :]
    return crops
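The RGB version can be exercised in the same way (again a minimal sketch with a made-up 256x340x3 image):

image = np.random.rand(256, 340, 3).astype(np.float32)  # hypothetical H x W x K image
rgb_crops = rgb_oversample(image, (224, 224))
print(rgb_crops.shape)  # (10, 224, 224, 3)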
Original source: https://github.com/yjxiong/temporal-segment-networks/blob/master/pyActionRecog/utils/io.py