pytorch F.affine_grid F.grid_sample探究

最新推荐文章于 2024-06-11 20:06:28 发布

normol

最新推荐文章于 2024-06-11 20:06:28 发布

阅读量1w

点赞数 15

分类专栏：计算机视觉文章标签： pytorch affine_grid grid_sample

本文链接：https://blog.csdn.net/normol/article/details/108264857

版权

计算机视觉专栏收录该内容

26 篇文章 66 订阅

订阅专栏

在pytorch框架中， F.affine_grid 与 F.grid_sample（torch.nn.functional as F）联合使用来对图像进行变形。

F.affine_grid 根据形变参数产生sampling grid，F.grid_sample根据sampling grid对图像进行变形。

需要注意，pytorch中的F.grid_sample是反向采样，这就导致了形变参数与直觉是相反的（后面有实验验证）（例如放射矩阵中的缩放因子是0.5，会使目标图像扩大两倍；平移为正会使目标图像往左上角移动）

【反向采样】：定义的sampling grid尺寸即输出图像的尺寸，而sampling grid其中每一个位置内的数值x,y，例如sampling_grid[i,j]=[x,y], 则表示输出图像在(i,j)点的像素值应该在原图(x,y)处取值，若x,y都恰好为整数且在原图范围内，则直接取原图在x,y点的像素值；若x,y不是都为整数但都在原图范围内，则需要用插值算法计算该点的像素值；超出图像范围的则为0。
实验一：
目的：验证F.affine_grid的形变参数与直觉是相反的

def blog_test():
    img = cv2.imread(r'C:\Users\dell7920\Desktop\box.png')
    out_h = img.shape[0]
    out_w = img.shape[1]
    img = np.moveaxis(img, -1, 0)
    print(img.shape)
    img_batch = torch.from_numpy(img).unsqueeze(0).float()
    # -30 表示正向采样中的30(但为顺时针，实际旋转矩阵的角度定义的是逆时针转的度数)
    angle = -30 * math.pi / 180  # 顺时针旋转30度，一定要加pi！
    #D = np.diag([1.5, 1.5])
    A = np.array([[np.cos(angle), -np.sin(angle)],
                  [np.sin(angle), np.cos(angle)]])
    print('A', A)
    tx = 0
    ty = 0
    theta = np.array(
        [[A[0, 0], A[0, 1], tx], [A[1, 0], A[1, 1], ty]])
    theta = torch.from_numpy(theta).float().unsqueeze(0)

    batch_size = theta.size()[0]
    out_size = torch.Size(
        (batch_size, 3, out_h, out_w))
    # 结论！！
    # 需要注意，这个theta与一般见到的theta不一样，这个是反着来的
    grid = F.affine_grid(theta, out_size)
    warped_image_batch = F.grid_sample(img_batch, grid)
    print(warped_image_batch.shape)
    output = warped_image_batch[0, :, :,
                                :].cpu().detach().numpy().astype('uint8')
    print(output.shape)
    output = np.moveaxis(output, 0, -1)
    cv2.imshow('out', output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

原图：
在这里插入图片描述
变形后的结果

实验二：
目的：验证对sampling grid做乘法，可以实现crop, padding等操作

from skimage import io
import pandas as pd
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt


def grid2contour(grid):
    '''
    ref: https://zhuanlan.zhihu.com/p/147062836
    grid--image_grid used to show deform field
    type: numpy ndarray, shape： (h, w, 2), value range：(-1, 1)
    '''
    assert grid.ndim == 3
    x = np.arange(-1, 1, 2 / grid.shape[1])
    y = np.arange(-1, 1, 2 / grid.shape[0])
    X, Y = np.meshgrid(x, y)
    Z1 = grid[:, :, 0] + 1  # remove the dashed line
    Z1 = Z1[::-1]  # vertical flip
    Z2 = grid[:, :, 1] + 1

    plt.figure()
    plt.contour(X, Y, Z1, 15, colors='k')
    plt.contour(X, Y, Z2, 15, colors='k')
    plt.xticks(()), plt.yticks(())  # remove x, y ticks
    plt.title('deform field')
    plt.show()


class AffineGridGen(Module):
	#out_h，out_w都是原图大小
    def __init__(self, out_h=920, out_w=640, out_ch=3, use_cuda=True):
        super(AffineGridGen, self).__init__()
        self.out_h = out_h
        self.out_w = out_w
        self.out_ch = out_ch

    def forward(self, theta):
        b = theta.size()[0]
        if not theta.size() == (b, 2, 3):
            theta = theta.view(-1, 2, 3)
        theta = theta.contiguous()
        batch_size = theta.size()[0]
        out_size = torch.Size(
            (batch_size, self.out_ch, self.out_h, self.out_w))
        # F.affine_grid是得到output image中每个点在input image中对应的坐标
        # output shape: (batch,h,w,2)
        # 一定要注意，theta的batch要与outsize的batch一致
        # 生成的gird会归一化到[-1,1之间]
        return F.affine_grid(theta, out_size)


theta_identity = torch.Tensor(np.expand_dims(
    np.array([[1, 0, 0], [0, 1, 0]]), 0).astype(np.float32))

grid_gen = AffineGridGen()
grid = grid_gen(theta_identity)
# torch.Size([1, 240, 240, 2])
print(grid.shape)
# 测试，看grid是否正确
#grid2contour(grid[0, :, :, :].cpu().detach().numpy())

#padding_factor = 0.5
padding_factor = 1
crop_factor = 9 / 16
#crop_factor = 1
grid = grid * (padding_factor * crop_factor)


image = io.imread(r'C:\Users\dell7920\Desktop\1b.jpg')
# c,h,w->h,w,c
image = np.moveaxis(image, -1, 0)
print(image.shape)
image_batch = torch.from_numpy(image).unsqueeze(0).float()
warped_image_batch = F.grid_sample(image_batch, grid)
output_image = warped_image_batch.cpu().detach().numpy()[0, :, :, :]
# h,w,c -> c,h,w
output_image = np.moveaxis(output_image, 0, -1)
plt.figure()
plt.imshow(output_image.astype('uint8'))
plt.show()