深度图反向合成虚拟视点/backward view synthesis based on depth

最新推荐文章于 2024-07-31 15:55:27 发布

Feliz Da Vida

最新推荐文章于 2024-07-31 15:55:27 发布

阅读量206

点赞数 2

文章标签： python 算法

本文链接：https://blog.csdn.net/qq_45512728/article/details/131146869

版权

基于深度图的反向合成虚拟视点由如下步骤构成：

已知虚拟视点的深度图和源视点的RGB图像，相机内外参信息。
对虚拟视点上的一点[x,y,1]（像素坐标系下的齐次坐标表示），先根据相机内参以及虚拟视点[x,y]处的深度信息，反投影得到虚拟视点相机坐标系下的空间点[x,y,z,1]。之后根据相机的旋转和平移矩阵（相机外参）将其变换为源视点相机坐标系下的空间点，再根据相机内参投影得到源视点像素坐标系下的一点[x',y']，最后取该处的像素值作为虚拟视点[x,y]处的像素值。

程序如下所示：

import torch.nn as nn
import torch
import numpy as np
import json
import imageio
# import Path
from pathlib import Path
import skimage
import numpy
import OpenEXR
import Imath
import torch.nn.functional as F

'''
Note when using this method: the depth information used here is complete,
i.e. it contains no holes. This is an important distinction.
'''

class BackprojectDepth(nn.Module):
    """Lift a depth map to homogeneous 3D points in the camera frame.

    A grid of homogeneous pixel coordinates ``[x, y, 1]`` is built once for
    the given batch size and image size. ``forward`` multiplies that grid by
    the inverse intrinsics and scales the result by the depth values.
    """

    def __init__(self, batch_size, height, width):
        super().__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width

        # Pixel grid of shape (2, H, W): channel 0 holds the x (column)
        # index of every pixel, channel 1 the y (row) index.
        xs, ys = np.meshgrid(range(width), range(height), indexing='xy')
        grid = np.stack([xs, ys], axis=0).astype(np.float32)
        self.id_coords = nn.Parameter(torch.from_numpy(grid), requires_grad=False)

        # Row of ones, shape (B, 1, H*W); reused as the homogeneous
        # coordinate both for the pixels and for the returned 3D points.
        num_pixels = height * width
        self.ones = nn.Parameter(torch.ones(batch_size, 1, num_pixels),
                                 requires_grad=False)

        # Flatten the grid to (2, H*W), replicate it for every batch item and
        # append the ones row, giving homogeneous pixels of shape (B, 3, H*W).
        flat_xy = self.id_coords.view(2, -1).unsqueeze(0).repeat(batch_size, 1, 1)
        self.pix_coords = nn.Parameter(torch.cat([flat_xy, self.ones], 1),
                                       requires_grad=False)

    def forward(self, depth, inv_K):
        """Return homogeneous camera-space points of shape (B, 4, H*W).

        ``depth`` must be reshapeable to (B, 1, H*W); only the upper-left
        3x3 part of ``inv_K`` is used.
        """
        rays = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
        scaled = depth.view(self.batch_size, 1, -1) * rays
        return torch.cat([scaled, self.ones], 1)


class Project3D(nn.Module):
    """Project homogeneous 3D points into a camera and return a sampling grid.

    The points are transformed by ``T``, projected with the intrinsics ``K``
    and the resulting pixel coordinates are rescaled to the range [-1, 1].
    """

    def __init__(self, batch_size, height, width, eps=1e-7):
        super().__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.eps = eps

    def forward(self, points, K, T):
        """Return normalised pixel coordinates of shape (B, H, W, 2).

        ``points`` is (B, 4, H*W); ``K`` and ``T`` are batched 4x4 matrices
        (``T`` is the combined rotation/translation transform).
        """
        # Combined projection matrix; the last row is dropped -> (B, 3, 4).
        projection = torch.matmul(K, T)[:, :3, :]
        projected = torch.matmul(projection, points.to(torch.float32))

        # Perspective division: homogeneous image coordinates become pixel
        # coordinates once divided by their third component (the depth).
        # ``eps`` guards against division by zero.
        # See https://www.cnblogs.com/riddick/p/8511960.html
        depth = projected[:, 2:3, :] + self.eps
        pixels = (projected[:, :2, :] / depth).view(
            self.batch_size, 2, self.height, self.width)

        # Pixel indices run from 0 to width-1 (height-1), so dividing by those
        # maxima maps them to [0, 1]; the final step stretches that to [-1, 1].
        x_unit = pixels[:, 0] / (self.width - 1)
        y_unit = pixels[:, 1] / (self.height - 1)
        grid = torch.stack((x_unit, y_unit), dim=-1)
        return (grid - 0.5) * 2


def read_image(path: Path) -> numpy.ndarray:
    """Load an image file and return its pixel data.

    Args:
        path: Location of the image file.

    Returns:
        Whatever ``skimage.io.imread`` returns for the file, i.e. a NumPy
        array (not a torch tensor).
    """
    # Import the submodule explicitly: a bare ``import skimage`` is not
    # guaranteed to expose ``skimage.io`` on every scikit-image release.
    import skimage.io

    return skimage.io.imread(path.as_posix())


def read_depth(path: Path) -> numpy.ndarray:
    """Load a depth map from a ``.png``, ``.npy``, ``.npz`` or ``.exr`` file.

    Args:
        path: Location of the depth file; the format is chosen from its suffix.

    Returns:
        The depth map as a NumPy array. For ``.npz`` the ``'depth'`` entry is
        returned; for ``.exr`` the ``'Y'`` channel is read as 32-bit floats
        and reshaped to ``(height, width)``.

    Raises:
        RuntimeError: If the suffix is not one of the supported formats.
    """
    suffix = path.suffix
    if suffix == '.png':
        # Import the submodule explicitly: a bare ``import skimage`` is not
        # guaranteed to expose ``skimage.io`` on every scikit-image release.
        import skimage.io

        return skimage.io.imread(path.as_posix())
    if suffix == '.npy':
        return numpy.load(path.as_posix())
    if suffix == '.npz':
        with numpy.load(path.as_posix()) as depth_data:
            return depth_data['depth']
    if suffix == '.exr':
        exr_file = OpenEXR.InputFile(path.as_posix())
        try:
            header = exr_file.header()
            raw_bytes = exr_file.channel('Y', Imath.PixelType(Imath.PixelType.FLOAT))
        finally:
            # Release the file handle even if reading the channel fails.
            exr_file.close()
        # The pixel buffer covers the data window, so the array shape must be
        # taken from it (the display window may have a different size).
        window = header['dataWindow']
        height = window.max.y + 1 - window.min.y
        width = window.max.x + 1 - window.min.x
        depth_vector = numpy.frombuffer(raw_bytes, dtype=numpy.float32)
        return numpy.reshape(depth_vector, (height, width))
    raise RuntimeError(f'Unknown depth format: {path.suffix}')


def _load_pose(pose_path):
    """Read one pose JSON file and return ``(intrinsic, extrinsic)`` arrays."""
    # The context manager closes the file handle as soon as it is parsed.
    with open(pose_path) as pose_file:
        pose = json.load(pose_file)
    fx, fy = pose['f_x'], pose['f_y']
    cx, cy = pose['c_x'], pose['c_y']
    # Intrinsics padded to 4x4 so they can be chained with 4x4 extrinsics.
    intrinsic = np.array([[fx, 0, cx, 0], [0, fy, cy, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
    extrinsic = np.array(pose['extrinsic'], dtype=np.float32)
    return intrinsic, extrinsic


def _data_load(data_root=Path(r'C:\Users\Asher\Desktop\0000'),
               source_name='0000', target_name='0009'):
    """Load the source/target frames, target depth, poses and intrinsics.

    The scene directory is expected to contain ``images/<name>.png``,
    ``depths/<name>.exr`` and ``poses/<name>.json``. The defaults reproduce
    the originally hard-coded scene and frame names.

    Args:
        data_root: Directory of the scene.
        source_name: File stem of the source view (provides the RGB image).
        target_name: File stem of the target view (provides the depth map).

    Returns:
        A tuple ``(frame1, frame2, depth1, transformation1, transformation2,
        intrinsic, mask)``:
        ``frame1``/``frame2`` are the source/target images as float64 tensors
        with a leading batch axis and the last image axis moved to position 1
        (i.e. (1, C, H, W) for H x W x C images); ``depth1`` is the target
        depth as a float64 tensor with two leading singleton axes;
        ``transformation1``/``transformation2`` are the float32 source/target
        extrinsics with a leading batch axis; ``intrinsic`` is the source
        view's 4x4 intrinsic matrix as a (1, 4, 4) float32 tensor; ``mask`` is
        a boolean NumPy array marking infinite values in the target depth.
    """
    data_root = Path(data_root)
    source_image_path = data_root / 'images' / f'{source_name}.png'
    target_image_path = data_root / 'images' / f'{target_name}.png'
    source_depth_path = data_root / 'depths' / f'{source_name}.exr'
    # Backward warping uses the depth of the target view.
    target_depth_path = data_root / 'depths' / f'{target_name}.exr'

    # Camera parameters; only the source view's intrinsics are used.
    intrinsic, transformation1 = _load_pose(data_root / 'poses' / f'{source_name}.json')
    _, transformation2 = _load_pose(data_root / 'poses' / f'{target_name}.json')

    # Images and depth maps.
    frame1 = read_image(source_image_path).astype(np.float64)
    frame2 = read_image(target_image_path).astype(np.float64)
    depth1 = read_depth(target_depth_path).astype(np.float64)
    depth0 = read_depth(source_depth_path).astype(np.float64)

    # Zero out every pixel whose depth is infinite.
    mask0 = np.isinf(depth0)
    mask = np.isinf(depth1)
    frame1[mask0] = 0
    frame2[mask] = 0
    depth1[mask] = 0

    # Convert to tensors and add the batch (and channel) axes.
    frame1 = torch.from_numpy(frame1).unsqueeze(0).permute(0, 3, 1, 2)
    frame2 = torch.from_numpy(frame2).unsqueeze(0).permute(0, 3, 1, 2)
    depth1 = torch.from_numpy(depth1).unsqueeze(0).unsqueeze(0)
    transformation1 = torch.from_numpy(transformation1).unsqueeze(0)
    transformation2 = torch.from_numpy(transformation2).unsqueeze(0)
    intrinsic = torch.from_numpy(intrinsic).unsqueeze(0).to(torch.float32)

    return frame1, frame2, depth1, transformation1, transformation2, intrinsic, mask


if __name__ == "__main__":
    # Synthesise the target view by sampling the source image (frame1) at the
    # positions obtained by re-projecting the target depth map (depth1).
    frame1, frame2, depth1, transformation1, transformation2, intrinsic, mask = _data_load()
    img_height, img_width = frame1.shape[2], frame1.shape[3]
    backproject = BackprojectDepth(1, img_height, img_width)
    project = Project3D(1, img_height, img_width)
    intrinsic_inv = torch.linalg.inv(intrinsic)
    # Relative transform applied to the back-projected target points
    # (presumably target camera -> source camera, assuming the extrinsics are
    # world-to-camera matrices -- TODO confirm against the dataset).
    T = torch.bmm(transformation1, torch.linalg.inv(transformation2))  # (b, 4, 4)
    cam_points = backproject(depth1, intrinsic_inv)
    pix_coords = project(cam_points, intrinsic, T).to(torch.float64)
    # Project3D maps pixel index 0 to -1 and index (size - 1) to +1, i.e. the
    # grid extrema refer to the centres of the corner pixels. That is the
    # align_corners=True convention, so it has to be requested explicitly
    # (the grid_sample default is align_corners=False).
    output = F.grid_sample(frame1, pix_coords, padding_mode="border", align_corners=True)
    # (b, c, h, w) -> (h, w, c) for the single batch item, as 8-bit pixels.
    output = output.permute(2, 3, 1, 0).detach().cpu().numpy().astype(np.uint8)[:, :, :, 0]
    # Pixels without valid target depth are blanked in the result.
    output[mask] = 0
    skimage.io.imsave('output.png', output)

输入图像如下所示，分别是源视图的RGB图像和目标视图的深度图：