基于深度图的反向合成虚拟视点由如下步骤构成:
- 已知虚拟视点的深度图和源视点的RGB图像,相机内外参信息。
- 对虚拟视点像素坐标系下的一点[x,y,1](齐次坐标表示),根据相机内参以及虚拟视点在[x,y]处的深度信息,得到虚拟视点相机坐标系下的空间点[x,y,z,1]。之后根据相机的旋转和平移矩阵(相机外参)将其变换为源视点相机坐标系下的空间点,再根据相机内参得到源视点像素坐标系下的一点[x’,y’],最后取该点的像素值作为虚拟视点[x,y]处的像素值。
程序如下所示:
import torch.nn as nn
import torch
import numpy as np
import json
import imageio
# import Path
from pathlib import Path
import skimage
import numpy
import OpenEXR
import Imath
import torch.nn.functional as F
'''
用这种方法要注意,这里面的深度信息是完整的,也就是不存在破洞的,所以这个是一个很大的区别
'''
class BackprojectDepth(nn.Module):
    """Lift a depth map to homogeneous 3D points in the camera frame.

    A grid of homogeneous pixel coordinates ``[x, y, 1]`` is built once for a
    fixed ``(batch_size, height, width)`` and stored, together with a row of
    ones, as non-trainable parameters.
    """

    def __init__(self, batch_size, height, width):
        """Precompute the pixel grid for the given batch and image size."""
        super().__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width

        # (2, H, W): channel 0 holds x (column) indices, channel 1 holds y.
        xs, ys = np.meshgrid(range(width), range(height), indexing='xy')
        grid = np.stack((xs, ys), axis=0).astype(np.float32)
        self.id_coords = nn.Parameter(torch.from_numpy(grid),
                                      requires_grad=False)

        # Homogeneous "1" row, reused for both pixel and 3D coordinates.
        self.ones = nn.Parameter(
            torch.ones(batch_size, 1, height * width),
            requires_grad=False)

        # (B, 3, H*W): flattened [x, y, 1] for every pixel, row-major order.
        flat_xy = self.id_coords.view(2, -1).unsqueeze(0)
        flat_xy = flat_xy.repeat(batch_size, 1, 1)
        self.pix_coords = nn.Parameter(torch.cat([flat_xy, self.ones], 1),
                                       requires_grad=False)

    def forward(self, depth, inv_K):
        """Back-project every pixel using its depth.

        Args:
            depth: tensor that is reshaped to ``(batch_size, 1, H*W)``.
            inv_K: inverse intrinsics; only the upper-left 3x3 part of each
                batch item is used.

        Returns:
            Tensor of shape ``(batch_size, 4, H*W)`` holding ``[X, Y, Z, 1]``.
        """
        rays = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
        scaled = depth.view(self.batch_size, 1, -1) * rays
        return torch.cat([scaled, self.ones], 1)
class Project3D(nn.Module):
    """Project homogeneous 3D points into a camera and return a sampling grid.

    The output is laid out as ``(batch, height, width, 2)`` with coordinates
    rescaled to ``[-1, 1]``, i.e. the form ``F.grid_sample`` takes as ``grid``.
    """

    def __init__(self, batch_size, height, width, eps=1e-7):
        """Store the output grid size and the divide-by-zero guard ``eps``."""
        super().__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.eps = eps

    def forward(self, points, K, T):
        """Project ``points`` with the matrix ``K @ T``.

        Args:
            points: homogeneous points, ``(batch, 4, H*W)``; cast to float32.
            K: intrinsics, multiplied on the left of ``T``.
            T: transform applied to the points before ``K`` (presumably the
                combined rotation/translation [R|t] -- confirm with caller).

        Returns:
            float32 tensor ``(batch, height, width, 2)`` of (x, y) positions
            normalised to ``[-1, 1]``.
        """
        # Keep only the first three rows: the result is [u*z, v*z, z].
        projection = torch.matmul(K, T)[:, :3, :]
        projected = torch.matmul(projection, points.to(torch.float32))

        # Perspective divide: homogeneous -> Cartesian pixel coordinates.
        # eps keeps the division finite when the projected depth is zero.
        # See https://www.cnblogs.com/riddick/p/8511960.html
        z = projected[:, 2:3, :] + self.eps
        pix_coords = projected[:, :2, :] / z

        pix_coords = pix_coords.view(
            self.batch_size, 2, self.height, self.width).permute(0, 2, 3, 1)

        # Dividing by (size - 1) maps pixel indices 0..size-1 onto [0, 1];
        # the shift and scale below then map [0, 1] onto [-1, 1].
        pix_coords[..., 0] /= self.width - 1
        pix_coords[..., 1] /= self.height - 1
        pix_coords = (pix_coords - 0.5) * 2
        return pix_coords
def read_image(path: Path) -> np.ndarray:
    """Load the image at ``path`` with ``skimage.io.imread``.

    Args:
        path: location of the image file.

    Returns:
        The array produced by ``skimage.io.imread`` (a NumPy array, not a
        torch tensor; callers convert it themselves).
    """
    return skimage.io.imread(path.as_posix())
def read_depth(path: Path) -> numpy.ndarray:
    """Load a depth map, choosing the reader from the file extension.

    Supported extensions (matched case-insensitively): ``.png``, ``.npy``,
    ``.npz`` (array stored under the key ``'depth'``) and ``.exr`` (float
    channel ``'Y'``).

    Args:
        path: location of the depth file.

    Returns:
        The depth map as a NumPy array.

    Raises:
        RuntimeError: if the extension is not one of the supported formats.
    """
    suffix = path.suffix.lower()
    if suffix == '.png':
        depth = skimage.io.imread(path.as_posix())
    elif suffix == '.npy':
        depth = numpy.load(path.as_posix())
    elif suffix == '.npz':
        with numpy.load(path.as_posix()) as depth_data:
            depth = depth_data['depth']
    elif suffix == '.exr':
        exr_file = OpenEXR.InputFile(path.as_posix())
        try:
            raw_bytes = exr_file.channel('Y', Imath.PixelType(Imath.PixelType.FLOAT))
            # The pixel buffer covers the data window, so the reshape must use
            # its extent; the display window may differ from it.
            window = exr_file.header()['dataWindow']
            height = window.max.y + 1 - window.min.y
            width = window.max.x + 1 - window.min.x
        finally:
            exr_file.close()
        depth_vector = numpy.frombuffer(raw_bytes, dtype=numpy.float32)
        depth = numpy.reshape(depth_vector, (height, width))
    else:
        raise RuntimeError(f'Unknown depth format: {path.suffix}')
    return depth
def _read_pose(pose_path):
    """Read one pose JSON file and return ``(fx, fy, cx, cy, extrinsic)``."""
    # The context manager closes the file even if JSON parsing fails.
    with open(pose_path, encoding='utf-8') as pose_file:
        pose = json.load(pose_file)
    extrinsic = np.array(pose['extrinsic'], dtype=np.float32)
    return pose['f_x'], pose['f_y'], pose['c_x'], pose['c_y'], extrinsic


def _data_load():
    """Load the hard-coded sample pair (frames 0000 and 0009) as tensors.

    Returns:
        A tuple ``(frame1, frame2, depth1, transformation1, transformation2,
        intrinsic, mask)`` where

        * ``frame1`` / ``frame2`` are the 0000 / 0009 images as float64
          tensors permuted to ``(1, C, H, W)``,
        * ``depth1`` is the 0009 depth map as a ``(1, 1, H, W)`` tensor with
          infinite values replaced by 0,
        * ``transformation1`` / ``transformation2`` are the ``'extrinsic'``
          matrices of the two poses with a leading batch dimension,
        * ``intrinsic`` is the ``(1, 4, 4)`` float32 intrinsic matrix built
          from pose 0000,
        * ``mask`` is the NumPy boolean array marking infinite depth in the
          0009 depth map.
    """
    frame1_path = Path(r'C:\Users\Asher\Desktop\0000\images\0000.png')
    frame2_path = Path(r'C:\Users\Asher\Desktop\0000\images\0009.png')
    # Unlike forward warping, the depth that drives the warp is the one of
    # frame 0009; the 0000 depth is only used to mask frame 0000.
    depth0_path = Path(r'C:\Users\Asher\Desktop\0000\depths\0000.exr')
    depth1_path = Path(r'C:\Users\Asher\Desktop\0000\depths\0009.exr')
    posespath1 = r'C:\Users\Asher\Desktop\0000\poses\0000.json'
    posespath2 = r'C:\Users\Asher\Desktop\0000\poses\0009.json'

    # Camera parameters: intrinsics come from pose 0000, padded to 4x4.
    fx, fy, cx, cy, transformation1 = _read_pose(posespath1)
    intrinsic = np.array([[fx, 0, cx, 0], [0, fy, cy, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
    *_, transformation2 = _read_pose(posespath2)

    # Images and depth maps.
    frame1 = read_image(frame1_path).astype(np.float64)
    frame2 = read_image(frame2_path).astype(np.float64)
    depth1 = read_depth(depth1_path).astype(np.float64)
    depth0 = read_depth(depth0_path).astype(np.float64)

    # Zero out pixels whose depth is infinite.
    mask0 = np.isinf(depth0)
    mask = np.isinf(depth1)
    frame1[mask0] = 0
    frame2[mask] = 0
    depth1[mask] = 0

    # Convert to tensors in (b, c, h, w) layout.
    frame1 = torch.from_numpy(frame1).unsqueeze(0).permute(0, 3, 1, 2)
    frame2 = torch.from_numpy(frame2).unsqueeze(0).permute(0, 3, 1, 2)
    depth1 = torch.from_numpy(depth1).unsqueeze(0).unsqueeze(0)
    transformation1 = torch.from_numpy(transformation1).unsqueeze(0)
    transformation2 = torch.from_numpy(transformation2).unsqueeze(0)
    intrinsic = torch.from_numpy(intrinsic).unsqueeze(0).to(torch.float32)
    return frame1, frame2, depth1, transformation1, transformation2, intrinsic, mask
if __name__ == "__main__":
    # Backward-warp frame 0000 into the 0009 viewpoint using the 0009 depth.
    frame1, frame2, depth1, transformation1, transformation2, intrinsic, mask = _data_load()
    img_height = frame1.shape[2]
    img_width = frame1.shape[3]

    backproject = BackprojectDepth(1, img_height, img_width)
    project = Project3D(1, img_height, img_width)

    intrinsic_inv = torch.linalg.inv(intrinsic)
    # Relative transform built from the two extrinsics, shape (b, 4, 4).
    T = torch.bmm(transformation1, torch.linalg.inv(transformation2))

    cam_points = backproject(depth1, intrinsic_inv)
    pix_coords = project(cam_points, intrinsic, T).to(torch.float64)

    # Project3D normalises by (width - 1, height - 1), so -1 / +1 refer to the
    # centres of the first / last pixels. That is the align_corners=True
    # convention; the default (False) would sample at slightly wrong positions.
    output = F.grid_sample(frame1, pix_coords, padding_mode="border",
                           align_corners=True)

    # (1, C, H, W) -> (H, W, C) uint8 image.
    output = output[0].permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
    # Blank the pixels whose 0009 depth was infinite.
    output[mask] = 0
    skimage.io.imsave('output.png', output)
输入图像如下所示,分别是源视图的RGB图像和目标视图的深度图:
输出如下图所示:
如右下角所示,由于遮挡,虚拟视点中的部分区域在源视图中并不可见,但反向合成仍然会从源视图中取值,因此会导致伪影的出现。
我们采用的是MVS-synth数据集,地址如下所示:
其中可以用于实验的一个子集: