基于Unity ComputeShader 实现正向DIBR

最新推荐文章于 2024-08-23 09:55:49 发布

VR技术小光

最新推荐文章于 2024-08-23 09:55:49 发布

阅读量943

点赞数 1

分类专栏： 3D引擎 Unity3D 光线追踪文章标签： unity 游戏引擎 DIBR computer vision

本文链接：https://blog.csdn.net/hakukou/article/details/126365232

版权

Unity3D 同时被 3 个专栏收录

26 篇文章 0 订阅

订阅专栏

3D引擎

12 篇文章 0 订阅

订阅专栏

光线追踪

4 篇文章 0 订阅

订阅专栏

什么是DIBR？

就是根据1张或者n张带有深度信息的图片，也就是RGBD图，通过插值的方式，生成出虚拟视点下的效果图。

深度信息如果不够精确，效果上，容易出现一些错误。

以及视角的变化，导致一些信息不足，会存在空洞问题。

那么在Unity中该怎么实现呢？

通过DIBR算法，在场景中预制两个相机，通过脚本分别获取颜色图和深度图。

然后通过computeshader，利用两个视点的颜色图和深度图，插值出中间视点的图。

根据几何关系，可以推导出左右视图的关系。

相关像素的水平视差 disparity 满足公式：$\text{disparity} = \dfrac{B \cdot f}{Z}$，其中 B 为两个相机之间的基线距离，f 为相机焦距，Z 为该像素对应点的深度。

这是针对一般用相机拍摄的图片来说的。f代表相机的焦距。

在unity下的相机，没有焦距的概念。怎么办呢。

由于以上公式，都是以物理实际距离尺度计算的。需要变换到像素空间尺度下，便于计算和插值，得到像素关系。

所以上面的公式，两边同时除以像素宽度pitch。

像素尺度视差=disparity/pitch=B*f/(Z*pitch)=B/Z*f/pitch

令F=f/pitch得到B/Z*F

这个F刚好可以推导出和fov的关系。

可以假定有个投影面，仿照真实相机的物理结构。

投影面也就是cmos感光元器件。假设水平分辨率为Res，每个像素宽度为pitch。

所以有w=Res*pitch。

又有，w/2/f=tan(fov/2)

所以Res*pitch/2/f=tan(fov/2)

可以推导出 F=f/pitch=f*Res/w=Res/(2tan(fov/2))

比如水平fov=90度，分辨率是3840，那么F=1920

可以看出F只和fov和分辨率有关了。

于是根据左图的深度图得到深度（需要从0-1区间变换到view坐标系）

和两个相机的距离，就可以插值出虚拟视点的图了。大概就是这个原理。

其中最大视差的计算方法为 B/near*F 因为near平面的视差最大。

如果有最大视差要求，怎么判断两个相机最远能离开多远呢？

B=near*最大视差/F。

例如最大视差为64，near=0.3 F=1920 那么相机最大距离为 0.01

也就是两个相机距离1厘米时，最大视差是64个像素。

这个用在反向DIBR时也很有用（因为需要遍历搜索，所以估算遍历次数很重要）

以下是C#源代码。

首先是获取相机深度的代码；挂载到相机物体上。

using System.Collections;
using System.Collections.Generic;
using UnityEngine;


/// <summary>
/// Redirects the Camera on this GameObject so that it renders its color output
/// into <see cref="colorRT"/> and its depth output into <see cref="depthRT"/>.
/// Both textures are created in <c>Start</c> and released in <c>OnDestroy</c>.
/// </summary>
[RequireComponent(typeof(Camera))]
public class RenderWithDepth : MonoBehaviour
{
    // Color target of the camera; assigned in Start.
    public RenderTexture colorRT;
    // Depth target of the camera (24-bit depth format); assigned in Start.
    public RenderTexture depthRT;

    // Start is called before the first frame update
    void Start()
    {
        Camera cam = GetComponent<Camera>();
        if (cam == null)
        {
            Debug.LogError("RenderWithDepth requires a Camera on the same GameObject.", this);
            enabled = false;
            return;
        }

        colorRT = new RenderTexture(3840, 2160, 0);
        depthRT = new RenderTexture(3840, 2160, 24, RenderTextureFormat.Depth);

        // Create the GPU resources up front so the buffers handed to the camera are valid.
        colorRT.Create();
        depthRT.Create();

        cam.SetTargetBuffers(colorRT.colorBuffer, depthRT.depthBuffer);
    }

    // Releases the render textures; without this they stay allocated after the component is gone.
    void OnDestroy()
    {
        Camera cam = GetComponent<Camera>();
        if (cam != null)
        {
            // Stop the camera from rendering into buffers that are about to be released.
            cam.targetTexture = null;
        }

        ReleaseTexture(ref colorRT);
        ReleaseTexture(ref depthRT);
    }

    // Frees the GPU memory of a render texture, destroys the object and clears the reference.
    static void ReleaseTexture(ref RenderTexture rt)
    {
        if (rt == null)
        {
            return;
        }

        rt.Release();
        Destroy(rt);
        rt = null;
    }

}

其次是C#和computeshader代码，挂载到虚拟视点相机上。

/// <summary>
/// Runs the DIBR compute shader after this camera has rendered and shows the
/// synthesized image on screen. Inputs are the color/depth render textures of
/// the two <see cref="RenderWithDepth"/> source cameras; the cameras' world-space
/// x positions are passed to the shader as the stereo baseline.
/// </summary>
public class RenderByCSDIBR : MonoBehaviour
{
    // Must match [numthreads(8,8,1)] of the "CSMain" kernel.
    const int ThreadGroupSize = 8;

    int width = 3840;
    int height = 2160;
    public ComputeShader dibrCS;
    public RenderWithDepth lDep, rDep;
    // resultRT_L is the texture blitted to the screen; resultRT_R is a second
    // random-write target bound to the shader as "ResultR".
    RenderTexture resultRT_L, resultRT_R;
    // Cached kernel index; -1 until the kernel has been looked up.
    int kernel = -1;

    // Start is called before the first frame update
    void Start()
    {
        resultRT_L = CreateResultTexture();
        resultRT_R = CreateResultTexture();
    }

    // Creates one ARGB32 output texture the compute shader can write to.
    RenderTexture CreateResultTexture()
    {
        RenderTexture rt = new RenderTexture(width, height, 0, RenderTextureFormat.ARGB32);
        rt.enableRandomWrite = true;
        rt.Create();
        return rt;
    }

    // Releases the output textures; without this they stay allocated after the component is gone.
    void OnDestroy()
    {
        ReleaseTexture(ref resultRT_L);
        ReleaseTexture(ref resultRT_R);
    }

    // Frees the GPU memory of a render texture, destroys the object and clears the reference.
    static void ReleaseTexture(ref RenderTexture rt)
    {
        if (rt == null)
        {
            return;
        }

        rt.Release();
        Destroy(rt);
        rt = null;
    }

    /// <summary>
    /// Clears color and depth of <paramref name="renderTexture"/> to transparent black,
    /// restoring the previously active render texture afterwards.
    /// </summary>
    public void ClearOutRenderTexture(RenderTexture renderTexture)
    {
        RenderTexture rt = RenderTexture.active;
        RenderTexture.active = renderTexture;
        GL.Clear(true, true, Color.clear);
        RenderTexture.active = rt;
    }

    // True when the shader, both source cameras and every texture the kernel binds are available.
    bool IsReady()
    {
        return dibrCS != null && lDep != null && rDep != null
            && resultRT_L != null && resultRT_R != null
            && lDep.colorRT != null && lDep.depthRT != null
            && rDep.colorRT != null && rDep.depthRT != null;
    }

    private void OnPostRender()
    {
        // Skip the frame instead of throwing when references are missing or the
        // source cameras have not created their textures yet.
        if (!IsReady())
        {
            return;
        }

        if (kernel < 0)
        {
            // Looked up once and cached rather than every frame.
            kernel = dibrCS.FindKernel("CSMain");
        }

        ClearOutRenderTexture(resultRT_L);
        ClearOutRenderTexture(resultRT_R);

        dibrCS.SetTexture(kernel, "ResultL", resultRT_L);
        dibrCS.SetTexture(kernel, "ResultR", resultRT_R);

        dibrCS.SetTexture(kernel, "LC", lDep.colorRT);
        dibrCS.SetTexture(kernel, "RC", rDep.colorRT);
        dibrCS.SetTexture(kernel, "LD", lDep.depthRT);
        dibrCS.SetTexture(kernel, "RD", rDep.depthRT);

        dibrCS.SetFloat("_LCamPos", lDep.transform.position.x);
        dibrCS.SetFloat("_RCamPos", rDep.transform.position.x);
        dibrCS.SetFloat("_CurCamPos", transform.position.x);

        // The depth textures were rendered with the source camera's clip planes, so the
        // shader must linearize with those values. The shader takes a single near/far
        // pair, so both source cameras are expected to share it. Falls back to the
        // previous constants (0.3 / 1000) when no Camera is found.
        Camera sourceCamera = lDep.GetComponent<Camera>();
        float near = sourceCamera != null ? sourceCamera.nearClipPlane : 0.3f;
        float far = sourceCamera != null ? sourceCamera.farClipPlane : 1000f;
        dibrCS.SetFloat("_Far", far);
        dibrCS.SetFloat("_Near", near);

        // Round up so that sizes that are not a multiple of the group size are fully covered.
        int groupsX = (width + ThreadGroupSize - 1) / ThreadGroupSize;
        int groupsY = (height + ThreadGroupSize - 1) / ThreadGroupSize;
        dibrCS.Dispatch(kernel, groupsX, groupsY, 1);

    }

    private void OnRenderImage(RenderTexture source, RenderTexture destination)
    {
        if (resultRT_L == null)
        {
            // Nothing synthesized yet: pass the camera image through unchanged.
            Graphics.Blit(source, destination);
            return;
        }

        Graphics.Blit(resultRT_L, destination);
    }
}

computeshader代码

#pragma kernel CSMain

// Horizontal resolution in pixels used by CSMain for the focal length in pixels
// and for clamping the right-view target column.
#define RES_W 3840

// Create a RenderTexture with enableRandomWrite flag and set it
// with cs.SetTexture
// ResultL receives the warped color. ResultR is used by CSMain as a per-pixel
// disparity buffer: x holds the high byte and y the low byte of the disparity.
RWTexture2D<float4> ResultL, ResultR;
// Color images of the left and right source views.
Texture2D<float4> LC, RC;
// Depth images of the left and right source views; only the x channel is read.
Texture2D<float4> LD, RD;
// Positions of the left, right and virtual camera, set by the host script from
// the cameras' x coordinates; CSMain derives the baselines from their differences.
float _LCamPos, _RCamPos, _CurCamPos;
// Far and near clip distances used to linearize the depth values.
float _Far, _Near;

// Converts a depth-buffer value in [0,1] (0 = near plane, 1 = far plane) into a
// linear view-space distance, using the _Near/_Far clip distances.
// Evaluates 1 / (zScale * _z + zBias) with
//   zScale = (1 - _Far/_Near) / _Far   and   zBias = (_Far/_Near) / _Far.
float LinearEyeDepth(float _z)
{
    float farOverNear = _Far / _Near;
    float zScale = (1.0 - farOverNear) / _Far;
    float zBias = farOverNear / _Far;
    return 1.0 / (zScale * _z + zBias);
}


// Returns the linear view-space depth at pixel `pos`, read from the left depth
// image (LD) when isLeft is true and from the right one (RD) otherwise.
// The stored value is flipped (1 - d) before linearization.
// NOTE(review): the flip presumably accounts for a reversed depth buffer - confirm
// for the target graphics API.
float getViewZ(uint2 pos, bool isLeft) {
    float rawDepth = isLeft ? LD[pos].x : RD[pos].x;
    return LinearEyeDepth(1 - rawDepth);
}

// Packs a non-negative integer disparity into two 8-bit channels: high byte in x,
// low byte in y. w = 1 marks the pixel as written, which distinguishes it from the
// cleared (all-zero) state of the buffer.
float4 encodeDisparity(int dis)
{
    int d = max(dis, 0);
    return float4((d / 256) / 255.0f, (d % 256) / 255.0f, 0, 1);
}

// Inverse of encodeDisparity. Rounds each channel so that small errors from the
// normalized 8-bit storage do not leak into the comparison.
float decodeDisparity(float4 enc)
{
    return round(enc.x * 255) * 256 + round(enc.y * 255);
}

// Writes `col` to column x of row y in ResultL if that pixel is still empty or the
// new sample has a larger disparity (i.e. is closer to the camera) than the one
// recorded in ResultR. Samples that land outside [0, width) are dropped.
// NOTE: the read-compare-write below is not atomic; two threads targeting the same
// pixel can race. Fixing this needs an integer buffer with InterlockedMax, which
// would change the resources bound by the host script.
void splatPixel(int x, uint y, int dis, float4 col, uint width)
{
    if (x < 0 || x >= (int)width)
        return;

    int2 dst = int2(x, (int)y);
    float4 cur = ResultR[dst];
    // cur.w is 0 after the host clears the buffer and 1 once something was written,
    // so samples with disparity 0 (far background) are no longer rejected.
    if (cur.w < 0.5f || dis > decodeDisparity(cur)) {
        ResultL[dst] = col;
        ResultR[dst] = encodeDisparity(dis);
    }
}

// Forward DIBR: every thread takes one source pixel from the left view and one from
// the right view, shifts each horizontally by its disparity towards the virtual
// camera, and splats it into ResultL, using ResultR as the disparity buffer.
// disparity = baseline * focal / depth, with focal in pixels = width / (2 * tan(fov/2)).
// The horizontal field of view is fixed at 90 degrees.
[numthreads(8,8,1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
    uint width, height;
    ResultL.GetDimensions(width, height);
    // Threads outside the image (when the size is not a multiple of 8) do nothing.
    if (id.x >= width || id.y >= height)
        return;

    float fov = radians(90.0f);
    float focal = width / (2.0f * tan(fov / 2));

    float maxBaseLine = _RCamPos - _LCamPos;
    float baselineL = _CurCamPos - _LCamPos;
    float baselineR = maxBaseLine - baselineL;

    // Left view: the virtual camera is baselineL away from the left camera, so
    // content moves towards smaller x. Signed arithmetic avoids the unsigned
    // wrap-around of `id.x - dis`.
    int dis = int(baselineL * focal / getViewZ(id.xy, true));
    splatPixel(int(id.x) - dis, id.y, dis, LC[id.xy], width);

    // Right view: content moves towards larger x.
    dis = int(baselineR * focal / getViewZ(id.xy, false));
    splatPixel(int(id.x) + dis, id.y, dis, RC[id.xy], width);
}