unity_Shader延迟渲染

最新推荐文章于 2025-05-23 18:15:48 发布

原创最新推荐文章于 2025-05-23 18:15:48 发布 · 1.3k 阅读

12 ·

CC 4.0 BY-SA版权

文章标签：

#unity #shader

shader基础知识点专栏收录该内容

26 篇文章

订阅专栏

本文深入探讨了延迟渲染的原理及实现，通过自定义shader代码详细解释了GBuffer的使用和光照计算过程，揭示了其在显存带宽、光照算法统一性、硬件AA和透明物体渲染方面的局限。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

自己写了个延迟渲染的GBuffer，在网上搜索了一圈，想看看延迟渲染Deferred到底是咋回事，不知道是我不会查找，还是大佬们觉得延迟渲染太简单，大家都在讲原理，讲优点，讲缺点，可我就是想看看代码到底是咋回事，咋就这么难？？！！！不多说了，根据自己的理解，加上shader代码，终于是加深了对延迟渲染的理解：
（1）延迟渲染原理：
在这里插入图片描述延迟渲染的实质，是先不要做迭代三角形，不做光照计算，而是先找出来你能看到的所有像素，再去迭代光照。直接迭代三角形的话，由于大量三角形你是看不到的，无疑是极大的浪费。
先说明GBuffer的使用：
在这里插入图片描述
第一个渲染GBuffer的pass：

// G-buffer fill shader for Unity's built-in deferred rendering path.
// The single pass below writes the surface attributes into four render
// targets (MRT); no lighting is evaluated here.
//   gBuffer0 : rgb = albedo (texture * _Diffuse), a = 1
//   gBuffer1 : rgb = specular colour,            a = _Gloss / 50
//   gBuffer2 : rgb = world normal packed to 0..1, a = 1
//   gBuffer3 : rgb = albedo again (exp2-encoded when HDR is off), a = 1
Shader "Unlit/002"
{
    Properties
    {
        _MainTex("mainTex",2D)="white"{}
        _Diffuse("Diffuse",Color) = (1,1,1,1)
        _Specular("Specular",Color) = (1,1,1,1)
        _Gloss("Gloss",Range(8.0,50)) = 20
    }
    SubShader
    {
        Tags { "RenderType" = "Opaque" }
        LOD 100

        Pass
        {
            // Selects this pass for the G-buffer stage of the deferred path.
            Tags{"LightMode"="Deferred"}

            CGPROGRAM

            #pragma vertex vert
            #pragma fragment frag
            // Skip platforms without multiple-render-target support.
            // (The keyword is "nomrt"; the previous "norm" was a typo and excluded nothing.)
            #pragma exclude_renderers nomrt
            // Two variants: LDR (keyword off) and HDR (UNITY_HDR_ON).
            #pragma multi_compile __ UNITY_HDR_ON
            #include "UnityCG.cginc"

            sampler2D _MainTex;
            float4 _MainTex_ST;
            float4 _Diffuse;
            float4 _Specular;
            float _Gloss;

            // Per-vertex input from the mesh.
            struct appdata{
                float4 vertex:POSITION;
                float3 normal:NORMAL;
                float2 uv:TEXCOORD0;
            };

            // Vertex-to-fragment interpolators.
            struct v2f{
                float4 pos:SV_POSITION;
                float2 uv : TEXCOORD0;
                float3 worldNormal:TEXCOORD1;
            };

            // One output per G-buffer render target.
            struct DeferredOutPut{
                float4 gBuffer0 : SV_TARGET0;
                float4 gBuffer1 : SV_TARGET1;
                float4 gBuffer2 : SV_TARGET2;
                float4 gBuffer3 : SV_TARGET3;
            };

            // Transforms the vertex to clip space and forwards the tiled UV
            // and the world-space normal.
            v2f vert(appdata v){
                v2f o;
                o.pos = UnityObjectToClipPos(v.vertex);
                o.uv = TRANSFORM_TEX(v.uv,_MainTex);
                o.worldNormal = UnityObjectToWorldNormal(v.normal);
                return o;
            }

            // Packs the surface attributes into the four G-buffer targets.
            DeferredOutPut frag(v2f i){
                DeferredOutPut o;

                half3 albedo = tex2D(_MainTex,i.uv).rgb * _Diffuse.rgb;
                o.gBuffer0 = half4(albedo, 1);

                // _Gloss has range 8..50; dividing by 50 keeps it inside 0..1
                // so it survives an 8-bit channel. The lighting shader must
                // multiply by 50 again.
                o.gBuffer1 = half4(_Specular.rgb, _Gloss/50.0);

                // Interpolation shortens the normal, so re-normalise before
                // remapping from -1..1 to 0..1.
                o.gBuffer2 = float4(normalize(i.worldNormal) * 0.5 + 0.5, 1);

                // The fourth target receives the albedo as well. Without HDR
                // the value is stored as exp2(-x), the encoding that is undone
                // with -log2 when the LDR buffer is decoded.
                half3 emission = albedo;
                #if !defined(UNITY_HDR_ON)
                    emission = exp2(-emission);
                #endif
                o.gBuffer3 = half4(emission, 1);

                return o;
            }
            ENDCG

        }
    }
}

下面重点来了！！！！！！！！！！自定义一个延迟渲染的shader，而不是用默认的Built-In
在这里插入图片描述（2）第二个Pass：在deferred shader的Pass中，主要是对GBuffer中传过来的数据进行解析变换，同时重建出世界坐标等其他参数。这里主要使用内置函数UnityDeferredCalculateLightParams，它定义在UnityDeferredLibrary.cginc中，下面代码中的注释会解释此函数的计算过程。（在vert着色器中顶点坐标只变换到了裁剪空间，此时还没有做透视除法得到NDC坐标。代码中使用了最基本的Lambert漫反射和Blinn-Phong高光反射光照模型）：

// Custom deferred lighting shader, meant to replace the built-in one.
//   Pass 0 : runs once per light, reads the G-buffer and accumulates
//            Lambert diffuse + Blinn-Phong specular into the light buffer.
//   Pass 1 : LDR only - decodes the exp2(-x) encoded light buffer.
Shader "Unlit/0002-Deferred"
{
    Properties
    {
        // Blend factors of the lighting pass, same names and defaults as
        // Unity's Internal-DeferredShading shader.
        //   HDR : One One        (additive accumulation)
        //   LDR : DstColor Zero  (multiplicative, because values are exp2(-x) encoded)
        _SrcBlend ("", Float) = 1
        _DstBlend ("", Float) = 1
    }
    SubShader
    {
        // Lighting pass.
        Pass
        {
            // State is declared on the pass (not on the SubShader) so that the
            // blend mode does not leak into the decode pass below.
            ZWrite Off
            Blend [_SrcBlend] [_DstBlend]

            CGPROGRAM
            #pragma target 3.0
            #pragma vertex vert
            #pragma fragment frag
            #pragma multi_compile_lightpass
            // LDR / HDR variants; the LDR variant encodes its output.
            #pragma multi_compile __ UNITY_HDR_ON
            // Exclude hardware without multiple-render-target support.
            #pragma exclude_renderers nomrt

            #include "UnityCG.cginc"
            #include "UnityDeferredLibrary.cginc"
            #include "UnityGBuffer.cginc"

            sampler2D _CameraGBufferTexture0;
            sampler2D _CameraGBufferTexture1;
            sampler2D _CameraGBufferTexture2;

            // Light geometry input: a full-screen quad or a light volume mesh.
            struct a2v
            {
                float4 vertex : POSITION;
                float3 normal :NORMAL;
            };

            // Produces the interpolators expected by UnityDeferredCalculateLightParams
            // (unity_v2f_deferred: pos, uv, ray).
            unity_v2f_deferred vert(a2v v)
            {
                unity_v2f_deferred o;
                o.pos = UnityObjectToClipPos(v.vertex);
                o.uv = ComputeScreenPos(o.pos);
                // View-space ray through this vertex. x and y are negated, as in
                // Unity's vert_deferred, so that the later scale by
                // (far / ray.z) yields a ray with positive z and the right
                // x/y orientation for unity_CameraToWorld.
                o.ray = UnityObjectToViewPos(v.vertex) * float3(-1,-1,1);
                // _LightAsQuad is 1 when a full-screen quad is drawn
                // (directional light) and 0 otherwise; for the quad the ray is
                // supplied in the normal channel.
                o.ray = lerp(o.ray, v.normal, _LightAsQuad);
                return o;
            }

            #ifdef UNITY_HDR_ON
            half4 frag(unity_v2f_deferred i) : SV_Target
            #else
            fixed4 frag(unity_v2f_deferred i) : SV_Target
            #endif
            {
                float3 worldPos;
                float2 uv;
                half3 lightDir;
                float atten;
                float fadeDist;
                // Fills worldPos, screen uv, light direction, attenuation
                // (including shadows) and shadow fade distance.
                UnityDeferredCalculateLightParams(i,worldPos,uv,lightDir, atten,fadeDist);

                // ---- Reference sketch: what the call above computes ----------
                //float2 uv = i.uv.xy/i.uv.w;
                //
                //// Rebuild the world position from depth and the view ray.
                //float depth = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv);
                //depth = Linear01Depth(depth);
                //// ray only carries a direction, its length is arbitrary.
                //// _ProjectionParams.z is the far plane distance; since x, y and z
                //// scale together, _ProjectionParams.z / i.ray.z is the factor that
                //// stretches ray until it reaches the far plane.
                //float3 rayToFarPlane = i.ray * (_ProjectionParams.z/i.ray.z);
                //float4 viewPos = float4(rayToFarPlane * depth,1);
                //float3 worldPos = mul(unity_CameraToWorld, viewPos).xyz;
                //
                //float fadeDist = UnityComputeShadowFadeDistance(worldPos, viewPos.z);
                //
                //// Per light type attenuation, shadows included.
                //#if defined(SPOT)
                //    float3 toLight = _LightPos.xyz-worldPos;
                //    half3 lightDir = normalize(toLight);
                //
                //    float4 uvCookie = mul(unity_WorldToLight, float4(worldPos,1));
                //    float atten = tex2Dbias(_LightTexture0, float4(uvCookie.xy/uvCookie.w,0,-8)).w;
                //
                //    atten *= uvCookie.w < 0;
                //
                //    atten *= tex2D(_LightTextureB0, dot(toLight,toLight) * _LightPos.w).r;
                //
                //    atten *= UnityDeferredComputeShadow(worldPos, fadeDist, uv);
                //#elif defined(DIRECTIONAL) || defined(DIRECTIONAL_COOKIE)
                //    half3 lightDir = -_LightDir.xyz;
                //    float atten = 1.0;
                //
                //    atten *= UnityDeferredComputeShadow(worldPos, fadeDist, uv);
                //
                //    #if defined(DIRECTIONAL_COOKIE)
                //    float4 uvCookie = mul(unity_WorldToLight, float4(worldPos,1));
                //    atten *= tex2Dbias(_LightTexture0, float4(uvCookie.xy,0,-8)).w;
                //    #endif
                //
                //#elif defined(POINT) || defined(POINT_COOKIE)
                //    float3 toLight = _LightPos.xyz-worldPos;
                //    half3 lightDir = normalize(toLight);
                //
                //    float atten = tex2D(_LightTextureB0, dot(toLight,toLight) * _LightPos.w).r;
                //
                //    atten *= UnityDeferredComputeShadow(worldPos, fadeDist, uv);
                //
                //    #if defined(POINT_COOKIE)
                //    float4 uvCookie = mul(unity_WorldToLight, float4(worldPos,1));
                //    atten *= texCUBEbias(_LightTexture0, float4(uvCookie.xyz, -8)).w;
                //    #endif
                //#else
                //    half3 lightDir = 0;
                //    float atten = 0;
                //#endif
                // ---------------------------------------------------------------

                half3 lightColor = _LightColor.rgb * atten;

                // Unpack the surface attributes written by the G-buffer pass.
                half4 gbuffer0 = tex2D(_CameraGBufferTexture0, uv);
                half4 gbuffer1 = tex2D(_CameraGBufferTexture1, uv);
                half4 gbuffer2 = tex2D(_CameraGBufferTexture2, uv);

                half3 diffuseColor = gbuffer0.rgb;
                half3 specularColor = gbuffer1.rgb;
                // Undo the /50 applied when the gloss was stored.
                float gloss = gbuffer1.a * 50;
                // Normals are stored remapped to 0..1.
                float3 worldNormal = normalize(gbuffer2.xyz * 2 - 1);

                fixed3 viewDir = normalize(UnityWorldSpaceViewDir(worldPos));
                fixed3 halfDir = normalize(lightDir + viewDir);

                // Lambert diffuse + Blinn-Phong specular.
                half3 diffuse = lightColor * diffuseColor * max(0,dot(worldNormal,lightDir));
                half3 specular = lightColor * specularColor * pow(max(0,dot(worldNormal,halfDir)),gloss);

                half4 color = half4(diffuse + specular,1);

                #ifdef UNITY_HDR_ON
                return color;
                #else
                // LDR light buffer stores exp2(-x); combined with the
                // DstColor/Zero blend this multiplies the encoded values,
                // i.e. adds the lighting. The pass below decodes with -log2.
                return exp2(-color);
                #endif
            }
            ENDCG
        }


        // Decode pass, used for LDR: converts the encoded light buffer back to colour.
        Pass
        {
            ZTest Always
            Cull Off
            ZWrite Off
            Stencil
            {
                // Both entries refer to the same property; the former
                // "_StencilNonBackGround" spelling did not match it.
                Ref[_StencilNonBackground]
                ReadMask[_StencilNonBackground]

                CompBack equal
                CompFront equal
            }

            CGPROGRAM
            #pragma target 3.0
            #pragma vertex vert
            #pragma fragment frag
            #pragma exclude_renderers nomrt

            #include "UnityCG.cginc"

            sampler2D _LightBuffer;
            struct v2f
            {
                float4 vertex:SV_POSITION;
                float2 texcoord: TEXCOORD0;
            };

            // Pass-through vertex stage for the full-screen blit.
            v2f vert(float4 vertex:POSITION,float2 texcoord :TEXCOORD0)
            {
                v2f o;
                o.vertex = UnityObjectToClipPos(vertex);
                o.texcoord = texcoord.xy;
                #ifdef UNITY_SINGLE_PASS_STEREO
                o.texcoord = TransformStereoScreenSpaceTex(o.texcoord,1.0);
                #endif
                return o;
            }

            // Inverse of the exp2(-x) encoding used while accumulating light.
            fixed4 frag(v2f i) :SV_Target
            {
                return -log2(tex2D(_LightBuffer,i.texcoord));
            }
            ENDCG
        }
    }
}

设置相机的RenderPath:Deferred
在这里插入图片描述
(3)延迟渲染的细节流程：
打开Frame Debugger，查看渲染过程，点击RT0：可以看到4个RT
缺点1：此处请留意：占用大量的显存带宽。这个其实也好理解，说穿了，就是显存的读写，是非常受限的，我应该在“认识显卡”章节，有单独介绍过显存位宽的计算。现在主流GDDR5显卡，带宽大概是200GBit好像，假设渲染窗口是1920 * 1080 * 4字节（RGBA）* 4（MRT）* 8（Byte to bit） * 60（FPS），那么，光是写入这个GBuffer占用的带宽，我算了一下，就是约15.9Gbit/s（约2GB/s），而且这还没有算上光照阶段对GBuffer的读取。这其实已经是极大的消耗，在台式机还能勉强承受，在移动端设备，我估计就这个就坑逼了。
（4）查看下Lighting：
在这里插入图片描述缺点2：只能使用同一个光照pass。这个很好理解，因为如果是一个一个mesh迭代，我一个mesh用一个material，里面用自己的光照算法。另外一个mesh我不想用光照，直接一个白板pass，当然是可以的。但是，渲染到Gbuffer之后，你其实已经不知道哪个像素点属于哪个Mesh了，自然就只能使用同一套光照算法了。这个为什么一般的书里都不提呢，主要是大多数时候，本来一个场景的光照算法就是要统一的，不统一的反而是少数。

（5）动手添加灯光和延迟渲染物体，查看drawCall，和前向渲染做比较，差异还是很大的！！

延迟渲染另外还有2个缺点：
1、不能使用硬件AA（MSAA），这个，我在其他地方应该讲过，render to texture，是不能用MSAA的，这个跟AA的原理有关。所以使用了延迟渲染之后，UE4只支持FXAA跟TAA。
2、不支持透明物体的渲染。为什么呢？很简单啊，因为之前渲染Mesh的时候，是一个一个迭代的，一个一个做深度测试之类的，但是，延迟渲染要先渲染到Gbuffer。可想而知，Gbuffer只是把当前能看到的像素记录下来，但是透明的，同一个像素点，可能需要记录更多！所以，延迟渲染的时候，一般都是先渲染非透明的Mesh，后续再单独渲染透明Mesh。