Unreal 反编译hlsl codebyte代码分析:第一段:
0: mul r0.xyzw, v0.xyxy, cb0[119].zwzw
1: sample_l(texture2d)(float,float,float,float) r1.x, r0.zwzz, t7.xyzw, s7, l(0)
2: max r1.x, r1.x, l(0.0000)
3: add r1.yz, v0.xxyx, -cb0[117].xxyx
4: mad r1.yz, r1.yyzy, cb0[118].zzwz, l(0.0000, -0.5000, -0.5000, 0.0000)
5: mul r2.xy, r1.yzyy, v0.wwww
6: mov r2.z, v0.w
7: mul r1.yzw, r2.xxyz, l(0.0000, 2.0000, -2.0000, 1.0000)
8: mul r2.xyzw, v0.yyyy, cb0[37].xyzw
9: mad r2.xyzw, v0.xxxx, cb0[36].xyzw, r2.xyzw
10: mad r2.xyzw, r1.xxxx, cb0[38].xyzw, r2.xyzw
11: add r2.xyzw, r2.xyzw, cb0[39].xyzw
12: div r2.xyz, r2.xyzx, r2.wwww
自定义工具转换出的计算公式:
0: field1.xyzw = position.xyxy * cb0[119].zwzw;
float4 field2;
1: float depth = max(t7.SampleLevel(s7, field1.zw, 0).x, 0.0000);//读取深度图(sample_l,mip 0),max 将深度钳制为非负值(只取大于等于0的值),结果存入 depth
//View_ViewRectMin cb0[117].xyzw float4 0.00, 0.00, 0.00, 0.00
//View_ViewSizeAndInvSize cb0[118].xyzw float4 1207.00, 652.00, 0.00083, 0.00153
3: field2.yz = (position.xy - cb0[117].xy) * cb0[118].zw + float2(-0.5000, -0.5000);//注意先减后乘:对应 asm 第3行 add、第4行 mad
float4 field3;
5: field3.xy = field2.yz * position.ww;
6: field3.z = position.w;
7: field2.yzw = field3.xyz * float3(2.0000, -2.0000, 1.0000);
//clippos to worldpos
8: field3.xyzw = depth * cb0[38].xyzw + position.xxxx * cb0[36].xyzw + position.yyyy * cb0[37].xyzw + cb0[39].xyzw;
12: field3.xyz = field3.xyz / field3.www;
0:行分析 这里调用的是Unreal的 SvPositionToBufferUV函数:
// Maps a hardware pixel position (SV_Position, in pixel units) to a [0,1] UV
// for sampling the scene buffers, by scaling with the inverse buffer size
// (View.BufferSizeAndInvSize.zw holds 1/width, 1/height).
float2 SvPositionToBufferUV(float4 SvPosition)
{
return SvPosition.xy * View.BufferSizeAndInvSize.zw;
}
1&2行在我的工具中合并在一起了,max函数只取深度大于0的值,第1行是调用unreal的深度图读取函数:
CalcSceneDepth(float2 ScreenUV);
/** Returns clip space W, which is world space distance along the View Z axis. Note if you need DeviceZ LookupDeviceZ() is the faster option */
// Returns scene depth as clip-space W (world-space distance along the view Z axis).
// Platform/feature-level preprocessor switches choose between sampling the depth
// texture (converting device Z to world Z) and framebuffer-fetch style reads.
float CalcSceneDepth(float2 ScreenUV)
{
#if SCENE_TEXTURES_DISABLED
// Scene textures are unavailable in this pass: no depth can be read.
return 0.0f;
#else
#if FEATURE_LEVEL > FEATURE_LEVEL_ES3_1 || MOBILE_FORCE_DEPTH_TEXTURE_READS
// High feature level (or forced texture reads): sample device Z at mip 0 and convert.
return ConvertFromDeviceZ(Texture2DSampleLevel(SceneDepthTexture, SceneDepthTextureSampler, ScreenUV, 0).r);
#else
#if COMPILER_GLSL_ES2
#if IOS
// Only call FramebufferFetch when actually compiling for IOS ES2.
return FramebufferFetchES2().w;
#elif WEBGL
// WebGL: depth was copied into the scene alpha copy texture; read it directly.
return Texture2DSampleLevel(SceneAlphaCopyTexture, SceneAlphaCopyTextureSampler, ScreenUV, 0).r;
#else
// Other ES2: sample depth, then let DepthbufferFetchES2 combine it with a
// framebuffer fetch using the InvDeviceZToWorldZTransform coefficients.
float SceneW = ConvertFromDeviceZ(Texture2DSampleLevel(SceneDepthTexture, SceneDepthTextureSampler, ScreenUV, 0).r);
return DepthbufferFetchES2(SceneW, View.InvDeviceZToWorldZTransform[2], View.InvDeviceZToWorldZTransform[3]);
#endif
#elif METAL_PROFILE && !MAC
// Metal (non-Mac): framebuffer fetch carries depth in .w.
return FramebufferFetchES2().w;
#else
// Fallback: sample device Z at mip 0 and convert.
return ConvertFromDeviceZ(Texture2DSampleLevel(SceneDepthTexture, SceneDepthTextureSampler, ScreenUV, 0).r);
#endif
#endif
#endif
}
1-7行和在一起是整体调用的是:SvPositionToScreenPosition函数
注意:第七行返回的是 field2.yzw yz表示的是NDCPos.xy w =SvPosition.w
// investigate: doesn't work for usage with View.ScreenToWorld, see SvPositionToScreenPosition2()
// Converts a hardware pixel position (SV_Position) into a screen position:
// float4(NDC.xyz, 1) pre-multiplied by SvPosition.w, so .w carries SceneDepth.
float4 SvPositionToScreenPosition(float4 SvPosition)
{
// todo: is already in .w or needs to be reconstructed like this:
// SvPosition.w = ConvertFromDeviceZ(SvPosition.z);
float2 PixelPos = SvPosition.xy - View.ViewRectMin.xy;
// NDC (NormalizedDeviceCoordinates, after the perspective divide)
// NDC x is in [-1,1] left-to-right; the -2 flips y because pixel y grows downward.
float3 NDCPos = float3( (PixelPos * View.ViewSizeAndInvSize.zw - 0.5f) * float2(2, -2), SvPosition.z);
// SvPosition.w: so .w has the SceneDepth, some mobile code and the DepthFade material expression wants that
return float4(NDCPos.xyz, 1) * SvPosition.w;
}
8-12行 是矩阵计算:SvPositionToTranslatedWorld
这里传入的参数是float4(position.xy, depth, 1):asm 中 v0.x 乘 cb0[36]、v0.y 乘 cb0[37]、depth 乘 cb0[38],再加 cb0[39],即 mul(float4(position.xy, depth, 1), 矩阵)
// Used for post process shaders which don't need to resolve the view
// Transforms a hardware pixel position (SV_Position) into translated-world space
// by one matrix multiply followed by a homogeneous divide.
// Used for post process shaders which don't need to resolve the view.
float3 SvPositionToTranslatedWorld(float4 SvPosition)
{
float4 HomWorldPos = mul(float4(SvPosition.xyz, 1), View.SVPositionToTranslatedWorld);
// Perspective divide brings the homogeneous result back to 3D.
return HomWorldPos.xyz / HomWorldPos.w;
}
13-27行代码分析:
ASM代码:
13: add r1.x, r2.z, -cb0[58].z
14: dp3 r2.w, -r2.xyzx, -r2.xyzx
15: rsq r2.w, r2.w
16: mul r3.xyz, r2.wwww, -r2.xyzx
17: div r1.yz, r1.yyzy, r1.wwww
18: mad r1.yz, r1.yyzy, cb0[54].xxyx, cb0[54].wwzw
19: sample_indexable(texture2d)(float,float,float,float) r4.xyz, r1.yzyy, t8.xyzw, s8
20: sample_l(texture2d)(float,float,float,float) r5.xyzw, r1.yzyy, t4.xyzw, s4, l(0)
21: mul r6.xyz, r5.zwyz, l(10.0000, 10.0000, 10.0000, 0.0000)
22: round_ni r5.yw, r6.xxxz
23: ftoi r1.w, r5.y
24: imax r1.w, r1.w, l(0)
25: imin r1.w, r1.w, l(4)
26: sample_l(texture2d)(float,float,float,float) r7.xyzw, r1.yzyy, t1.zxyw, s1, l(0)
27: sample_l(texture2d)(float,float,float,float) r6.xzw, r1.yzyy, t2.xwyz, s2, l(0)
伪代码:
prefer to use SvPositionToTranslatedWorld() for better quality
//float3 SvPositionToWorld(float4 SvPosition)
//{
// return SvPositionToTranslatedWorld(SvPosition) - View.PreViewTranslation;
//}
//13行只计算了Z轴的坐标
13: field2.x = field3.z - cb0[58].z;//View_PreViewTranslation cb1[58].xyz float3 -165.00, 0.00, -124.00884
14: field3.w = 1 / sqrt(dot(-field3.xyz, -field3.xyz));
float4 field4;
16: field4.xyz = field3.www * -field3.xyz;
//float2 ScreenPositionToBufferUV(float4 ScreenPosition)
//{
// return float2(ScreenPosition.xy / ScreenPosition.w * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz);
//}
17: field2.yz = field2.yz / field2.ww * cb0[54].xy + cb0[54].wz;//View_ScreenPositionScaleBias cb1[54].xyzw float4 0.48669, -0.50, 0.50, 0.48669
float4 field5;
19: field5.xyz = t8.Sample(s8, field2.yz).xyz;
float4 field6;
20: field6.xyzw = t4.SampleLevel(s4, field2.yz, 0).xyzw;//sample_l 对应 SampleLevel(mip 0)
float4 field7;
21: field7.xyz = field6.zwy * float3(10.0000, 10.0000, 10.0000);
22: field6.yw = floor(field7.xz);//round_ni 是向负无穷取整,即 floor
23-25: field2.w = clamp((int)field6.y, 0, 4);//ftoi 转为整数后,imax/imin 把值钳制到 [0,4]
float4 field8;
26: field8.xyzw = t1.SampleLevel(s1, field2.yz, 0).zxyw;
27: field7.xzw = t2.SampleLevel(s2, field2.yz, 0).xwy;
13行的函数来源是:SvPositionToWorld函数,这里只取了Z轴的值
// prefer to use SvPositionToTranslatedWorld() for better quality
// Translated-world position minus PreViewTranslation gives absolute world position.
// prefer to use SvPositionToTranslatedWorld() for better quality
float3 SvPositionToWorld(float4 SvPosition)
{
return SvPositionToTranslatedWorld(SvPosition) - View.PreViewTranslation;
}
14-15行:rsq(dot(-HomWorldPos, -HomWorldPos)),即先点乘得到向量长度的平方,再取平方根倒数(1/length)
16: field4.xyz = field3.www * -field3.xyz;//归一化计算:-field3.xyz 乘以其长度的倒数,得到单位方向向量(等价于 normalize(-field3.xyz))
17-18行是计算UV的方法
float2 ScreenPositionToBufferUV(float4 ScreenPosition)
{
return float2(ScreenPosition.xy / ScreenPosition.w * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz);
}
19-20行:
T8 是基本光照计算好的图:
T4 来自GBuffer.OutGBufferE
GBuffer 贴图的信息可以查看void EncodeGBuffer(
FGBufferData GBuffer,
out float4 OutGBufferA,
out float4 OutGBufferB,
out float4 OutGBufferC,
out float4 OutGBufferD,
out float4 OutGBufferE,
out float4 OutGBufferVelocity,
float QuantizationBias = 0 // -0.5 to 0.5 random float. Used to bias quantization.
)
函数,内部有具体信息
OutGBufferE 的计算来自PrecomputedShadowFactors,大致计算如下:
GBuffer.PrecomputedShadowFactors = !(GBuffer.SelectiveOutputMask & SKIP_PRECSHADOW_MASK) ? InGBufferE : ((GBuffer.SelectiveOutputMask & ZERO_PRECSHADOW_MASK) ? 0 : 1);