本部分主要来看一下在使用延迟渲染基础上采用多重采样技术来解决抗锯齿问题,至于延迟渲染便不再赘述,有兴趣可参照之前实现了解vulkan_延迟渲染。
一、实现原理:
了解延迟渲染原理的话,你肯定会知道延迟渲染的缺陷之一便是:不能使用硬件AA(MSAA),所以使用了延迟渲染之后,UE4等引擎只支持FXAA跟TXAA。
本部分我们主要是讲述在vulkan延迟渲染中实现MSAA:
- CPU端查看硬件支持的多重采样量级数;
- 通过GLSL中texelFetch函数,逐个读取多重采样纹理(sampler2DMS)中各采样点的纹素;
- 对每个纹素平均Ambient、Diffuse和Specular合成到最终像素颜色输出;
上述实现中我们需要注意的是:下边颜色附件与管线光栅化的两个结构体中的采样量必须设置相同使用(这儿如果不同:vulkan使用不规范,校验层会给出警告,但不会崩溃);
// Parameters describing a framebuffer attachment to be created.
struct AttachmentCreateInfo
{
	// Attachment dimensions.
	uint32_t width;
	uint32_t height;
	// Number of layers of the attachment.
	uint32_t layerCount;
	// Format of the attachment.
	VkFormat format;
	// Usage flags of the attachment.
	VkImageUsageFlags usage;
	// Sample count of the attachment; single-sampled unless overridden.
	VkSampleCountFlagBits imageSampleCount = VK_SAMPLE_COUNT_1_BIT;
};
// Multisample state of a graphics pipeline, quoted from the Vulkan C API.
// Field notes below follow the Vulkan specification for this structure.
typedef struct VkPipelineMultisampleStateCreateInfo {
// Structure type tag identifying this struct.
VkStructureType sType;
// Pointer to an extension structure chain, or null.
const void* pNext;
// Reserved flags.
VkPipelineMultisampleStateCreateFlags flags;
// Number of samples used during rasterization.
VkSampleCountFlagBits rasterizationSamples;
// Enables per-sample shading.
VkBool32 sampleShadingEnable;
// Minimum fraction of sample shading when sampleShadingEnable is set.
float minSampleShading;
// Optional sample mask array.
const VkSampleMask* pSampleMask;
// Enables alpha-to-coverage.
VkBool32 alphaToCoverageEnable;
// Enables alpha-to-one.
VkBool32 alphaToOneEnable;
} VkPipelineMultisampleStateCreateInfo;
二、代码实现
2.1 硬件采样量获取
首先,我们查查我们的硬件能用多少采样量。大多数现代GPU支持至少8个采样量。默认情况下,每个像素只使用一个采样点,这相当于不进行多重采样,在这种情况下,最终图像将保持不变。可以从与我们选择的物理设备相关的VkPhysicalDeviceProperties中提取准确的最大采样数量。我们使用的是深度缓冲区,所以我们必须同时考虑颜色和深度的采样数量—这两个值都是位掩码,两者同时支持的采样数量(即按位与的结果)中的最大值才是我们能使用的最大值。添加一个函数为我们提取这些信息:
// Returns the highest sample count that is supported by BOTH color and depth
// framebuffer attachments of the selected physical device.
//
// framebufferColorSampleCounts and framebufferDepthSampleCounts are bitmasks of
// VkSampleCountFlagBits, so the commonly supported set is their bitwise AND.
// Taking the numeric minimum of the two masks is not equivalent: for example
// color = 1|4 and depth = 1|2 would yield 1|2 and report 2 samples, which the
// color attachment does not support.
//
// Falls back to VK_SAMPLE_COUNT_1_BIT when no higher common count exists.
VkSampleCountFlagBits getMaxUsableSampleCount()
{
	const VkSampleCountFlags counts =
		deviceProperties.limits.framebufferColorSampleCounts &
		deviceProperties.limits.framebufferDepthSampleCounts;

	// Probe from the highest sample count downwards; the first hit wins.
	if (counts & VK_SAMPLE_COUNT_64_BIT) { return VK_SAMPLE_COUNT_64_BIT; }
	if (counts & VK_SAMPLE_COUNT_32_BIT) { return VK_SAMPLE_COUNT_32_BIT; }
	if (counts & VK_SAMPLE_COUNT_16_BIT) { return VK_SAMPLE_COUNT_16_BIT; }
	if (counts & VK_SAMPLE_COUNT_8_BIT) { return VK_SAMPLE_COUNT_8_BIT; }
	if (counts & VK_SAMPLE_COUNT_4_BIT) { return VK_SAMPLE_COUNT_4_BIT; }
	if (counts & VK_SAMPLE_COUNT_2_BIT) { return VK_SAMPLE_COUNT_2_BIT; }
	return VK_SAMPLE_COUNT_1_BIT;
}
之后在主程序中调用获取:
// Sample count selected for rendering; starts at one sample.
VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_1_BIT;
// Replace the default with the value reported by getMaxUsableSampleCount().
sampleCount = getMaxUsableSampleCount();
2.2 U-Buffer数据交互
我们主要说下重要部分,至于其余常规代码便不再赘述。
// Creates two pipelines from the same pipelineCI that differ only in the
// value passed to the fragment shader through specialization data.

// Specialization value; initially the selected sample count.
uint32_t specializationData = sampleCount;
// Describes a single specialization entry backed by specializationData.
// NOTE(review): specializationEntry is defined outside this excerpt - presumably it maps constant_id 0; confirm.
VkSpecializationInfo specializationInfo;
specializationInfo.mapEntryCount = 1;
specializationInfo.pMapEntries = &specializationEntry;
specializationInfo.dataSize = sizeof(specializationData);
// Stores a pointer, not a copy: the later write to specializationData is
// what the second pipeline creation reads.
specializationInfo.pData = &specializationData;
// NOTE(review): rasterizationState and shaderStages are presumably referenced by pipelineCI - confirm.
rasterizationState.cullMode = VK_CULL_MODE_FRONT_BIT;
// With MSAA
shaderStages[0] = loadShader("deferred.vert.spv", VK_SHADER_STAGE_VERTEX_BIT);
shaderStages[1] = loadShader("deferred.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT);
// Only the fragment stage receives the specialization data.
shaderStages[1].pSpecializationInfo = &specializationInfo;
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCI, nullptr, &pipelines.deferred));
// No MSAA (1 sample)
// Must stay between the two vkCreateGraphicsPipelines calls so that each
// pipeline is created with its own value.
specializationData = 1;
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCI, nullptr, &pipelines.deferredNoMSAA));
在创建管线的时候我们主要通过特化常量(specialization constant)来控制着色器中的采样级数。
我们也可以启用VkPipelineMultisampleStateCreateInfo中的sampleShadingEnable,并通过minSampleShading设置最小采样着色率,来进一步优化MSAA效果。
2.3 着色器
2.3.1 G-Buffer着色器
顶点着色器
#version 450
// G-Buffer pass vertex shader: offsets each vertex by a per-instance position,
// transforms it to clip space and forwards world-space position, normal,
// tangent, UV and vertex color to the fragment stage.
layout (location = 0) in vec4 inPos;
layout (location = 1) in vec2 inUV;
layout (location = 2) in vec3 inColor;
layout (location = 3) in vec3 inNormal;
layout (location = 4) in vec3 inTangent;
layout (binding = 0) uniform UBO
{
mat4 projection;
mat4 model;
mat4 view;
// Per-instance offsets, indexed with gl_InstanceIndex in main()
vec4 instancePos[3];
} ubo;
layout (location = 0) out vec3 outNormal;
layout (location = 1) out vec2 outUV;
layout (location = 2) out vec3 outColor;
layout (location = 3) out vec3 outWorldPos;
layout (location = 4) out vec3 outTangent;
out gl_PerVertex
{
vec4 gl_Position;
};
void main()
{
// Apply the instance offset before any matrix transform.
// NOTE(review): the w components are added as well, so instancePos[].w is presumably 0 - confirm.
vec4 tmpPos = vec4(inPos.xyz, 1.0) + ubo.instancePos[gl_InstanceIndex];
gl_Position = ubo.projection * ubo.view * ubo.model * tmpPos;
outUV = inUV;
// Position in world space
outWorldPos = vec3(ubo.model * tmpPos);
// Normal in world space: the inverse-transpose of the model matrix's upper
// 3x3 is used for both the normal and the tangent
mat3 mNormal = transpose(inverse(mat3(ubo.model)));
outNormal = mNormal * normalize(inNormal);
outTangent = mNormal * normalize(inTangent);
// Pass the vertex color through unchanged
outColor = inColor;
}
片元着色器
#version 450
// G-Buffer pass fragment shader: writes world-space position, the normal
// perturbed by the normal map, and the albedo into three color attachments.
layout (binding = 1) uniform sampler2D samplerColor;
layout (binding = 2) uniform sampler2D samplerNormalMap;
layout (location = 0) in vec3 inNormal;
layout (location = 1) in vec2 inUV;
layout (location = 2) in vec3 inColor;
layout (location = 3) in vec3 inWorldPos;
layout (location = 4) in vec3 inTangent;
layout (location = 0) out vec4 outPosition;
layout (location = 1) out vec4 outNormal;
layout (location = 2) out vec4 outAlbedo;
void main()
{
outPosition = vec4(inWorldPos, 1.0);
// Build the TBN basis from the interpolated normal and tangent, then use it
// to transform the normal-map sample out of tangent space
vec3 N = normalize(inNormal);
vec3 T = normalize(inTangent);
vec3 B = cross(N, T);
mat3 TBN = mat3(T, B, N);
// The normal map stores components in [0, 1]; remap to [-1, 1] first
vec3 tnorm = TBN * normalize(texture(samplerNormalMap, inUV).xyz * 2.0 - vec3(1.0));
outNormal = vec4(tnorm, 1.0);
outAlbedo = texture(samplerColor, inUV);
}
常规着色器便不再赘述。
2.3.2 光照混合着色器
顶点着色器
#version 450
// Lighting pass vertex shader: generates a triangle from gl_VertexIndex alone,
// without any vertex input.
layout (location = 0) out vec2 outUV;
void main()
{
// Vertex indices 0, 1, 2 yield UVs (0,0), (2,0), (0,2) ...
outUV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);
// ... which map to positions (-1,-1), (3,-1), (-1,3): one triangle that
// covers the whole [-1, 1] clip-space square
gl_Position = vec4(outUV * 2.0f - 1.0f, 0.0f, 1.0f);
}
片元着色器
#version 450
// Lighting pass fragment shader: reads the multisampled G-Buffer attachments
// per sample, lights every sample and averages the results into one color.
layout (binding = 1) uniform sampler2DMS samplerPosition;
layout (binding = 2) uniform sampler2DMS samplerNormal;
layout (binding = 3) uniform sampler2DMS samplerAlbedo;
layout (location = 0) in vec2 inUV;
layout (location = 0) out vec4 outFragcolor;
struct Light {
vec4 position;
vec3 color;
float radius;
};
layout (binding = 4) uniform UBO
{
Light lights[6];
vec4 viewPos;
// 0 = normal lighting output, 1..4 = show one G-Buffer channel (see main())
int debugDisplayTarget;
} ubo;
// Number of samples per texel; specialization constant 0, defaulting to 8
layout (constant_id = 0) const int NUM_SAMPLES = 8;
#define NUM_LIGHTS 6
// Averages all samples of one texel; used for the ambient term
// (could be folded into main() to save a call)
vec4 resolve(sampler2DMS tex, ivec2 uv)
{
vec4 result = vec4(0.0);
for (int i = 0; i < NUM_SAMPLES; i++)
{
vec4 val = texelFetch(tex, uv, i);
result += val;
}
// Average over the samples
return result / float(NUM_SAMPLES);
}
// Sums the diffuse and specular contribution of all NUM_LIGHTS lights for one
// sample. albedo.rgb scales the diffuse term, albedo.a the specular term.
vec3 calculateLighting(vec3 pos, vec3 normal, vec4 albedo)
{
vec3 result = vec3(0.0);
for(int i = 0; i < NUM_LIGHTS; ++i)
{
// Vector from the sample position to the light
vec3 L = ubo.lights[i].position.xyz - pos;
// Distance between the light and the sample position
float dist = length(L);
// Vector from the sample position to the viewer
vec3 V = ubo.viewPos.xyz - pos;
V = normalize(V);
L = normalize(L);
// Attenuation factor; the +1.0 keeps the denominator non-zero at dist == 0
float atten = ubo.lights[i].radius / (pow(dist, 2.0) + 1.0);
// Diffuse term
vec3 N = normalize(normal);
float NdotL = max(0.0, dot(N, L));
vec3 diff = ubo.lights[i].color * albedo.rgb * NdotL * atten;
// Specular term
vec3 R = reflect(-L, N);
float NdotR = max(0.0, dot(R, V));
vec3 spec = ubo.lights[i].color * albedo.a * pow(NdotR, 8.0) * atten;
result += diff + spec;
}
return result;
}
void main()
{
// Convert the normalized UV into integer texel coordinates for texelFetch
ivec2 attDim = textureSize(samplerPosition);
ivec2 UV = ivec2(inUV * attDim);
// G-Buffer debug display: shows sample 0 of the selected attachment
// NOTE(review): the switch has no default case, so values above 4 leave outFragcolor.rgb unwritten.
if (ubo.debugDisplayTarget > 0) {
switch (ubo.debugDisplayTarget) {
case 1:
outFragcolor.rgb = texelFetch(samplerPosition, UV, 0).rgb;
break;
case 2:
outFragcolor.rgb = texelFetch(samplerNormal, UV, 0).rgb;
break;
case 3:
outFragcolor.rgb = texelFetch(samplerAlbedo, UV, 0).rgb;
break;
case 4:
outFragcolor.rgb = texelFetch(samplerAlbedo, UV, 0).aaa;
break;
}
outFragcolor.a = 1.0;
return;
}
#define ambient 0.15
// Ambient term: uses the albedo averaged over all samples
vec4 alb = resolve(samplerAlbedo, UV);
vec3 fragColor = vec3(0.0);
// Light every sample individually; the sum is averaged below
for (int i = 0; i < NUM_SAMPLES; i++)
{
vec3 pos = texelFetch(samplerPosition, UV, i).rgb;
vec3 normal = texelFetch(samplerNormal, UV, i).rgb;
vec4 albedo = texelFetch(samplerAlbedo, UV, i);
fragColor += calculateLighting(pos, normal, albedo);
}
// Final color = ambient + per-sample lighting averaged over NUM_SAMPLES
fragColor = (alb.rgb * ambient) + fragColor / float(NUM_SAMPLES);
outFragcolor = vec4(fragColor, 1.0);
}
此处的片元着色器是重点实现部分。
我们可以看到,在外部通过pipeline的特化常量来控制多重采样级数;在着色器中,我们主要是通过texelFetch逐个读取多重采样纹理中的各个采样点,之后进行平均化处理来模拟MSAA颜色均衡输出。
运行后我们可以看一下开启MSAA与不开启的对比图如下: