概述
hlsl源码如下:
// Vertex-to-pixel interface: this struct is returned by VS and taken as the parameter of PS.
struct PSInput
{
float4 position : SV_POSITION; // position, bound to the SV_POSITION semantic
float4 color : COLOR; // first colour value, semantic COLOR
float4 color2 : COLOR2; // second colour value, semantic COLOR2; read by PS
};
// Vertex shader: copies the incoming position and colour into a PSInput and returns it.
// NOTE(review): result.color2 is never assigned here, although PS reads
// input.color2 -- confirm whether leaving it unwritten is intentional for this repro.
PSInput VS(float4 pos : POSITION, float4 color : COLOR)
{
PSInput result;
result.position = pos; // pass the position through unchanged
result.color = color; // pass the colour through unchanged
return result;
}
// Pixel shader: writes the sum of the two colour inputs to SV_Target.
float4 PS(PSInput input) : SV_Target
{
return input.color + input.color2; // adds the COLOR and COLOR2 inputs
}
在为PS生成NIR时,经过nir_lower_io之后,所有load_input的base都是0,具体如下:
nir_lower_io
shader: MESA_SHADER_FRAGMENT
source_sha1: {0x66390dc6, 0x70183d7d, 0x85314169, 0x1ea4ea16, 0x26fa6adc}
internal: false
stage: 4
next_stage: 0
inputs_read: 33-34
outputs_written: 4
subgroup_size: 0
bit_sizes_float: 0x20
bit_sizes_int: 0x20
origin_upper_left: true
inputs: 0
outputs: 0
uniforms: 0
decl_var system INTERP_MODE_NONE none vec4 SV_Position (SYSTEM_VALUE_FRAG_COORD)
decl_var shader_in INTERP_MODE_NONE none vec4 COLOR (VARYING_SLOT_VAR1.xyzw, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 COLOR_2 (VARYING_SLOT_VAR2.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 SV_Target (FRAG_RESULT_DATA0.xyzw, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0 = deref_var &COLOR_2 (shader_in vec4)
32 %10 = load_const (0x00000000 = 0.000000)
32x4 %11 = @load_input (%10) (base=0, range=1, component=0, dest_type=float32, io location=VARYING_SLOT_VAR2 slots=1) // COLOR_2
32 %2 = deref_var &COLOR (shader_in vec4)
32 %12 = load_const (0x00000000 = 0.000000)
32x4 %13 = @load_input (%12) (base=0, range=1, component=0, dest_type=float32, io location=VARYING_SLOT_VAR1 slots=1) // COLOR
32 %4 = fadd %13.x, %11.x
32 %5 = fadd %13.y, %11.y
32 %6 = fadd %13.z, %11.z
32 %7 = fadd %13.w, %11.w
32 %8 = deref_var &SV_Target (shader_out vec4)
32x4 %9 = vec4 %4, %5, %6, %7
@store_deref (%8, %9) (wrmask=xyzw, access=none)
return (pass_flags: 0xcd)
// succs: b1
block b1:
}
其中%11(COLOR_2,VARYING_SLOT_VAR2)和%13(COLOR,VARYING_SLOT_VAR1)这两条load_input的base都是0,也就是两个不同的输入被指向了同一个base,这是错误的。
分析
作为对照,用mesa源代码编译出驱动,并使用编译出来的驱动去绘制三角形。fs源代码如下:
#version 430
// Fragment shader for the comparison run: two vec4 inputs at explicit
// locations 1 and 2, summed into a single colour output.
layout(location = 1) in vec4 COLOR;
layout(location = 2) in vec4 COLOR2;
out vec4 FragColor; // colour output of the shader
void main()
{
FragColor = COLOR + COLOR2; // sum of the two inputs
}
对应的nir如下(注意:这里贴出的是vs的nir,即shader: MESA_SHADER_VERTEX,它的输出COLOR/COLOR2与上面fs的输入相对应):
shader: MESA_SHADER_VERTEX
source_blake3: {0x7338de06, 0x8f93c5b1, 0xe49d7e79, 0xe70d38eb, 0xcfc0ab1f, 0x0f3bb3fd, 0xfc42e485, 0xb2e36f24}
name: GLSL3
internal: false
stage: 0
next_stage: 4
inputs_read: 15,18
outputs_written: 0,33-34
subgroup_size: 1
bit_sizes_float: 0x20
bit_sizes_int: 0x20
flrp_lowered: true
inputs: 2
outputs: 3
uniforms: 0
decl_var shader_in INTERP_MODE_NONE none vec3 aPos (VERT_ATTRIB_GENERIC0.xyz, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 aPos2 (VERT_ATTRIB_GENERIC3.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 gl_Position (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 COLOR (VARYING_SLOT_VAR1.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 COLOR2 (VARYING_SLOT_VAR2.xyzw, 2, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0(driver_location=0) = deref_var &aPos (shader_in vec3)
32x3 %1 = @load_deref (%0) (access=none)
32 %2(driver_location=1) = deref_var &aPos2 (shader_in vec4)
32x4 %3 = @load_deref (%2) (access=none)
32 %4 = load_const (0x3f800000 = 1.000000 = 1065353216)
32x4 %5 = vec4 %1.x, %1.y, %1.z, %4
32x4 %6 = fadd %5, %3
32 %7(driver_location=0) = deref_var &gl_Position (shader_out vec4)
@store_deref (%7, %6) (wrmask=xyzw, access=none)
32 %8(driver_location=1) = deref_var &COLOR (shader_out vec4)
@store_deref (%8, %3) (wrmask=xyzw, access=none)
32 %9(driver_location=2) = deref_var &COLOR2 (shader_out vec4)
@store_deref (%9, %3) (wrmask=xyzw, access=none)
// succs: b1
block b1:
}
其中的driver_location是我自己额外添加的打印,它其实就是base。
由上面可以知道,mesa为每个变量分配了不同的driver_location(COLOR为1,COLOR2为2),而我们的结果中base全部为0,说明我们的流程漏掉了分配位置的步骤。
解决
通过调试源码可以知道,添加以下代码可以解决问题。
/**
 * Remap the varying slot of every variable in \p shader whose mode matches
 * \p mode, so that the glsl->nir path is aligned with the
 * anything->tgsi->nir path:
 *
 *   - VARYING_SLOT_VAR0 up to (but excluding) VARYING_SLOT_PATCH0 is shifted
 *     up by 9,
 *   - VARYING_SLOT_PNTC becomes VARYING_SLOT_VAR8,
 *   - VARYING_SLOT_TEX0 .. VARYING_SLOT_TEX7 is rebased onto
 *     VARYING_SLOT_VAR0.
 *
 * Any other location is left untouched.
 *
 * TODO: the parts that depend on struct st_context are disabled in this
 * copy. The original skipped the remap entirely when
 * st->needs_texcoord_semantic (PIPE_CAP_TGSI_TEXCOORD) was set, and asserted
 * !st->allow_st_finalize_nir_twice because this is called from finalize and
 * must not be applied twice. Until a context is passed in, the remap is
 * applied unconditionally.
 */
static void
st_nir_fixup_varying_slots(/* struct st_context *st, */ nir_shader *shader,
                           nir_variable_mode mode)
{
   nir_foreach_variable_with_modes(var, shader, mode) {
      const int slot = var->data.location;

      if (slot >= VARYING_SLOT_VAR0 && slot < VARYING_SLOT_PATCH0) {
         /* Presumably 9 = the TEX0..TEX7 and PNTC slots that are moved
          * into VAR0..VAR8 by the branches below -- TODO confirm.
          */
         var->data.location = slot + 9;
      } else if (slot == VARYING_SLOT_PNTC) {
         var->data.location = VARYING_SLOT_VAR8;
      } else if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
         var->data.location = slot + (VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0);
      }
   }
}
/**
 * Run nir_assign_io_var_locations() over the input and/or output variables
 * of \p nir, depending on its stage, and apply st_nir_fixup_varying_slots()
 * to the same variable mode right after each assignment -- except for
 * fragment shader outputs, which get no fixup.
 *
 *   vertex                 : outputs (+ fixup)
 *   geometry / tess stages : inputs (+ fixup), then outputs (+ fixup)
 *   fragment               : inputs (+ fixup), then outputs (no fixup)
 *   compute                : nothing yet
 *
 * Shaders whose IO has already been lowered have no variables left to
 * process, so they return immediately. Any other stage is treated as
 * unreachable.
 *
 * TODO: the struct st_context parameter is commented out in this copy; it
 * has to be restored once st_nir_fixup_varying_slots() needs the context
 * again.
 */
void
st_nir_assign_varying_locations(/* struct st_context *st, */ nir_shader *nir)
{
   /* Lowered IO don't have variables, so exit. */
   if (nir->info.io_lowered) {
      return;
   }

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      nir_assign_io_var_locations(nir, nir_var_shader_out,
                                  &nir->num_outputs, nir->info.stage);
      st_nir_fixup_varying_slots(/* st, */ nir, nir_var_shader_out);
      break;

   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
      nir_assign_io_var_locations(nir, nir_var_shader_in,
                                  &nir->num_inputs, nir->info.stage);
      st_nir_fixup_varying_slots(/* st, */ nir, nir_var_shader_in);

      nir_assign_io_var_locations(nir, nir_var_shader_out,
                                  &nir->num_outputs, nir->info.stage);
      st_nir_fixup_varying_slots(/* st, */ nir, nir_var_shader_out);
      break;

   case MESA_SHADER_FRAGMENT:
      nir_assign_io_var_locations(nir, nir_var_shader_in,
                                  &nir->num_inputs, nir->info.stage);
      st_nir_fixup_varying_slots(/* st, */ nir, nir_var_shader_in);

      /* Outputs get locations assigned but no slot fixup. */
      nir_assign_io_var_locations(nir, nir_var_shader_out,
                                  &nir->num_outputs, nir->info.stage);
      break;

   case MESA_SHADER_COMPUTE:
      /* TODO? */
      break;

   default:
      unreachable("invalid shader type");
   }
}
上面代码中与st(struct st_context)相关的部分是我自己注释掉的(例如needs_texcoord_semantic的判断),后续需要结合上下文考虑是否恢复。