目录
23.创建管道状态对象(pipeline state object)
基本组件的创建和绘图流程
1.头文件
#include <d3d12.h> | 最基本的dx12头文件 |
#include "d3dx12.h" | d3dx12与以往的不同,现在只有一个头文件,去官方示例里复制一份即可 |
#include <dxgi1_6.h> | dxgi相关 |
#include <D3DCompiler.h> | 着色器编译相关 |
#include <DirectXMath.h> | 数学库 |
2.类型名字
使用DirectX命名空间,然后typedef一些DXGI的类型,方便以后替换。
using namespace DirectX;
typedef IDXGIFactory7 _IDXGIFactory;
typedef IDXGIAdapter4 _IDXGIAdapter;
typedef IDXGISwapChain4 _IDXGISwapChain;
3.检测XMath支持
// Abort initialization when XMVerifyCPUSupport() reports that DirectXMath is not supported here.
if (!XMVerifyCPUSupport())
{
dnd_debug(DL::DAMAGE, L"DirectXMath不支持!");
return;
}
4.开启调试层
开启调试层后,在出现错误时,被DX库检测到,就会输出原因。
#if defined(_DEBUG)
// Enable the D3D12 debug layer.
// Debug builds only: with the layer on, errors detected by the DX runtime are reported with their cause.
com_ptr<ID3D12Debug> debugController;
hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建调试层失败!");
return;
}
debugController->EnableDebugLayer();
#endif
4.创建DXGI的Factory接口
IDXGIFactory是DXGI的基础接口,我们需要用com_ptr保存它的指针。可能你还看到其他人用ComPtr,这是偏旧一些的代码。
//.h
// Owning pointer to the DXGI factory, the base DXGI interface used below to
// enumerate adapters and create the swap chain.
com_ptr<_IDXGIFactory> _factory;
//.cpp
// Let IID_PPV_ARGS derive both the IID and the out-pointer from the com_ptr,
// exactly like every other creation call in this file, instead of casting the
// address of the com_ptr object itself to void**.
hr = CreateDXGIFactory1(IID_PPV_ARGS(&_factory));
if (FAILED(hr))
{
    dnd_debug(DL::DAMAGE, L"F创建DXGI接口失败!");
    return;
}
5.获取硬件适配器
IDXGIAdapter是显卡的抽象,但不一定是硬件的。通过GetHardwareAdapter函数返回硬件适配器,并attach赋予指针所有权给com_ptr<_IDXGIAdapter>。
//.h
// Owning reference to the adapter that the device is created on.
com_ptr<_IDXGIAdapter> _adapter;
//.cpp
// GetHardwareAdapter stores either an adapter pointer or nullptr in hardwareAdapter.
_IDXGIAdapter* hardwareAdapter;
GetHardwareAdapter(_factory.get(), &hardwareAdapter);
if (!hardwareAdapter)
{
    dnd_debug(DL::DAMAGE, L"获取硬件适配器失败!");
    return;
}
// attach() hands ownership of the raw pointer over to the com_ptr.
_adapter.attach(hardwareAdapter);
// Selects the first adapter, enumerated in high-performance-first order, that is
// real hardware and can create a Direct3D 12 device at feature level 11_0.
//
// pFactory  : factory used to enumerate the adapters.
// ppAdapter : receives an owning pointer to the chosen adapter (the caller must
//             release it or attach it to a com_ptr), or nullptr when none qualifies.
void GetHardwareAdapter(_IDXGIFactory* pFactory, _IDXGIAdapter** ppAdapter)
{
    *ppAdapter = nullptr;
    for (UINT adapterIndex = 0; ; ++adapterIndex)
    {
        _IDXGIAdapter* pAdapter = nullptr;
        const HRESULT hrEnum = pFactory->EnumAdapterByGpuPreference(
            adapterIndex,
            DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, __uuidof(_IDXGIAdapter), reinterpret_cast<void**>(&pAdapter));
        if (FAILED(hrEnum) || pAdapter == nullptr)
        {
            // DXGI_ERROR_NOT_FOUND: no more adapters to enumerate.
            // Any other failure leaves pAdapter null as well, so stop instead of
            // dereferencing it below or looping forever.
            break;
        }
        // Skip software adapters (e.g. the Basic Render Driver): this function
        // is meant to return a hardware adapter.
        DXGI_ADAPTER_DESC1 desc = {};
        const bool isSoftware = SUCCEEDED(pAdapter->GetDesc1(&desc))
            && (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0;
        // Check to see if the adapter supports Direct3D 12, but don't create the
        // actual device yet.
        if (!isSoftware
            && SUCCEEDED(D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device), nullptr)))
        {
            *ppAdapter = pAdapter;
            return;
        }
        pAdapter->Release();
    }
}
6.输出显卡信息(可选)
// Log the adapter description and its three memory sizes.
DXGI_ADAPTER_DESC adapter_desc;
_adapter->GetDesc(&adapter_desc);
dnd_debug(DL::MSG, adapter_desc.Description);
// Each size is divided by 1024 twice and truncated to int before being logged.
dnd_debug(DL::MSG, L"显卡内存:" + to_wstring(int(adapter_desc.DedicatedVideoMemory / 1024 / 1024)));
dnd_debug(DL::MSG, L"独占内存:" + to_wstring(int(adapter_desc.DedicatedSystemMemory / 1024 / 1024)));
dnd_debug(DL::MSG, L"共享内存:" + to_wstring(int(adapter_desc.SharedSystemMemory / 1024 / 1024)));
7.创建Device
其中D3D_FEATURE_LEVEL_11_0是最低要求显卡支持的特性等级(feature level),注意在GetHardwareAdapter函数里,我们也是填的这个值。这是官方文档的方法,应该不能填再低于它的值。
// Create the device on the selected adapter, requiring at least feature level 11_0
// (the same level GetHardwareAdapter probes for).
hr = D3D12CreateDevice(
_adapter.get(),
D3D_FEATURE_LEVEL_11_0,
IID_PPV_ARGS(&_device));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建Device失败!");
return;
}
8.创建命令队列(command queue)
命令队列用于执行命令列表(command list),简单情况只需要创建一个。
D3D12_COMMAND_QUEUE_DESC desc;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.NodeMask = 0;
desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
hr = _device->CreateCommandQueue(&desc, IID_PPV_ARGS(&_commandQueue));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建命令队列失败!");
return;
}
9.创建交换链(swap chain)
SwapEffect | 交换模型 | DirectX12应该使用DXGI_SWAP_EFFECT_FLIP_DISCARD交换模型 |
BufferCount | 缓冲数量 | 翻转丢弃模型,此值不能小于2 |
BufferUsage | 缓冲标记 | 作为绘制表面(render target)应该填DXGI_USAGE_RENDER_TARGET_OUTPUT |
OutputWindow | 窗口句柄 | 填入窗口句柄hwnd |
Windowed | 窗口模式 | 这个值最好为TRUE,切换全屏是之后的事 |
Flags | 标记 | 填入DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH让DXGI可以自行修改缓冲大小,由于不需要响应alt+enter操作,直接填0 |
Format | 颜色格式 | 统一使用DXGI_FORMAT_R8G8B8A8_UNORM |
Width | 缓冲宽度 | 比如800 |
Height | 缓冲高度 | 比如600 |
BufferDesc其余的刷新率、扫描方式等字段如下填默认值即可,因为只有全屏模式才需要匹配这些信息。
而SampleDesc多重采样(MSAA)字段,Count填1,Quality填0即可。因为翻转丢弃模型不支持多重采样。
// Create the swap chain through the base IDXGISwapChain interface, then query
// it into the _swapChain member at the end.
com_ptr<IDXGISwapChain> swapChain;
DXGI_SWAP_CHAIN_DESC descSwapChain;
descSwapChain.BufferCount = SWAP_CHAIN_BUFFER_COUNT;
descSwapChain.BufferDesc.Format = DXGI_FORMAT_TYPE;
descSwapChain.BufferDesc.Height = h;
// Refresh rate, scaling and scanline ordering are left unspecified.
descSwapChain.BufferDesc.RefreshRate.Denominator = 0;
descSwapChain.BufferDesc.RefreshRate.Numerator = 0;
descSwapChain.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
descSwapChain.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
descSwapChain.BufferDesc.Width = w;
descSwapChain.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
descSwapChain.Flags = 0;// DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
descSwapChain.OutputWindow = g_system.GetHwnd();
// No multisampling: one sample, quality 0.
descSwapChain.SampleDesc.Count = 1;
descSwapChain.SampleDesc.Quality = 0;
descSwapChain.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
descSwapChain.Windowed = TRUE;
// The command queue is passed as the first argument of CreateSwapChain.
hr = _factory->CreateSwapChain(
_commandQueue.get(),
&descSwapChain,
swapChain.put()
);
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建交换链失败!");
return;
}
// Query the created swap chain for the interface type held by _swapChain.
swapChain.as(_swapChain);
10.禁止alt+enter的功能
创建交换链关联窗口后调用才有效。
// Disable DXGI's Alt+Enter handling for this window; only effective once the swap chain has been created for it.
_factory->MakeWindowAssociation(g_system.GetHwnd(), DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER);
11.记录当前后背缓冲索引
UINT _frameIndex = _swapChain->GetCurrentBackBufferIndex();
12.创建绘制表面视图(rtv)堆描述符
绘制表面视图(render target view),缩写rtv。
堆描述符(即描述符堆,descriptor heap),是存放一组描述符的容器,其中每个描述符描述一个资源。所以我们创建了一个_rtvHeap来表示对rtv的引用,其中NumDescriptors字段和创建交换链时填入的缓冲数量一致。
// Create the RTV descriptor heap with one descriptor per swap-chain buffer.
D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
rtvHeapDesc.NodeMask = 0;
rtvHeapDesc.NumDescriptors = SWAP_CHAIN_BUFFER_COUNT;//
rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
hr = _device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&_rtvHeap));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建RTV堆描述符失败!");
return;
}
13.创建深度模板视图(dsv)堆描述符
深度缓冲用作深度测试,而模板缓冲可作为镜子效果。为了创建深度模板视图(depth stencil view),首先需要创建堆描述符。这里NumDescriptors字段填1,因为只需要1个。
// Create the DSV descriptor heap; a single descriptor is enough because only one depth-stencil view is used.
D3D12_DESCRIPTOR_HEAP_DESC dsvHeapDesc = {};
dsvHeapDesc.NumDescriptors = 1;
dsvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
dsvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
hr = _device->CreateDescriptorHeap(&dsvHeapDesc, IID_PPV_ARGS(&_dsvHeap));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建深度模板缓存堆描述符失败!");
return;
}
14.获取堆描述符偏移大小
其中常量缓冲视图(constant buffer view)、着色器资源视图(shader resource view)和无序访问视图(unordered access view)共用同一种堆类型(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV),它们的堆描述符偏移大小是一样的。
// Cache the handle increment size of the RTV heap type and of the CBV/SRV/UAV heap type;
// these are the strides used when offsetting descriptor handles.
_sizeDescriptorRtv = _device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
_sizeDescriptorCbvSrv = _device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
一个堆描述符描述了多个资源,这个偏移就是用于确定某个资源所在的位置。比如我们创建了包含N个纹理的着色器资源视图(shader resource view)堆描述符,通过以下方式切换纹理:
// Build the GPU handle of descriptor number `index` in the SRV heap and bind it as root parameter 0.
CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandleTex(_srvHeap->GetGPUDescriptorHandleForHeapStart(), index, g_dx._sizeDescriptorCbvSrv);
_commandList->SetGraphicsRootDescriptorTable(0, srvHandleTex);
15.创建rtv
前面创建了rtv的堆描述符,那个时候就确定了描述资源的数量(开始填入的SWAP_CHAIN_BUFFER_COUNT)。这时实际的创建rtv,由于它需要动态修改(需要响应窗口大小发生改变),我们写成可反复调用的形式:
// (Re)creates one render target view per swap-chain back buffer.
// Kept as a re-callable function because the views have to be rebuilt when the
// window size changes.
//
// w, h : buffer size; not used by the current implementation.
void DirectX::_create_rtv(LONG w, LONG h)
{
    CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(_rtvHeap->GetCPUDescriptorHandleForHeapStart());
    // Create a RTV for each frame.
    for (UINT n = 0; n < SWAP_CHAIN_BUFFER_COUNT; n++)
    {
        HRESULT hr = _swapChain->GetBuffer(n, IID_PPV_ARGS(&_renderTargets[n]));
        if (SUCCEEDED(hr))
        {
            _device->CreateRenderTargetView(_renderTargets[n].get(), nullptr, rtvHandle);
        }
        else
        {
            dnd_debug(DL::ERR, L"从交换链获取后背缓冲失败!");
        }
        // Advance even after a failure so that buffer n always maps to descriptor
        // slot n; the render loop looks the RTV up by _frameIndex.
        rtvHandle.Offset(1, _sizeDescriptorRtv);
    }
}
这里构造了一个rtvHandle变量,然后每个缓冲创建一个RTV,循环的末尾调用了rtvHandle.Offset(1, _sizeDescriptorRtv);进行偏移。
16.创建命令分配器(command allocator)
命令分配器为命令列表所记录的命令提供存储空间。它的数量等于交换链的缓冲数量。
// Create one direct command allocator per swap-chain buffer; stop at the first failure.
for (UINT n = 0; n < SWAP_CHAIN_BUFFER_COUNT; n++)
{
hr = _device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&_commandAllocator[n]));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建命令分配器失败!");
return;
}
}
17.创建dsv
先通过CreateCommittedResource在显存上创建资源,再调用CreateDepthStencilView和前面dsvHeap绑定关系。
因为我暂时只使用了深度测试,所以DepthStencil.Stencil字段为0,而DepthStencil.Depth字段我设为的是1.0f,这个值只能在[0, 1]的范围。在构建投影矩阵时,所有顶点的z值都会被变换到这个范围。而2d绘制使用的是正交投影矩阵,我们手动设置z值在此范围即可,这也是精灵的遮挡关系。
并且每帧都需要重置dsv的缓冲值,类似于清屏。
它依赖于交换链缓冲的大小,所以需要多次调用,写成函数的形式。
// (Re)creates the depth buffer resource and its depth-stencil view.
// Kept as a function because the buffer depends on the swap-chain buffer size
// and therefore has to be rebuilt more than once.
//
// w, h : width and height of the depth texture.
void DirectX::_create_dsv(LONG w, LONG h)
{
    // Clear value the resource is optimized for: depth DEPTH_VALUE_START, stencil 0.
    D3D12_CLEAR_VALUE clearValue = {};
    clearValue.Format = DXGI_FORMAT_D32_FLOAT;
    clearValue.DepthStencil.Depth = DEPTH_VALUE_START;
    clearValue.DepthStencil.Stencil = 0;

    // Committed D32_FLOAT 2D texture on the default heap, created directly in the depth-write state.
    HRESULT hr = _device->CreateCommittedResource(
        pointer(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
        D3D12_HEAP_FLAG_NONE,
        pointer(CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_D32_FLOAT, w, h, 1, 0, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)),
        D3D12_RESOURCE_STATE_DEPTH_WRITE,
        &clearValue,
        IID_PPV_ARGS(&_depthStencil));
    if (FAILED(hr))
    {
        dnd_debug(DL::DAMAGE, L"创建dsv失败!");
        return;
    }

    // Place the view in the first (and only) slot of the DSV heap.
    D3D12_DEPTH_STENCIL_VIEW_DESC viewDesc = {};
    viewDesc.Format = DXGI_FORMAT_D32_FLOAT;
    viewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    viewDesc.Flags = D3D12_DSV_FLAG_NONE;
    _device->CreateDepthStencilView(_depthStencil.get(), &viewDesc, _dsvHeap->GetCPUDescriptorHandleForHeapStart());
}
18.创建命令列表(command list)
在命令分配器的基础上创建命令列表,通过_frameIndex只需要创建1个。
// Create the single direct command list on top of the current frame's allocator, with no initial pipeline state.
hr = _device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, _commandAllocator[_frameIndex].get(), nullptr, IID_PPV_ARGS(&_commandList));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建命令列表失败!");
return;
}
19.创建用于同步的资源
当命令列表的操作被命令队列执行时,这时从内存向显卡传输资源,但这是异步的,需要时间来完成这些操作。通过同步资源我们可以知道,何时显卡完成了操作,此时就可以释放内存资源等等。
//.h
// Event that is signalled when the fence reaches a requested value.
HANDLE _fenceEvent;
// Fence signalled by the command queue to report GPU progress.
com_ptr<ID3D12Fence> _fence;
// Fence value tracked for each swap-chain buffer.
UINT64 _fenceValues[SWAP_CHAIN_BUFFER_COUNT] = {};
//.cpp
// Create synchronization objects and wait until assets have been uploaded to the GPU.
hr = _device->CreateFence(_fenceValues[_frameIndex], D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&_fence));
if (FAILED(hr))
{
    // Without a fence, every later Signal/SetEventOnCompletion call would
    // dereference a null _fence, so fail here like the other creation steps.
    dnd_debug(DL::DAMAGE, L"创建围栏失败!");
    return;
}
_fenceValues[_frameIndex]++;
// Create an event handle to use for frame synchronization.
_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (_fenceEvent == nullptr)
{
    //hr = HRESULT_FROM_WIN32(GetLastError()));
    dnd_debug(DL::DAMAGE, L"创建同步事件失败!");
    return;
}
等待GPU完成操作的函数。
void DirectX::WaitForGpu()
{
HRESULT hr = E_FAIL;
// Schedule a Signal command in the queue.
hr = _commandQueue->Signal(_fence.get(), _fenceValues[_frameIndex]);
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"命令队列Signal失败!");
return;
}
// Wait until the fence has been processed.
hr = _fence->SetEventOnCompletion(_fenceValues[_frameIndex], _fenceEvent);
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"围栏设置同步对象失败!");
return;
}
WaitForSingleObjectEx(_fenceEvent, INFINITE, FALSE);
// Increment the fence value for the current frame.
_fenceValues[_frameIndex]++;
}
20.创建根签名(root signature)
根签名简单理解,需要与shader的资源一致。最常见的需要纹理、矩阵这两种资源,纹理归于srv,而矩阵属于cbv。还需要创建一个动态的采样器(sampler)资源,采样器也可以是静态的。
// Create the root signature.
// Layout: parameter 0 = SRV table (texture, pixel shader), parameter 1 = sampler
// table (pixel shader), parameter 2 = CBV table (visible to all stages).
D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData = {};
// This is the highest version the sample supports. If CheckFeatureSupport succeeds, the HighestVersion returned will not be greater than this.
featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1;
if (FAILED(g_dx._device->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData))))
{
    featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_0;
}
CD3DX12_DESCRIPTOR_RANGE1 ranges[3];
ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC);// texture resource
ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, 0);// sampler
ranges[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC);// constant buffer
CD3DX12_ROOT_PARAMETER1 rootParameters[3];
rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_PIXEL);
rootParameters[1].InitAsDescriptorTable(1, &ranges[1], D3D12_SHADER_VISIBILITY_PIXEL);
rootParameters[2].InitAsDescriptorTable(1, &ranges[2], D3D12_SHADER_VISIBILITY_ALL);// visible to every stage
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc;
rootSignatureDesc.Init_1_1(_countof(rootParameters), rootParameters, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
com_ptr<ID3DBlob> signature;
com_ptr<ID3DBlob> error;
hr = D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, featureData.HighestVersion, signature.put(), error.put());
if (FAILED(hr))
{
    // The serializer can fail without producing an error blob, so only read
    // the blob when one was actually returned.
    if (error.get() != nullptr)
    {
        dnd_debug(DL::DAMAGE, L"序列化根签名失败:" + String::Mbtowc((char*)error->GetBufferPointer(), false));
    }
    else
    {
        dnd_debug(DL::DAMAGE, L"序列化根签名失败!");
    }
    return;
}
hr = g_dx._device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&_rootSignature));
if (FAILED(hr))
{
    dnd_debug(DL::DAMAGE, L"创建根签名失败!");
    return;
}
可以看到,0为srv,1为sampler,2为cbv。这与shader文件一致:
//2d.hlsl
// Shader resources: constant buffer at b0, texture at t0, sampler at s0.
cbuffer cb0 : register(b0)
{
float4x4 g_mWorldViewProj;
};
Texture2D g_txDiffuse : register(t0);
SamplerState g_sampler : register(s0);
只有cbv标记了全部可见(D3D12_SHADER_VISIBILITY_ALL ),因为顶点着色器(vertex shader)需要进行顶点变换:
//2d.hlsl
// Vertex shader: transforms the position by g_mWorldViewProj and passes color and uv through unchanged.
PSInput VSMain(VSInput input)
{
PSInput result;
result.position = mul(float4(input.position, 1.0f), g_mWorldViewProj);
result.color = input.color;
result.uv = input.uv;
return result;
}
而像素着色器(pixel shader)使用了纹理和采样器:
//2d.hlsl
// Pixel shader: samples the texture, multiplies by the vertex color, and discards
// the pixel when the resulting alpha is below 0.004.
float4 PSMain(PSInput input) : SV_TARGET
{
float4 Color = g_txDiffuse.Sample(g_sampler, input.uv) * input.color;
clip(Color.a - 0.004f);
return Color;
}
21.创建输入布局(input element)
输入布局描述了顶点的结构,分别是位置、颜色、UV。
// Per-vertex input layout on slot 0: POSITION (3 floats) at byte 0, COLOR (4 floats) at byte 12, TEXCOORD (2 floats) at byte 28.
D3D12_INPUT_ELEMENT_DESC inputElementDescs[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
};
这是2d绘图需要的顶点,3d绘图则可能不需要顶点的颜色,但需要加上法线、切线等属性通过光照混合纹理来计算它的颜色。
// Vertex used for 2D drawing: position, color and texture coordinate, in that order.
struct Vertex
{
XMFLOAT3 _pos;   // position
XMFLOAT4 _color; // vertex color
XMFLOAT2 _t;     // texture coordinate (uv)
};
22.编译shader
// Compile the vertex and pixel shader entry points of 2d.hlsl.
// Compiler messages, if any, are logged for both compiles; a failed compile aborts.
com_ptr<ID3DBlob> vertexShader;
com_ptr<ID3DBlob> pixelShader;
#if defined(_DEBUG)
// Enable better shader debugging with the graphics debugging tools.
UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT compileFlags = 0;
#endif
ID3DBlob* errors = nullptr;
hr = D3DCompileFromFile(L"2d.hlsl", nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, vertexShader.put(), &errors);
if (errors != nullptr)
{
    dnd_debug(Debug::Level::INFO, String::Mbtowc((char*)errors->GetBufferPointer(), false));
    errors->Release();
    // Reset after releasing so the pointer reused below can never refer to a freed blob.
    errors = nullptr;
}
if (FAILED(hr))
{
    dnd_debug(DL::DAMAGE, L"编译vs失败!");
    return;
}
hr = D3DCompileFromFile(L"2d.hlsl", nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, pixelShader.put(), &errors);
if (errors != nullptr)
{
    dnd_debug(Debug::Level::INFO, String::Mbtowc((char*)errors->GetBufferPointer(), false));
    errors->Release();
    errors = nullptr;
}
if (FAILED(hr))
{
    dnd_debug(DL::DAMAGE, L"编译ps失败!");
    return;
}
简单的2d.hlsl文件如下:
// Vertex shader input: position, color and texture coordinate.
struct VSInput
{
float3 position : POSITION;
float4 color : COLOR;
float2 uv : TEXCOORD0;
};
// Data passed from the vertex shader to the pixel shader.
struct PSInput
{
float4 position : SV_POSITION;
float4 color : COLOR;
float2 uv : TEXCOORD0;
};
// Shader resources: constant buffer at b0, texture at t0, sampler at s0.
cbuffer cb0 : register(b0)
{
float4x4 g_mWorldViewProj;
};
Texture2D g_txDiffuse : register(t0);
SamplerState g_sampler : register(s0);
// Vertex shader: transforms the position by g_mWorldViewProj and passes color and uv through unchanged.
PSInput VSMain(VSInput input)
{
PSInput result;
result.position = mul(float4(input.position, 1.0f), g_mWorldViewProj);
result.color = input.color;
result.uv = input.uv;
return result;
}
// Pixel shader: samples the texture, multiplies by the vertex color, and discards
// the pixel when the resulting alpha is below 0.004.
float4 PSMain(PSInput input) : SV_TARGET
{
float4 Color = g_txDiffuse.Sample(g_sampler, input.uv) * input.color;
clip(Color.a - 0.004f);
return Color;
}
23.创建管道状态对象(pipeline state object)
光栅(rasterizer)状态
由于2d绘图不需要通过顶点绕序判断正面与背面,CullMode设置为NONE或许能减少GPU的消耗。
// Default rasterizer state with culling turned off: 2D drawing does not need front/back-face selection.
CD3DX12_RASTERIZER_DESC rasterizerStateDesc(D3D12_DEFAULT);
rasterizerStateDesc.CullMode = D3D12_CULL_MODE_NONE;
混合(blend)状态
使用如下的混合方式可以进行半透明物体的正常绘制,但BlendEnable字段现在设置的暂时是false,即未开启颜色混合。
// Blend state shared by both PSOs. Blending itself stays disabled here; the
// translucent PSO switches BlendEnable on later.
// Start from the D3D12 defaults so that every field, including LogicOp and
// RenderTarget[1..7] which are not assigned below, holds a defined value
// instead of uninitialized stack data.
D3D12_BLEND_DESC blend_desc = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
blend_desc.AlphaToCoverageEnable = FALSE;
blend_desc.IndependentBlendEnable = FALSE;// FALSE: only RenderTarget[0] is used
blend_desc.RenderTarget[0].BlendEnable = FALSE;
blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
// When two translucent objects are blended, the result keeps the alpha of the more opaque one.
blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_MAX;
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_DEST_ALPHA;
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
blend_desc.RenderTarget[0].LogicOpEnable = FALSE;
blend_desc.RenderTarget[0].LogicOp = D3D12_LOGIC_OP_NOOP;
深度模板(depth stencil)状态
这里打开了深度测试,但关闭了模板测试。D3D12_COMPARISON_FUNC_LESS标记会使绘制时,z值小于当前像素z值的才被写入。
// Depth-stencil state: depth test on, stencil test off.
// Start from the D3D12 defaults so the stencil masks and the FrontFace/BackFace
// operations, which are not assigned below, hold defined values instead of
// uninitialized stack data.
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
// The depth test passes when the incoming z is less than the stored z.
depth_stencil_desc.DepthEnable = TRUE;
depth_stencil_desc.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
depth_stencil_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
depth_stencil_desc.StencilEnable = FALSE;
最终创建PSO
创建PSO依赖从20节(创建根签名)到前面所创建的资源,通过修改一些字段,我们创建了两个PSO。一个用于非透明精灵的绘制、一个用于半透明精灵的绘制。非透明精灵的绘制关闭了颜色混合,半透明精灵的绘制打开了颜色混合,并且深度测试函数改为了小于等于时成功(D3D12_COMPARISON_FUNC_LESS_EQUAL),因为半透明精灵必须后绘制,遇到z值相同的非透明像素依旧需要写入。
// Build two pipeline state objects from one description: NORMAL for opaque
// sprites, TRANSLUCENT for sprites that need alpha blending.
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
psoDesc.InputLayout = { inputElementDescs, _countof(inputElementDescs) };
psoDesc.pRootSignature = _rootSignature.get();
psoDesc.VS = { reinterpret_cast<UINT8*>(vertexShader->GetBufferPointer()), vertexShader->GetBufferSize() };
psoDesc.PS = { reinterpret_cast<UINT8*>(pixelShader->GetBufferPointer()), pixelShader->GetBufferSize() };
psoDesc.RasterizerState = rasterizerStateDesc;
psoDesc.BlendState = blend_desc;//CD3DX12_BLEND_DESC(D3D12_DEFAULT);
psoDesc.DepthStencilState = depth_stencil_desc;
psoDesc.SampleMask = UINT_MAX;
psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
psoDesc.NumRenderTargets = 1;
psoDesc.RTVFormats[0] = DXGI_FORMAT_TYPE;
psoDesc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
psoDesc.SampleDesc.Count = 1;
// Opaque sprites still need the depth test but no alpha blending.
hr = g_dx._device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&_pipelineState[PsoType::NORMAL]));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建PSO(normal)失败!");
return;
}
// Second PSO: same description with blending enabled and a relaxed depth test.
psoDesc.BlendState.RenderTarget[0].BlendEnable = TRUE;
// Equal depth must also pass and blend: the background may be an opaque object, and translucent sprites are always drawn later.
psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
hr = g_dx._device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&_pipelineState[PsoType::TRANSLUCENT]));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建PSO(translucent)失败!");
return;
}
24.创建采样器
先创建堆描述符,再创建采样器资源:
// Describe and create a sampler descriptor heap.
// The heap holds one descriptor and is shader visible.
D3D12_DESCRIPTOR_HEAP_DESC samplerHeapDesc = {};
samplerHeapDesc.NumDescriptors = 1;
samplerHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
samplerHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
hr = g_dx._device->CreateDescriptorHeap(&samplerHeapDesc, IID_PPV_ARGS(&_samplerHeap));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建采样器堆描述符失败!");
return;
}
// Describe and create a sampler.
// Linear min/mag/mip filtering with wrap addressing on all three axes, written to the heap's first slot.
D3D12_SAMPLER_DESC samplerDesc = {};
samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
samplerDesc.MinLOD = 0;
samplerDesc.MaxLOD = D3D12_FLOAT32_MAX;
samplerDesc.MipLODBias = 0.0f;
samplerDesc.MaxAnisotropy = 1;
samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS;
g_dx._device->CreateSampler(&samplerDesc, _samplerHeap->GetCPUDescriptorHandleForHeapStart());
25.创建索引缓冲(index buffer)
填充内存
这里使用了vector,如果使用数组,由于数组太大可能会栈溢出。一个精灵(四边形)有四个顶点,划分为两个三角形,则需要6个索引。
顶点绕序为 012、023,如下图所示:
// Fill the shared index list: every sprite (quad) owns 4 consecutive vertices
// and 6 indices, forming the two triangles 0-1-2 and 0-2-3.
vector<UINT32> indices;
indices.resize(Constant::NUM_2D_INDICES);
UINT32 n = 0; // first vertex of the current quad
// Only write complete groups of 6 so that a count which is not a multiple of 6
// can never index past the end of the vector.
for (UINT32 i = 0; i + 6 <= Constant::NUM_2D_INDICES; i += 6)
{
    indices[i + 0] = 0 + n;
    indices[i + 1] = 1 + n;
    indices[i + 2] = 2 + n;
    indices[i + 3] = 0 + n;
    indices[i + 4] = 2 + n;
    indices[i + 5] = 3 + n;
    n += 4;
}
创建默认堆(heap default)
我们需要在显存上创建索引缓冲资源,通过以下方式创建:
// Create the index buffer on the default heap, sized for NUM_2D_INDICES 32-bit
// indices and starting in the copy-destination state so it can receive the upload.
CD3DX12_HEAP_PROPERTIES heapProps2(D3D12_HEAP_TYPE_DEFAULT);
auto desc2 = CD3DX12_RESOURCE_DESC::Buffer(Constant::NUM_2D_INDICES * sizeof(UINT32));
hr = g_dx._device->CreateCommittedResource(
&heapProps2,
D3D12_HEAP_FLAG_NONE,
&desc2,
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&_indexBuffer));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建索引缓存失败(DEFAULT)!");
return;
}
通过标记 D3D12_HEAP_TYPE_DEFAULT创建的CD3DX12_HEAP_PROPERTIES 结构说明资源在显存上,再通过 CD3DX12_RESOURCE_DESC::Buffer(size);指定了缓冲区的大小。D3D12_RESOURCE_STATE_COPY_DEST指定资源的状态是复制目标(copy dest)状态,资源必须是这个状态才能通过内存上传资源。
通过标记 D3D12_HEAP_TYPE_DEFAULT创建的资源叫默认堆,它是没有CPU访问权限的,只能通过上载堆给它上传资源,这个过程就是内存到显存的过程。
上载堆(heap upload)的创建
同样上载堆是类似的创建方式,只不过堆类型改为了D3D12_HEAP_TYPE_UPLOAD,然后资源的状态必须填D3D12_RESOURCE_STATE_GENERIC_READ,这是规定。它是可以被CPU访问的,也就是存在于某片内存上。
// Create an upload-heap buffer of the same size, in the generic-read state; it
// serves as the intermediate resource for copying the indices to the default heap.
com_ptr<ID3D12Resource> indexBufferUploadHeap;
CD3DX12_HEAP_PROPERTIES heapProps3(D3D12_HEAP_TYPE_UPLOAD);
auto desc3 = CD3DX12_RESOURCE_DESC::Buffer(Constant::NUM_2D_INDICES * sizeof(UINT32));
hr = g_dx._device->CreateCommittedResource(
&heapProps3,
D3D12_HEAP_FLAG_NONE,
&desc3,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&indexBufferUploadHeap));
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"创建索引缓存失败(Upload)!");
return;
}
可以通过Map、CopyBufferRegion等接口来写入,但d3dx12有封装好的函数,简单的整体复制我们直接使用即可,先不考虑那么多(写入一部分、写入纹理等不尽相同)。
// Copy data to the intermediate upload heap and then schedule a copy
// from the upload heap to the index buffer.
// The whole index array is described as a single subresource (row pitch == slice pitch == total byte size).
D3D12_SUBRESOURCE_DATA indexData = {};
indexData.pData = (void*)indices.data();
indexData.RowPitch = Constant::NUM_2D_INDICES * sizeof(UINT32);
indexData.SlicePitch = indexData.RowPitch;
// NOTE(review): the return value of UpdateSubresources is not checked here.
UpdateSubresources<1>(g_dx._commandList.get(), _indexBuffer.get(), indexBufferUploadHeap.get(), 0, 0, 1, &indexData);
这里使用到了命令列表,说明这个操作需要GPU同步,可以理解,从内存复制到显存,必然是需要显卡告诉我们完成了此操作。
但调用命令列表的接口只是记录了操作,并没有实际执行;等到命令队列执行它,才真正开始,那个时候我们才需要等待GPU完成操作。因为vector<UINT32> indices;和com_ptr<ID3D12Resource> indexBufferUploadHeap;都是局部变量,生命周期结束后,内存也就失效了。如果在GPU完成复制之前内存就失效,显然是错误的,因为资源还没从内存全部复制到显存。所以必须在这些局部变量的生命周期结束之前,提交命令列表并等待GPU完成操作。
转换资源状态
前面创建的索引缓冲默认堆一开始是D3D12_RESOURCE_STATE_COPY_DEST状态,要作为索引缓冲资源,还需要通过资源转换屏障转换到D3D12_RESOURCE_STATE_INDEX_BUFFER状态。因为上面的创建,并没有出现index buffer相关的标记,显卡不知道这个资源的用途是索引缓冲。
// Record a transition of the index buffer from the copy-destination state to the index-buffer state.
auto num_barrier1 = CD3DX12_RESOURCE_BARRIER::Transition(_indexBuffer.get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_INDEX_BUFFER);
g_dx._commandList->ResourceBarrier(1, &num_barrier1);
创建索引缓冲视图(ibv)
//.h
// View of the shared index buffer; bound at draw time with IASetIndexBuffer.
D3D12_INDEX_BUFFER_VIEW _indexBufferView;
//.cpp
// Describe the index buffer view.
// Fill in the member. Declaring a new local with this name here would shadow
// it and leave the view that the draw code binds unset.
_indexBufferView.BufferLocation = _indexBuffer->GetGPUVirtualAddress();
_indexBufferView.Format = DXGI_FORMAT_R32_UINT;
_indexBufferView.SizeInBytes = Constant::NUM_2D_INDICES * sizeof(UINT32);
在绘制时,通过_commandList->IASetIndexBuffer(&_indexBufferView);即可设置要使用索引缓冲。
26.执行命令列表
命令列表记录完操作后,需要Close表示完成记录,再提交给命令队列执行。在最后使用WaitForGpu等待显卡完成操作。
// Close the list and submit it to the command queue.
// Close the command list and execute it to begin the initial GPU setup.
hr = g_dx._commandList->Close();
if (FAILED(hr))
{
dnd_debug(DL::DAMAGE, L"命令列表Close失败!");
return;
}
ID3D12CommandList* ppCommandLists[] = { g_dx._commandList.get() };
g_dx._commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
// Must wait here until the resources have been uploaded to the GPU.
g_dx.WaitForGpu();
27.绘制流水线
到这里,所有基础组件就创建完成了,接下来是帧函数循环调用的绘制部分。
重置命令分配器和命令列表
在创建命令列表时,我们就依赖于命令分配器。这里每帧重新记录,都需要Reset。
// Re-recording requires a reset: first the current frame's allocator, then the command list.
hr = _commandAllocator[_frameIndex]->Reset();
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"命令分配器Reset失败!");
return;
}
// However, when ExecuteCommandList() is called on a particular command
// list, that command list can then be reset at any time and must be before
// re-recording.
// The list is reset onto the current frame's allocator with the NORMAL PSO as its initial state.
hr = _commandList->Reset(_commandAllocator[_frameIndex].get(), _2d._pipelineState[Dx2D::PsoType::NORMAL].get());
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"命令列表Reset失败!");
return;
}
rtv、dsv清屏
将rtv从呈现状态(D3D12_RESOURCE_STATE_PRESENT),转换到绘制目标状态(D3D12_RESOURCE_STATE_RENDER_TARGET),以进行接下来的清屏与绘制操作。顺便也将dsv清空到初始值。
// Resource barrier: present -> render target.
// Indicate that the back buffer will be used as a render target.
auto res_barrier0 = CD3DX12_RESOURCE_BARRIER::Transition(_renderTargets[_frameIndex].get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET);
_commandList->ResourceBarrier(1, &res_barrier0);
// Bind the current frame's RTV (slot _frameIndex of the RTV heap) together with the DSV.
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(_rtvHeap->GetCPUDescriptorHandleForHeapStart(), _frameIndex, _sizeDescriptorRtv);
CD3DX12_CPU_DESCRIPTOR_HANDLE dsvHandle(_dsvHeap->GetCPUDescriptorHandleForHeapStart());
_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, &dsvHandle);
// Clear the render target to a fixed color and reset the depth buffer to DEPTH_VALUE_START.
// Record commands.
const float clearColor[] = { 0.0f, 0.2f, 0.4f, 1.0f };
_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);
_commandList->ClearDepthStencilView(_dsvHeap->GetCPUDescriptorHandleForHeapStart(), D3D12_CLEAR_FLAG_DEPTH, DEPTH_VALUE_START, 0, 0, nullptr);
绘制
这一步放在后面再讲。
转换rtv状态
完成绘制后,反过来操作一遍,将rtv从绘制目标状态转换到呈现状态。
// Indicate that the back buffer will now be used to present.
// Reverse of the barrier recorded at the start of the frame: render target -> present.
auto res_barrier1 = CD3DX12_RESOURCE_BARRIER::Transition(
_renderTargets[_frameIndex].get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT);
_commandList->ResourceBarrier(1, &res_barrier1);
关闭命令列表并执行
// Close the command list to finish recording.
hr = _commandList->Close();
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"命令列表关闭失败!");
return;
}
// Execute the command list on the command queue.
ID3D12CommandList* ppCommandLists[] = { _commandList.get() };
_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
呈现并进行下一帧
// Present the frame; the sync interval is 1 when _bVsync is set and 0 otherwise.
HRESULT hr = _swapChain->Present(_bVsync ? 1 : 0, 0);
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"Present失败!");
return;
}
// Move on to the next back buffer, waiting for it if necessary.
MoveToNextFrame();
// Advances to the next swap-chain back buffer after a Present.
// Signals the fence with the finished frame's value, switches _frameIndex to
// the new back buffer, waits if that buffer's previously recorded fence value
// has not completed yet, and then stores the fence value for the new frame.
// Returns early if Signal or SetEventOnCompletion fails.
void DirectX::MoveToNextFrame()
{
HRESULT hr = E_FAIL;
// Schedule a Signal command in the queue.
const UINT64 currentFenceValue = _fenceValues[_frameIndex];
hr = _commandQueue->Signal(_fence.get(), currentFenceValue);
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"命令队列Signal失败!");
return;
}
// Update the frame index.
_frameIndex = _swapChain->GetCurrentBackBufferIndex();
// If the next frame is not ready to be rendered yet, wait until it is ready.
if (_fence->GetCompletedValue() < _fenceValues[_frameIndex])
{
hr = _fence->SetEventOnCompletion(_fenceValues[_frameIndex], _fenceEvent);
if (FAILED(hr))
{
dnd_debug(DL::ERR, L"围栏设置同步对象失败!");
return;
}
WaitForSingleObjectEx(_fenceEvent, INFINITE, FALSE);
}
// Set the fence value for the next frame.
// It continues from the value just signalled, so values keep increasing across frames.
_fenceValues[_frameIndex] = currentFenceValue + 1;
}
28.绘图流程
// Bind the root signature.
_commandList->SetGraphicsRootSignature(_rootSignature.get());
// Select the descriptor heaps to use: the CBV/SRV heap and the sampler heap.
ID3D12DescriptorHeap* ppHeaps[] = { _cbvSrvHeap.get() ,_samplerHeap.get() };
_commandList->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps);
// Bind the sampler table (root parameter 1).
_commandList->SetGraphicsRootDescriptorTable(1, _samplerHeap->GetGPUDescriptorHandleForHeapStart());
// Select the PSO.
_commandList->SetPipelineState(_pipelineState[PsoType::NORMAL].get());
// Set the viewport.
_commandList->RSSetViewports(1, &g_dx._viewport);
// Set the scissor rectangle.
_commandList->RSSetScissorRects(1, &g_dx._scissorRect);
// Bind the texture SRV (root parameter 0): the descriptor at offset _canvas->_id in the CBV/SRV heap.
CD3DX12_GPU_DESCRIPTOR_HANDLE cbvSrvHandleTex(_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart(), INT(iter._canvas->_id), g_dx._sizeDescriptorCbvSrv);
_commandList->SetGraphicsRootDescriptorTable(0, cbvSrvHandleTex);
// Bind the matrix CBV (root parameter 2): the descriptor at offset _vecCanvas.size() + mat_id in the same heap.
CD3DX12_GPU_DESCRIPTOR_HANDLE cbvSrvHandleMat(_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart(), INT(_vecCanvas.size() + mat_id), g_dx._sizeDescriptorCbvSrv);
_commandList->SetGraphicsRootDescriptorTable(2, cbvSrvHandleMat);
// Set the primitive topology to a triangle list.
_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// Bind the index buffer.
_commandList->IASetIndexBuffer(&_indexBufferView);
// Bind the canvas's vertex buffer.
_commandList->IASetVertexBuffers(0, 1, &iter._canvas->_vertexBufferView);
// Draw: the index count is (_end - _beg) / 2 * 3, with _beg as the base vertex location.
_commandList->DrawIndexedInstanced(UINT((iter._end - iter._beg) / 2 * 3), 1, 0, INT(iter._beg), 0);
29.代码仓库
完整的2d绘图封装,可以参考我的仓库,我还将实现图像加载、音效、文本、网络、ui等等一系列游戏相关的组件,欢迎提出bug,或参与进来。