目录
多线程渲染可以显著提高渲染性能,特别是在现代多核CPU上。Vulkan的设计使其天然适合多线程渲染。在本节中,我们将详细探讨如何在Vulkan中实现多线程渲染,包括创建命令池和命令缓冲、多线程记录命令缓冲、以及提交命令缓冲。
1. 多线程命令缓冲记录
在Vulkan中,我们可以在多个线程中并行记录命令缓冲,然后将它们提交到图形队列。这种方法可以充分利用多核CPU的性能,提高渲染效率。
创建命令池和命令缓冲
每个线程需要自己的命令池和命令缓冲,以便独立记录命令。
// One command pool per worker thread, indexed by thread index.
std::vector<VkCommandPool> commandPools;
// commandBuffers[t][i] is the command buffer recorded by thread t for
// swap chain framebuffer i.
std::vector<std::vector<VkCommandBuffer>> commandBuffers;
/**
 * Creates one command pool per worker thread and allocates, from each pool,
 * one primary command buffer per swap chain framebuffer.
 *
 * After this call commandPools has threadCount entries and
 * commandBuffers[t] has swapChainFramebuffers.size() entries for every t.
 *
 * @param threadCount Number of worker threads that will record commands.
 * @throws std::runtime_error if a pool cannot be created or its command
 *         buffers cannot be allocated.
 */
void createCommandPoolsAndBuffers(size_t threadCount) {
    commandPools.resize(threadCount);
    commandBuffers.resize(threadCount);

    for (size_t i = 0; i < threadCount; i++) {
        VkCommandPoolCreateInfo poolInfo{};
        poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
        poolInfo.queueFamilyIndex = graphicsQueueFamilyIndex;
        // Buffers from this pool are re-recorded later, so they must be
        // individually resettable.
        poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

        if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPools[i]) != VK_SUCCESS) {
            throw std::runtime_error("failed to create command pool!");
        }

        // One command buffer per swap chain framebuffer for this thread.
        commandBuffers[i].resize(swapChainFramebuffers.size());

        VkCommandBufferAllocateInfo allocInfo{};
        allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
        allocInfo.commandPool = commandPools[i];
        allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
        allocInfo.commandBufferCount = static_cast<uint32_t>(commandBuffers[i].size());

        if (vkAllocateCommandBuffers(device, &allocInfo, commandBuffers[i].data()) != VK_SUCCESS) {
            throw std::runtime_error("failed to allocate command buffers!");
        }
    }
}
在上述代码中,我们为每个线程创建一个命令池,并从中分配命令缓冲。每个命令池和命令缓冲是线程独有的,确保了命令记录的并行性和独立性。
多线程记录命令缓冲
使用标准库中的线程(std::thread)来实现多线程命令缓冲记录。由于每个线程只访问自己的命令池和命令缓冲,记录过程不需要互斥锁,只需在提交前等待所有线程结束(join)。
void recordCommandBuffers(size_t threadIndex, size_t imageIndex) {
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
if (vkBeginCommandBuffer(commandBuffers[threadIndex][imageIndex], &beginInfo) != VK_SUCCESS) {
throw std::runtime_error("failed to begin recording command buffer!");
}
VkRenderPassBeginInfo renderPassInfo{};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderPass = renderPass;
renderPassInfo.framebuffer = swapChainFramebuffers[imageIndex];
renderPassInfo.renderArea.offset = {0, 0};
renderPassInfo.renderArea.extent = swapChainExtent;
VkClearValue clearColor = {0.0f, 0.0f, 0.0f, 1.0f};
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &clearColor;
vkCmdBeginRenderPass(commandBuffers[threadIndex][imageIndex], &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(commandBuffers[threadIndex][imageIndex], VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline);
VkBuffer vertexBuffers[] = {vertexBuffer};
VkDeviceSize offsets[] = {0};
vkCmdBindVertexBuffers(commandBuffers[threadIndex][imageIndex], 0, 1, vertexBuffers, offsets);
vkCmdBindIndexBuffer(commandBuffers[threadIndex][imageIndex], indexBuffer, 0, VK_INDEX_TYPE_UINT16);
vkCmdBindDescriptorSets(commandBuffers[threadIndex][imageIndex], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
vkCmdDrawIndexed(commandBuffers[threadIndex][imageIndex], static_cast<uint32_t>(indices.size()), 1, 0, 0, 0);
vkCmdEndRenderPass(commandBuffers[threadIndex][imageIndex]);
if (vkEndCommandBuffer(commandBuffers[threadIndex][imageIndex]) != VK_SUCCESS) {
throw std::runtime_error("failed to record command buffer!");
}
}
/**
 * Records all command buffers concurrently, one worker thread per entry of
 * commandBuffers. Worker i records commandBuffers[i][j] for every swap
 * chain framebuffer j by calling recordCommandBuffers(i, j).
 *
 * Returns only after every worker has finished.
 *
 * An exception escaping a std::thread entry point calls std::terminate, so
 * each worker captures its failure and the first one (in thread order) is
 * rethrown on the calling thread after all workers have been joined.
 *
 * Requires <exception> (std::exception_ptr) in addition to <thread>.
 *
 * @throws Whatever recordCommandBuffers throws (std::runtime_error), or
 *         std::system_error if a thread cannot be started.
 */
void recordCommandBuffersInParallel() {
    const size_t threadCount = commandBuffers.size();

    // One slot per worker; each worker writes only its own slot, and the
    // caller reads them only after join(), so no lock is needed.
    std::vector<std::exception_ptr> failures(threadCount);

    std::vector<std::thread> threads;
    threads.reserve(threadCount);

    const auto joinAll = [&threads]() {
        for (auto& thread : threads) {
            if (thread.joinable()) {
                thread.join();
            }
        }
    };

    try {
        for (size_t i = 0; i < threadCount; i++) {
            threads.emplace_back([this, i, &failures]() {
                try {
                    for (size_t j = 0; j < swapChainFramebuffers.size(); j++) {
                        recordCommandBuffers(i, j);
                    }
                } catch (...) {
                    failures[i] = std::current_exception();
                }
            });
        }
    } catch (...) {
        // Starting a thread failed: destroying a joinable std::thread would
        // terminate the process, so join the ones already running first.
        joinAll();
        throw;
    }

    joinAll();

    for (const auto& failure : failures) {
        if (failure) {
            std::rethrow_exception(failure);
        }
    }
}
在上述代码中,recordCommandBuffers 函数负责记录单个命令缓冲,而 recordCommandBuffersInParallel 函数则使用多个线程并行记录所有命令缓冲。每个线程会分别处理不同的命令池和命令缓冲,确保命令记录过程的并行执行。
2. 提交命令缓冲
在提交命令缓冲时,需要确保所有线程都已经完成命令缓冲的记录。
void drawFrame() {
vkWaitForFences(device, 1, &inFlightFence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &inFlightFence);
uint32_t imageIndex;
vkAcquireNextImageKHR(device, swapChain, UINT64_MAX, imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
recordCommandBuffersInParallel();
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkSemaphore waitSemaphores[] = {imageAvailableSemaphore};
VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.pWaitDstStageMask = waitStages;
std::vector<VkCommandBuffer> buffersToSubmit;
for (const auto& bufferSet : commandBuffers) {
buffersToSubmit.push_back(bufferSet[imageIndex]);
}
submitInfo.commandBufferCount = static_cast<uint32_t>(buffersToSubmit.size());
submitInfo.pCommandBuffers = buffersToSubmit.data();
VkSemaphore signalSemaphores[] = {renderFinishedSemaphore};
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = signalSemaphores;
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFence) != VK_SUCCESS) {
throw std::runtime_error("failed to submit draw command buffer!");
}
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = signalSemaphores;
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &swapChain;
presentInfo.pImageIndices = &imageIndex;
if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) {
throw std::runtime_error("failed to present swap chain image!");
}
}
在上述代码中,我们首先等待上一帧的渲染完成,并在本帧提交之前重置栅栏;同时获取交换链中的下一张图像,再并行记录命令缓冲。所有命令缓冲记录完成后,我们将它们提交到图形队列并呈现图像。
详细解释
命令池和命令缓冲
- 命令池:用于管理命令缓冲的内存分配。每个线程需要一个独立的命令池,以避免多线程访问同一个命令池带来的竞争问题。
- 命令缓冲:包含一系列绘制命令。每个线程需要独立的命令缓冲,以便在并行记录命令时不会互相干扰。
多线程命令缓冲记录
- 线程同步:使用标准库的线程(std::thread)并行记录命令缓冲,并通过 join() 等待所有线程完成。由于每个线程只在自己独立的命令池和命令缓冲中记录命令,记录过程本身不需要互斥锁,从而实现并行化。
- 命令缓冲记录:每个线程独立记录命令缓冲,包括开始记录、绑定渲染管线、绘制命令和结束记录。
提交命令缓冲
- 同步对象:栅栏(fence)用于让CPU等待上一帧的GPU工作完成,信号量(semaphore)用于在GPU上保证“图像获取 → 渲染 → 呈现”的先后顺序;而命令缓冲是否已经记录完毕,则由提交前对所有记录线程调用 join() 来保证。
- 提交命令:将所有命令缓冲提交到图形队列,然后等待渲染完成并呈现图像。
通过上述步骤,我们可以在Vulkan中实现高效的多线程渲染,充分利用现代多核CPU的性能,提高渲染效率和帧率。