前言
上一章深入剖析了Vulkan渲染过程中的核心组件VkPipeline,以及vsg中针对图形渲染管线的封装GraphicsPipeline。本章将继续深入探讨视景器准备过程中的最后一部分——视景器编译,并进一步深入vsg中显存的传输机制,即vsg::TransferTask的设计。vsg中GPU数据的传递包含编译(Compile)和数据传输(Transfer)两个过程。由于篇幅有限,后续内容将分为三章进行分析:GPU资源内存及其管理、vsg中Vulkan资源的编译(包含视景器编译)、vsg::TransferTask机制。本章作为开篇,将分析GPU资源内存及其管理。vsg中为了提高设备内存的利用率,同时减少内存(GPU)碎片,采用GPU资源内存池机制(vsg::MemoryBufferPools)管理逻辑缓存(VkBuffer)与物理内存(VkDeviceMemory)。本章是后续文章的基础,有助于更好地理解vsg的编译(Compile)与数据传输(Transfer)机制。
目录
- 1 Vulkan中创建顶点缓冲对象
- 2 vsg::DeviceMemory与vsg::Buffer
- 3 GPU资源内存池---vsg::MemoryBufferPools
1 Vulkan中创建顶点缓冲对象
本小节以Vulkan中创建顶点缓冲对象为例分析vulkan中资源的分配与创建过程。
创建顶点缓冲区(VkBuffer),VK_BUFFER_USAGE_VERTEX_BUFFER_BIT 表示是一个顶点缓冲区,VK_SHARING_MODE_EXCLUSIVE 表示该缓冲区由一个QueueFamily占用。
// Describe the vertex buffer: large enough for every vertex, used as a vertex
// buffer, and accessed exclusively by a single queue family.
VkBufferCreateInfo bufferInfo {
    VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType: structure type
    nullptr, // pNext: no extension chain
    0, // flags: no special flags
    sizeof(Vertex) * vertices.size(), // size: total bytes of vertex data
    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, // usage: vertex buffer
    VK_SHARING_MODE_EXCLUSIVE, // sharingMode: exclusive access
    0, nullptr // queueFamilyIndexCount / pQueueFamilyIndices: none
};
// VkBuffer is a non-dispatchable handle (a 64-bit integer on 32-bit targets),
// so it is initialised with VK_NULL_HANDLE rather than nullptr.
VkBuffer vertexBuffer = VK_NULL_HANDLE;
VkResult result = vkCreateBuffer(_device, &bufferInfo, nullptr, &vertexBuffer);
if (result != VK_SUCCESS) {
    printf("Failed to create vertex buffer.\n");
    return nullptr;
}
查询内存需求
// Query what the buffer needs from its backing memory; the result supplies
// the size and memoryTypeBits consumed by the following steps.
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(_device, vertexBuffer, &memRequirements);
选择合适的内存类型,其中VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT表示CPU可访问,VK_MEMORY_PROPERTY_HOST_COHERENT_BIT表示CPU和GPU内存自动同步。
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(_physicalDevice, &memProperties);
uint32_t memoryTypeIndex = 0,
propsBit = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
for (uint32_t i = 0; i < memProperties.memoryTypeCount; ++i) {
if ((memRequirements.memoryTypeBits & (1 << i)) &&
(memProperties.memoryTypes[i].propertyFlags & propsBit) == propsBit) {
memoryTypeIndex = i;
}
}
分配设备内存
// Allocate device memory of the size and type determined above.
VkMemoryAllocateInfo allocInfo {
    VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType: structure type
    nullptr, // pNext: no extension chain
    memRequirements.size, // allocationSize: bytes to allocate
    memoryTypeIndex // memoryTypeIndex: selected memory type
};
// VkDeviceMemory is a non-dispatchable handle, so use VK_NULL_HANDLE rather
// than nullptr (which does not compile where the handle is a 64-bit integer).
VkDeviceMemory vertexBufferMemory = VK_NULL_HANDLE;
result = vkAllocateMemory(_device, &allocInfo, nullptr, &vertexBufferMemory);
if (result != VK_SUCCESS) {
    printf("Failed to create vertex buffer memory.\n");
    // The buffer was already created; destroy it so the failure path does not leak it.
    vkDestroyBuffer(_device, vertexBuffer, nullptr);
    return nullptr;
}
绑定缓冲区与内存
vkBindBufferMemory(_device, vertexBuffer, vertexBufferMemory, 0);
填充顶点或图元数据
void* data = nullptr;
vkMapMemory(_device, vertexBufferMemory, 0, bufferInfo.size, 0, &data);
memcpy(data, vertices.data(), (size_t)bufferInfo.size);
vkUnmapMemory(_device, vertexBufferMemory);
2 vsg::DeviceMemory与vsg::Buffer
vsg::DeviceMemory是对VkDeviceMemory的封装,表示一块映射到CPU可访问区域或GPU专用区域的设备内存,具体位于何处取决于内存属性(_properties),其成员变量如下:
// Member variables of vsg::DeviceMemory.
// The wrapped Vulkan device-memory handle.
VkDeviceMemory _deviceMemory;
// NOTE(review): presumably the requirements this allocation was created for — confirm against the vsg source.
VkMemoryRequirements _memoryRequirements;
// Memory property flags (e.g. HOST_VISIBLE, DEVICE_LOCAL, HOST_COHERENT).
VkMemoryPropertyFlags _properties;
// NOTE(review): presumably the device the memory was allocated from — confirm.
ref_ptr<Device> _device;
// NOTE(review): presumably guards _memorySlots — confirm against the implementation.
mutable std::mutex _mutex;
// Bookkeeping for sub-allocations carved out of this memory block.
MemorySlots _memorySlots;
_properties为位标志(VkMemoryPropertyFlags),可同时包含多个属性位:包含VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT时,表示内存位于CPU可访问区域;包含VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT时,表示内存位于GPU专用的高速内存,若未同时包含HOST_VISIBLE位,则CPU无法直接访问;包含VK_MEMORY_PROPERTY_HOST_COHERENT_BIT时,表示内存在CPU和GPU间自动同步,无需手动刷新。
其中MemorySlots供vsg::Allocator、vsg::DeviceMemory、vsg::Buffer内部使用,用于管理CPU或GPU内存块的子分配,解决内存分配碎片化的问题。
vsg::Buffer是对VkBuffer和VkBufferCreateInfo设置的封装,vsg::Buffer 通过 VkBuffer 绑定到 DeviceMemory 的内存块,其成员变量如下:
// Vulkan-side state held by a vsg::Buffer.
struct VulkanData
{
// The Vulkan buffer handle; VK_NULL_HANDLE until one is assigned.
VkBuffer buffer = VK_NULL_HANDLE;
// The DeviceMemory block this buffer is bound to.
ref_ptr<DeviceMemory> deviceMemory;
// Offset of the buffer within deviceMemory; offsets let several buffers share one DeviceMemory.
VkDeviceSize memoryOffset = 0;
// NOTE(review): presumably the buffer size in bytes — confirm.
VkDeviceSize size = 0;
// NOTE(review): presumably the device that created the buffer — confirm.
ref_ptr<Device> device;
// NOTE(review): implementation not shown; presumably releases the Vulkan resources above — confirm.
void release();
};
// NOTE(review): vk_buffer is not defined in this excerpt; presumably a per-device container of VulkanData — confirm.
vk_buffer<VulkanData> _vulkanData;
// NOTE(review): presumably guards _memorySlots — confirm.
mutable std::mutex _mutex;
// Bookkeeping for sub-allocations within this buffer.
MemorySlots _memorySlots;
其中_memorySlots用于管理vsg::Buffer中的子内存。
在 VSG (VulkanSceneGraph) 中,vsg::Buffer 和 vsg::DeviceMemory 分别实现了对 Vulkan 逻辑缓冲区和物理内存的高层封装管理。vsg中通过偏移实现多个vsg::Buffer与单个vsg::DeviceMemory绑定,同时通过MemorySlots实现多个vsg::Data与单个vsg::Buffer的关联。其核心是对vulkan中vkBindBufferMemory实现多个VkBuffer与VkDeviceMemory绑定的封装,典型的Vulkan代码实现如下。
// Create two buffers.
VkBuffer buffer1 = VK_NULL_HANDLE;
VkBuffer buffer2 = VK_NULL_HANDLE;
vkCreateBuffer(device, &bufferInfo1, nullptr, &buffer1);
vkCreateBuffer(device, &bufferInfo2, nullptr, &buffer2);
// Query each buffer's memory requirements.
VkMemoryRequirements req1, req2;
vkGetBufferMemoryRequirements(device, buffer1, &req1);
vkGetBufferMemoryRequirements(device, buffer2, &req2);
// Lay the buffers out back to back. buffer1 starts at offset 0; buffer2 must
// start at a multiple of its OWN alignment, so round the end of buffer1 up to
// req2.alignment (alignments are powers of two, so the mask trick applies).
const VkDeviceSize buffer2Offset = (req1.size + req2.alignment - 1) & ~(req2.alignment - 1);
// The allocation must reach the end of buffer2.
const VkDeviceSize totalSize = buffer2Offset + req2.size;
// Allocate one block large enough for both buffers. Only memory types that
// both buffers accept are candidates, hence the intersection of memoryTypeBits.
VkMemoryAllocateInfo allocInfo = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    .allocationSize = totalSize,
    .memoryTypeIndex = findMemoryType(req1.memoryTypeBits & req2.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
};
VkDeviceMemory deviceMemory = VK_NULL_HANDLE;
vkAllocateMemory(device, &allocInfo, nullptr, &deviceMemory);
// Bind both buffers into the shared allocation at their aligned offsets.
vkBindBufferMemory(device, buffer1, deviceMemory, 0); // buffer1 starts at 0
vkBindBufferMemory(device, buffer2, deviceMemory, buffer2Offset); // buffer2 follows, aligned to req2.alignment
其中内存对齐本质是将内存设置为alignment的整数倍(向上取整):
// Round `size` up to the next multiple of `alignment` (which must be a power of two):
// adding alignment - 1 carries into the next multiple, and the mask clears the remainder bits.
// Example: size = 100, alignment = 64 -> (100 + 63) & ~63 = 128.
VkDeviceSize alignedSize = (size + alignment - 1) & ~(alignment - 1);
3 GPU资源内存池---vsg::MemoryBufferPools
MemoryBufferPools 管理着一个由 vsg::DeviceMemory 和 vsg::Buffer 组成的资源池。提供的方法可用于从池中获取 Buffer,并通过共享内存机制提高设备内存的利用率,同时减少内存(GPU)碎片。
// Manages a pool of vsg::DeviceMemory and vsg::Buffer objects and hands out
// buffers and device memory from it.
class VSG_DECLSPEC MemoryBufferPools : public Inherit<Object, MemoryBufferPools>
{
public:
MemoryBufferPools(const std::string& name, ref_ptr<Device> in_device, const ResourceRequirements& in_resourceRequirements = {});
std::string name;
ref_ptr<Device> device;
// Both minimum sizes default to 16 MiB (16 * 1024 * 1024 bytes).
// NOTE(review): presumably the smallest Buffer / DeviceMemory the pool creates — confirm against the implementation.
VkDeviceSize minimumBufferSize = 16 * 1024 * 1024;
VkDeviceSize minimumDeviceMemorySize = 16 * 1024 * 1024;
// NOTE(review): implementations not shown; presumably report available / reserved totals across the pools — confirm.
VkDeviceSize computeMemoryTotalAvailable() const;
VkDeviceSize computeMemoryTotalReserved() const;
VkDeviceSize computeBufferTotalAvailable() const;
VkDeviceSize computeBufferTotalReserved() const;
// Requests buffer space with the given size, alignment, usage, sharing mode and memory properties.
// Returns a BufferInfo, which wraps a vsg::Buffer together with an offset.
ref_ptr<BufferInfo> reserveBuffer(VkDeviceSize totalSize, VkDeviceSize alignment, VkBufferUsageFlags bufferUsageFlags, VkSharingMode sharingMode, VkMemoryPropertyFlags memoryProperties);
// A DeviceMemory paired with an offset.
using DeviceMemoryOffset = std::pair<ref_ptr<DeviceMemory>, VkDeviceSize>;
// Requests device memory for the given requirements and memory properties.
// Returns the DeviceMemory together with an offset.
DeviceMemoryOffset reserveMemory(VkMemoryRequirements memRequirements, VkMemoryPropertyFlags memoryProperties, void* pNextAllocInfo = nullptr);
protected:
// NOTE(review): presumably guards memoryPools and bufferPools — confirm.
mutable std::mutex _mutex;
// transfer data settings
using MemoryPools = std::vector<ref_ptr<DeviceMemory>>;
MemoryPools memoryPools;
using BufferPools = std::vector<ref_ptr<Buffer>>;
BufferPools bufferPools;
};
如上代码所示,为vsg::MemoryBufferPools对应的头文件,其管理了两个对象MemoryPools和BufferPools,提供了两个接口reserveBuffer与reserveMemory,分别用于vsg::Buffer和vsg::DeviceMemory的申请。
对于reserveBuffer,其返回值为vsg::BufferInfo,封装了vsg::Buffer以及偏移;reserveMemory的返回值为DeviceMemoryOffset,对应vsg::DeviceMemory以及偏移。本章暂时将reserveBuffer与reserveMemory函数的具体实现放置到待分析项中。通过类中的成员变量可看出,其以参数的形式对申请资源的大小做了限制,对应的变量为minimumBufferSize、minimumDeviceMemorySize,默认值都为16MB。
文末:本章在上一篇文章的基础上,深入分析了vsg中对VkBuffer和VkDeviceMemory的封装vsg::Buffer与vsg::DeviceMemory,同时vsg中为了提高设备内存的利用率,同时减少内存(GPU)碎片,采用了GPU资源内存池机制来管理GPU资源相关的内存分配,通过设计vsg::MemoryBufferPools实现对逻辑缓冲区和物理内存的分配,下章将深入分析vsg中vulkan资源的编译(包含视景器编译)。
待分析项:MemoryBufferPools::reserveBuffer与MemoryBufferPools::reserveMemory具体实现