目前我的计算机图形学基础为零,对着色器、纹理等概念还不太清楚。现在正在尝试使用vulkan完成通用计算,sample code参考自github,只不过自己照着重新敲了一遍,代码如下:
#include <vulkan/vulkan.h>
#include <stdio.h>
#include <stdlib.h>
#include<windows.h>
unsigned long long GetTime(void)
{
LARGE_INTEGER m_liPerfFreq = { 0 };
LARGE_INTEGER m_liPerfStart = { 0 };
QueryPerformanceFrequency(&m_liPerfFreq);
QueryPerformanceCounter(&m_liPerfStart);
return (unsigned long long)(m_liPerfStart.QuadPart * 1.0 * 1000 * 1000 / m_liPerfFreq.QuadPart);
}
/* printf-style logging hook used by the helpers in this file. */
#define LOG_VULKAN printf

/*
 * Report the outcome of a Vulkan call on stderr.
 *
 * funcname: name printed in the message.
 * res:      VkResult returned by the call.
 *
 * This only logs; it neither aborts nor propagates the error.
 */
static void check_vkresult(const char* funcname, VkResult res)
{
    if (res != VK_SUCCESS) {
        fprintf(stderr, "%s failed: %d\n", funcname, res);
        return;
    }
    fprintf(stderr, "%s success\n", funcname);
}

/* Call Func with the parenthesised argument list Param and log its VkResult
 * under the stringified function name. */
#define CALL_VK(Func, Param) check_vkresult(#Func, Func Param)
/*
 * Create a Vulkan instance with no layers and no extensions enabled.
 *
 * instance: receives the new handle. The result of vkCreateInstance is only
 *           logged by CALL_VK; it is not returned to the caller.
 */
void CreateVulkanInstance(VkInstance *instance)
{
    /* Fields that are not assigned below stay zero/NULL from the empty initializer. */
    VkApplicationInfo application = {};
    application.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    application.pApplicationName = "hello world";
    application.pEngineName = "hello world";
    application.applicationVersion = 1;
    application.engineVersion = 1;
    application.apiVersion = VK_MAKE_VERSION(1, 1, 2);

    VkInstanceCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    create_info.pApplicationInfo = &application;

    CALL_VK(vkCreateInstance, (&create_info, NULL, instance));
}
/*
 * Enumerate the physical devices of an instance, print each device name and
 * select the first one.
 *
 * instance:   instance to enumerate.
 * phy_device: receives the first enumerated device, or VK_NULL_HANDLE when no
 *             device is available or the temporary list cannot be allocated.
 */
void SelectVulkanPhyDevice(VkInstance instance, VkPhysicalDevice* phy_device)
{
    uint32_t device_count = 0;

    *phy_device = VK_NULL_HANDLE;

    CALL_VK(vkEnumeratePhysicalDevices, (instance, &device_count, NULL));
    if (device_count == 0) {
        /* Without this guard the code below would read devices[0] from an empty list. */
        LOG_VULKAN("SelectVulkanPhyDevice failed: no physical device\n");
        return;
    }

    VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * device_count);
    if (devices == NULL) {
        LOG_VULKAN("SelectVulkanPhyDevice failed: out of memory\n");
        return;
    }

    /* The second call may lower device_count, so it is re-checked before indexing. */
    CALL_VK(vkEnumeratePhysicalDevices, (instance, &device_count, devices));
    for (uint32_t i = 0; i < device_count; i++) {
        VkPhysicalDeviceProperties props;
        vkGetPhysicalDeviceProperties(devices[i], &props);
        LOG_VULKAN("%s\n", props.deviceName);
    }

    if (device_count > 0) {
        *phy_device = devices[0];
    } else {
        LOG_VULKAN("SelectVulkanPhyDevice failed: no physical device\n");
    }
    free(devices);
}
/*
 * Find the first queue family of a physical device that supports every
 * capability bit requested in queueflag.
 *
 * phy_device: device whose queue families are inspected.
 * queueflag:  required VkQueueFlags bits (e.g. VK_QUEUE_COMPUTE_BIT).
 * queue_idx:  receives the index of the matching family; left untouched when
 *             no family matches (a failure message is logged instead).
 */
void SelectVulkanQueueCluster(VkPhysicalDevice phy_device, VkQueueFlags queueflag, int* queue_idx)
{
    uint32_t family_count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phy_device, &family_count, NULL);

    VkQueueFamilyProperties *properties =
        (VkQueueFamilyProperties*)malloc(family_count * sizeof(VkQueueFamilyProperties));
    if (properties == NULL) {
        LOG_VULKAN("SelectQueueCluster failed\n");
        return;
    }
    vkGetPhysicalDeviceQueueFamilyProperties(phy_device, &family_count, properties);

    int found = 0;
    for (uint32_t i = 0; i < family_count; i++) {
        /* Bitwise AND tests the capability. The previous bitwise OR was non-zero
         * for any non-zero queueflag, so family 0 was always selected. */
        if ((properties[i].queueFlags & queueflag) == queueflag) {
            *queue_idx = (int)i;
            found = 1;
            break;
        }
        /* Print the flags of families that were rejected. */
        LOG_VULKAN("%u\n", (unsigned)properties[i].queueFlags);
    }

    if (!found) {
        LOG_VULKAN("SelectQueueCluster failed\n");
    }
    free(properties);
}
/*
 * Create a logical device exposing a single queue from the given family.
 *
 * phy_device: physical device to create the logical device on.
 * queue_idx:  queue family index the single queue is taken from.
 * device:     receives the logical device handle; the result is only logged.
 *
 * No layers, extensions or optional features are enabled.
 */
void CreateVulkanDevice(VkPhysicalDevice phy_device, int queue_idx, VkDevice *device)
{
    /* One queue, priority 1.0. */
    const float queue_priority[] = { 1.f };

    VkDeviceQueueCreateInfo queue_request = {};
    queue_request.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queue_request.queueFamilyIndex = queue_idx;
    queue_request.queueCount = 1;
    queue_request.pQueuePriorities = queue_priority;

    /* Build the device from the physical device and the queue family. Every
     * field that is not assigned stays zero/NULL from the empty initializer. */
    VkDeviceCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    create_info.queueCreateInfoCount = 1;
    create_info.pQueueCreateInfos = &queue_request;

    CALL_VK(vkCreateDevice, (phy_device, &create_info, NULL, device));
}
/*
 * Fetch a queue handle from an already created logical device.
 *
 * device:     logical device that owns the queue.
 * family_idx: queue family index.
 * queue_idx:  index of the queue inside that family.
 * queue:      receives the queue handle.
 */
void CreateVulkanQueue(VkDevice device,int family_idx,int queue_idx, VkQueue*queue)
{
    /* vkGetDeviceQueue takes unsigned indices; the conversion is spelled out here. */
    const uint32_t family = (uint32_t)family_idx;
    const uint32_t index_in_family = (uint32_t)queue_idx;

    vkGetDeviceQueue(device, family, index_in_family, queue);
}
/*
 * Create a command pool for the given queue family with no creation flags.
 *
 * device:       logical device.
 * family_idx:   queue family the pool's command buffers will be submitted to.
 * command_pool: receives the pool handle; the result is only logged.
 */
void CreateVulkanCommandPool(VkDevice device,int family_idx, VkCommandPool *command_pool)
{
    VkCommandPoolCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    create_info.queueFamilyIndex = family_idx;
    /* pNext and flags stay NULL/0 from the empty initializer. */

    CALL_VK(vkCreateCommandPool, (device, &create_info, NULL, command_pool));
}
/*
 * Create a descriptor pool that holds `size` storage-buffer descriptors.
 *
 * device:          logical device.
 * size:            number of storage-buffer descriptors; also used as maxSets.
 * descriptor_pool: receives the pool handle; the result is only logged.
 */
void CreateVulkanDescPool(VkDevice device, int size, VkDescriptorPool *descriptor_pool)
{
    VkDescriptorPoolSize storage_buffers = {};
    storage_buffers.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    storage_buffers.descriptorCount = size;

    VkDescriptorPoolCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
    create_info.poolSizeCount = 1;
    create_info.pPoolSizes = &storage_buffers;
    /* The set limit mirrors the descriptor count. */
    create_info.maxSets = storage_buffers.descriptorCount;

    CALL_VK(vkCreateDescriptorPool, (device, &create_info, NULL, descriptor_pool));
}
/*
 * Create a descriptor set layout with `size` storage-buffer bindings
 * (binding numbers 0 .. size-1), all visible to the compute stage.
 *
 * device:            logical device.
 * size:              number of bindings; must not be negative.
 * descriptor_layout: receives the layout handle, or VK_NULL_HANDLE when the
 *                    arguments are invalid or the temporary array cannot be
 *                    allocated.
 */
void CreateVulkanDescSetLayout(VkDevice device, int size, VkDescriptorSetLayout *descriptor_layout)
{
    *descriptor_layout = VK_NULL_HANDLE;

    if (size < 0) {
        /* A negative size would be converted to a huge unsigned loop bound. */
        LOG_VULKAN("CreateVulkanDescSetLayout failed: negative binding count\n");
        return;
    }
    const uint32_t binding_count = (uint32_t)size;

    VkDescriptorSetLayoutBinding *bindings = NULL;
    if (binding_count > 0) {
        bindings = (VkDescriptorSetLayoutBinding *)malloc(sizeof(VkDescriptorSetLayoutBinding) * binding_count);
        if (bindings == NULL) {
            LOG_VULKAN("CreateVulkanDescSetLayout failed: out of memory\n");
            return;
        }
    }

    for (uint32_t i = 0; i < binding_count; i++) {
        bindings[i].binding = i;
        bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        bindings[i].descriptorCount = 1;
        bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
        bindings[i].pImmutableSamplers = NULL;
    }

    VkDescriptorSetLayoutCreateInfo desc_layout_info = {};
    desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
    desc_layout_info.pNext = NULL;
    desc_layout_info.flags = 0;
    desc_layout_info.bindingCount = binding_count;
    desc_layout_info.pBindings = bindings;
    CALL_VK(vkCreateDescriptorSetLayout, (device, &desc_layout_info, NULL, descriptor_layout));

    free(bindings);
}
/*
 * Allocate one descriptor set with the given layout from the given pool.
 *
 * device:            logical device.
 * descriptor_pool:   pool to allocate from.
 * descriptor_layout: layout of the set.
 * descriptor_set:    receives the set handle; the result is only logged.
 */
void CreateVulkanDescSets(VkDevice device, VkDescriptorPool descriptor_pool,VkDescriptorSetLayout descriptor_layout, VkDescriptorSet* descriptor_set)
{
    VkDescriptorSetAllocateInfo request = {};
    request.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    request.descriptorPool = descriptor_pool;
    /* Exactly one set, described by the by-value layout parameter. */
    request.pSetLayouts = &descriptor_layout;
    request.descriptorSetCount = 1;

    CALL_VK(vkAllocateDescriptorSets, (device, &request, descriptor_set));
}
/*
 * Load a SPIR-V binary from disk and wrap it in a shader module.
 *
 * device:   logical device.
 * filename: path of the SPIR-V file, opened in binary mode.
 * module:   receives the shader module, or VK_NULL_HANDLE when the file cannot
 *           be opened or read, is empty, is not a multiple of 4 bytes long, or
 *           memory runs out. Each failure is logged.
 */
void CreateVulkanShaderModule(VkDevice device, char* filename, VkShaderModule* module)
{
    *module = VK_NULL_HANDLE;

    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        LOG_VULKAN("CreateVulkanShaderModule failed: cannot open %s\n", filename);
        return;
    }

    /* Determine the file size, then rewind for reading. */
    long code_len = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        code_len = ftell(fp);
    }
    /* SPIR-V is a stream of 32-bit words, so the size must be a positive multiple of 4. */
    if (code_len <= 0 || (code_len % 4) != 0 || fseek(fp, 0, SEEK_SET) != 0) {
        LOG_VULKAN("CreateVulkanShaderModule failed: bad size for %s\n", filename);
        fclose(fp);
        return;
    }

    unsigned char *code = (unsigned char*)malloc((size_t)code_len);
    if (code == NULL) {
        LOG_VULKAN("CreateVulkanShaderModule failed: out of memory\n");
        fclose(fp);
        return;
    }

    size_t bytes_read = fread(code, sizeof(unsigned char), (size_t)code_len, fp);
    fclose(fp);
    if (bytes_read != (size_t)code_len) {
        LOG_VULKAN("CreateVulkanShaderModule failed: short read on %s\n", filename);
        free(code);
        return;
    }

    VkShaderModuleCreateInfo shader_info = {};
    shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_info.pNext = NULL;
    shader_info.flags = 0;
    shader_info.codeSize = (size_t)code_len;
    shader_info.pCode = (const uint32_t*)code;
    CALL_VK(vkCreateShaderModule, (device, &shader_info, NULL, module));

    free(code);
}
/*
 * Create a pipeline layout with a single descriptor set layout and no
 * push-constant ranges.
 *
 * device:          logical device.
 * layout:          the descriptor set layout used as set 0.
 * pipeline_layout: receives the pipeline layout handle; the result is only logged.
 */
void CreateVulkanPipeLineLayout(VkDevice device, VkDescriptorSetLayout layout, VkPipelineLayout* pipeline_layout)
{
    VkPipelineLayoutCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
    create_info.pSetLayouts = &layout;
    create_info.setLayoutCount = 1;
    /* Push-constant count/pointer stay 0/NULL from the empty initializer. */

    CALL_VK(vkCreatePipelineLayout, (device, &create_info, NULL, pipeline_layout));
}
/*
 * Create a compute pipeline from a shader module whose entry point is "main".
 *
 * device:   logical device.
 * module:   compute shader module.
 * layout:   pipeline layout the shader's bindings refer to.
 * pipeline: receives the pipeline handle; the result is only logged.
 *
 * No pipeline cache, specialization constants or base pipeline are used.
 */
void CreateVulkanPipeLine(VkDevice device,VkShaderModule module, VkPipelineLayout layout, VkPipeline*pipeline)
{
    VkComputePipelineCreateInfo create_info = {};
    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
    create_info.layout = layout;
    create_info.basePipelineHandle = VK_NULL_HANDLE;
    create_info.basePipelineIndex = 0;

    /* The single compute stage is filled in place inside the create info. */
    create_info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
    create_info.stage.module = module;
    create_info.stage.pName = "main";

    CALL_VK(vkCreateComputePipelines, (device, VK_NULL_HANDLE, 1, &create_info, NULL, pipeline));
}
/*
 * Choose a memory type the host can map.
 *
 * The first type that is both HOST_VISIBLE and HOST_COHERENT is preferred,
 * because the callers in this file write and read mapped memory without
 * flushing or invalidating it. If none exists, the first HOST_VISIBLE type is
 * used and a warning is logged.
 *
 * phy_device: physical device whose memory types are inspected.
 * mem_idx:    receives the chosen memory type index; left untouched when no
 *             host-visible type exists (a failure message is logged).
 */
void SelectVulkanMemIdx(VkPhysicalDevice phy_device, int* mem_idx)
{
    VkPhysicalDeviceMemoryProperties props;
    vkGetPhysicalDeviceMemoryProperties(phy_device, &props);

    int first_visible = -1;
    for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
        const VkMemoryPropertyFlags flags = props.memoryTypes[i].propertyFlags;
        printf("%u\n", (unsigned)flags);

        if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) {
            continue;
        }
        if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
            *mem_idx = (int)i;
            return;
        }
        if (first_visible < 0) {
            first_visible = (int)i;
        }
    }

    if (first_visible >= 0) {
        /* Non-coherent memory needs vkFlushMappedMemoryRanges after host writes
         * and vkInvalidateMappedMemoryRanges before host reads. */
        LOG_VULKAN("SelectVulkanMemIdx: memory type %d is host visible but not coherent\n", first_visible);
        *mem_idx = first_visible;
        return;
    }

    LOG_VULKAN("SelectVulkanMemIdx failed\n");
}
/*
 * One buffer together with the device memory bound to it.
 * The caller sets width before CreateVulkanMemory; that function fills in
 * deviceMem and buff.
 */
typedef struct tagVulkanMemInfo
{
VkDeviceMemory deviceMem; /* device memory allocation bound to buff */
VkBuffer buff;            /* storage buffer handle */
int width;                /* element count; byte size is width * sizeof(float) throughout this file */
}stVulkanMemInfo;
/*
 * Create a storage buffer of mem->width * sizeof(float) bytes and bind freshly
 * allocated device memory to it.
 *
 * The buffer is created first so that the allocation can be sized from
 * vkGetBufferMemoryRequirements. The implementation may require more bytes
 * (size/alignment padding) than the raw buffer size.
 *
 * device:  logical device.
 * mem_idx: memory type index to allocate from.
 * mem:     in: width; out: buff and deviceMem. Results are only logged.
 */
void CreateVulkanMemory(VkDevice device,int mem_idx, stVulkanMemInfo *mem)
{
    VkBufferCreateInfo buffer_info = {};
    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    buffer_info.pNext = NULL;
    buffer_info.flags = 0;
    buffer_info.size = mem->width * sizeof(float);
    buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    buffer_info.queueFamilyIndexCount = 0;
    buffer_info.pQueueFamilyIndices = NULL;
    CALL_VK(vkCreateBuffer, (device, &buffer_info, NULL, &mem->buff));

    VkMemoryRequirements requirements;
    vkGetBufferMemoryRequirements(device, mem->buff, &requirements);

    /* memoryTypeBits has bit i set when memory type i may back this buffer. */
    if (mem_idx < 0 || mem_idx >= 32 || (requirements.memoryTypeBits & (1u << mem_idx)) == 0) {
        LOG_VULKAN("CreateVulkanMemory: memory type %d is not supported for this buffer\n", mem_idx);
    }

    VkMemoryAllocateInfo memlloc_info = {};
    memlloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    memlloc_info.pNext = NULL;
    memlloc_info.allocationSize = requirements.size;
    memlloc_info.memoryTypeIndex = mem_idx;
    CALL_VK(vkAllocateMemory, (device, &memlloc_info, NULL, &mem->deviceMem));

    CALL_VK(vkBindBufferMemory, (device, mem->buff, mem->deviceMem, 0));
}
/*
 * Point one storage-buffer binding of a descriptor set at a buffer.
 *
 * device:         logical device.
 * descriptor_set: set to update.
 * arg_idx:        binding number inside the set.
 * mem:            buffer to bind; the range covers mem.width * sizeof(float)
 *                 bytes starting at offset 0.
 */
void UpdateVulkanDescSets(VkDevice device, VkDescriptorSet descriptor_set,int arg_idx,stVulkanMemInfo mem)
{
    VkDescriptorBufferInfo target = {};
    target.buffer = mem.buff;
    target.range = mem.width * sizeof(float);
    /* offset stays 0 from the empty initializer. */

    VkWriteDescriptorSet write = {};
    write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write.dstSet = descriptor_set;
    write.dstBinding = arg_idx;
    write.descriptorCount = 1;
    write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    write.pBufferInfo = &target;
    /* pNext, dstArrayElement, pImageInfo and pTexelBufferView stay NULL/0. */

    vkUpdateDescriptorSets(device, 1, &write, 0, NULL);
}
/*
 * Allocate one primary command buffer from a command pool.
 *
 * device:         logical device.
 * cmdPool:        pool to allocate from.
 * command_buffer: receives the command buffer handle; the result is only logged.
 */
void CreateVulkanCmdBuffer(VkDevice device, VkCommandPool cmdPool, VkCommandBuffer *command_buffer)
{
    VkCommandBufferAllocateInfo request = {};
    request.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    request.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    request.commandPool = cmdPool;
    request.commandBufferCount = 1;

    CALL_VK(vkAllocateCommandBuffers, (device, &request, command_buffer));
}
/*
 * All Vulkan objects needed to run one compute kernel.
 * InitVulkanInfo fills everything except shader_module and pipeline, which
 * main creates afterwards; UninitVulkanInfo destroys the objects.
 */
typedef struct tagVulkanInfo
{
VkInstance instance;
VkPhysicalDevice phy_device;             /* first enumerated physical device */
VkDevice device;
VkQueue queue;                           /* queue 0 of comp_queue_family */
VkCommandPool command_pool;
VkDescriptorPool descriptor_pool;
VkDescriptorSetLayout descriptor_layout;
VkDescriptorSet descriptor_set;
VkShaderModule shader_module;            /* set by CreateVulkanShaderModule */
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;                     /* set by CreateVulkanPipeLine */
VkCommandBuffer command_buffer;
int comp_queue_family;                   /* queue family index; -1 until selected */
int mem_idx;                             /* memory type index; -1 until selected */
}stVulkanInfo;
/*
 * Record, submit and wait for one compute dispatch.
 *
 * info:  initialised Vulkan state (command buffer, pipeline, layout,
 *        descriptor set, queue, device).
 * width: number of work groups along x (groupCountX); y and z are 1.
 *
 * The command buffer is recorded with ONE_TIME_SUBMIT. After the dispatch a
 * memory barrier makes shader writes visible to host reads, so the caller may
 * map the output buffer once this function returns. Results of the Vulkan
 * calls are only logged.
 */
void ProcessVulkanKernel(stVulkanInfo* info,int width)
{
    VkCommandBufferBeginInfo begin_info = {};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    begin_info.pNext = NULL;
    begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    begin_info.pInheritanceInfo = NULL;
    CALL_VK(vkBeginCommandBuffer, (info->command_buffer, &begin_info));

    vkCmdBindPipeline(info->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, info->pipeline);
    vkCmdBindDescriptorSets(info->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                            info->pipeline_layout, 0, 1, &info->descriptor_set, 0, NULL);
    vkCmdDispatch(info->command_buffer, (uint32_t)width, 1, 1);

    /* Waiting on a fence alone does not make device writes visible to the host;
     * a dependency from shader writes to host reads is required as well. */
    VkMemoryBarrier to_host = {};
    to_host.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    to_host.pNext = NULL;
    to_host.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    to_host.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    vkCmdPipelineBarrier(info->command_buffer,
                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT,
                         0,
                         1, &to_host,
                         0, NULL,
                         0, NULL);

    CALL_VK(vkEndCommandBuffer, (info->command_buffer));

    VkSubmitInfo submit_info = {};
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.pNext = NULL;
    submit_info.commandBufferCount = 1;
    submit_info.pCommandBuffers = &info->command_buffer;

    VkFence fence = VK_NULL_HANDLE;
    VkFenceCreateInfo fence_info = {};
    fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    fence_info.pNext = NULL;
    fence_info.flags = 0;
    CALL_VK(vkCreateFence, (info->device, &fence_info, NULL, &fence));
    CALL_VK(vkQueueSubmit, (info->queue, 1, &submit_info, fence));

    /* Timeout is in nanoseconds: 5 seconds, written as an integer constant. */
    VkResult wait_result = vkWaitForFences(info->device, 1, &fence, VK_TRUE, 5000000000ULL);
    check_vkresult("vkWaitForFences", wait_result);
    if (wait_result != VK_SUCCESS) {
        /* The fence may still be pending; drain the queue before destroying it. */
        CALL_VK(vkQueueWaitIdle, (info->queue));
    }
    vkDestroyFence(info->device, fence, NULL);
}
/*
 * Build every Vulkan object in `info` except the shader module and the
 * pipeline, which the caller creates afterwards.
 *
 * info:       structure to fill.
 * param_size: number of descriptors/bindings passed to the descriptor pool and
 *             descriptor set layout helpers.
 *
 * The calls are order-dependent: each step consumes handles produced by the
 * steps before it.
 */
void InitVulkanInfo(stVulkanInfo *info, int param_size)
{
/* -1 marks "not selected yet" for both indices. */
info->mem_idx = -1;
info->comp_queue_family = -1;
/* Instance, physical device, then the queue family and memory type to use. */
CreateVulkanInstance(&info->instance);
SelectVulkanPhyDevice(info->instance, &info->phy_device);
SelectVulkanQueueCluster(info->phy_device, VK_QUEUE_COMPUTE_BIT, &info->comp_queue_family);
SelectVulkanMemIdx(info->phy_device, &info->mem_idx);
/* Logical device and queue 0 of the selected family. */
CreateVulkanDevice(info->phy_device, info->comp_queue_family, &info->device);
CreateVulkanQueue(info->device, info->comp_queue_family, 0, &info->queue);
/* Command pool and the single command buffer. */
CreateVulkanCommandPool(info->device, info->comp_queue_family, &info->command_pool);
CreateVulkanCmdBuffer(info->device, info->command_pool, &info->command_buffer);
/* Descriptor pool, layout and set for the kernel arguments. */
CreateVulkanDescPool(info->device, param_size, &info->descriptor_pool);
CreateVulkanDescSetLayout(info->device, param_size, &info->descriptor_layout);
CreateVulkanDescSets(info->device, info->descriptor_pool, info->descriptor_layout, &info->descriptor_set);
/* Pipeline layout built from the descriptor set layout. */
CreateVulkanPipeLineLayout(info->device, info->descriptor_layout, &info->pipeline_layout);
}
/*
 * Destroy every Vulkan object held in `info` and reset the two indices to -1.
 *
 * info: state previously built by InitVulkanInfo plus the shader module and
 *       pipeline created by the caller.
 *
 * The descriptor set is not freed individually: the pool is created with
 * flags = 0 (no VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT), so
 * vkFreeDescriptorSets is not allowed on it. Destroying the pool releases the
 * set.
 */
void UninitVulkanInfo(stVulkanInfo *info)
{
    info->mem_idx = -1;
    info->comp_queue_family = -1;

    /* Pipeline objects. */
    vkDestroyPipeline(info->device, info->pipeline, NULL);
    vkDestroyPipelineLayout(info->device, info->pipeline_layout, NULL);
    vkDestroyShaderModule(info->device, info->shader_module, NULL);

    /* Descriptor objects; the pool takes its descriptor set with it. */
    vkDestroyDescriptorPool(info->device, info->descriptor_pool, NULL);
    vkDestroyDescriptorSetLayout(info->device, info->descriptor_layout, NULL);

    /* Command objects. */
    vkFreeCommandBuffers(info->device, info->command_pool, 1, &info->command_buffer);
    vkDestroyCommandPool(info->device, info->command_pool, NULL);

    /* Device last, then the instance. */
    vkDestroyDevice(info->device, NULL);
    vkDestroyInstance(info->instance, NULL);
}
int main()
{
int width0 = 1024;
int width1 = 1024;
int param_size = 3;
stVulkanInfo vulkan_info;
InitVulkanInfo(&vulkan_info, param_size);
CreateVulkanShaderModule(vulkan_info.device, "comp.spv",&vulkan_info.shader_module);
CreateVulkanPipeLine(vulkan_info.device, vulkan_info.shader_module, vulkan_info.pipeline_layout, &vulkan_info.pipeline);
stVulkanMemInfo input0, input1, output1;
input0.width = width0;
input1.width = width1;
output1.width = width1;
CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &input0);
CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &input1);
CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &output1);
//感觉是set参数
UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 0, input0);
UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 1, input1);
UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 2, output1);
//向GPU内存里写数据
//input0
void *ptr = NULL;
CALL_VK(vkMapMemory, (vulkan_info.device, input0.deviceMem, 0, input0.width * sizeof(float), 0, &ptr));
int* tmp_ptr = (int*)ptr;
for (int i = 0; i < width0; i++)
{
tmp_ptr[i] = i;
}
vkUnmapMemory(vulkan_info.device, input0.deviceMem);
//input1
CALL_VK(vkMapMemory, (vulkan_info.device, input1.deviceMem, 0, input1.width * sizeof(float), 0, &ptr));
tmp_ptr = (int*)ptr;
for (int i = 0; i < width1; i++)
{
tmp_ptr[i] = i * i;
}
vkUnmapMemory(vulkan_info.device, input1.deviceMem);
//output1
CALL_VK(vkMapMemory, (vulkan_info.device, output1.deviceMem, 0, output1.width * sizeof(float), 0, &ptr));
tmp_ptr = (int*)ptr;
for (int i = 0; i < width1; i++)
{
tmp_ptr[i] = 0;
}
vkUnmapMemory(vulkan_info.device, output1.deviceMem);
//执行kernel
ProcessVulkanKernel(&vulkan_info,1);
//查看输出结果
CALL_VK(vkMapMemory, (vulkan_info.device, output1.deviceMem, 0, width0 * sizeof(float), 0, &ptr));
vkUnmapMemory(vulkan_info.device, output1.deviceMem);
vkFreeMemory(vulkan_info.device, input0.deviceMem, NULL);
vkFreeMemory(vulkan_info.device, input1.deviceMem, NULL);
vkFreeMemory(vulkan_info.device, output1.deviceMem, NULL);
vkDestroyBuffer(vulkan_info.device, input0.buff, NULL);
vkDestroyBuffer(vulkan_info.device, input1.buff, NULL);
vkDestroyBuffer(vulkan_info.device, output1.buff, NULL);
UninitVulkanInfo(&vulkan_info);
printf("11\n");
}
kernel代码:
#version 450
// Each work group covers 8 invocations along x.
layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in;

// Two input arrays and one output array, bound as storage buffers 0..2.
layout (binding = 0) buffer buf_in { int buffer_in[]; };
layout (binding = 1) buffer buf_in1 { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };

// buffer_out[i] = buffer_in[i] * buffer_in1[i] for global x indices below 1024;
// invocations at or beyond 1024 do nothing.
void main()
{
    uint id = gl_GlobalInvocationID.x;
    if (id < 1024) {
        buffer_out[id] = buffer_in[id] * buffer_in1[id];
    }
}
试验1:目的:了解local size的作用。
将vkCmdDispatch(info->command_buffer, width, 1, 1)中的width设置为1,即该任务的groupCountX/Y/Z都为1,然后改变local size的设置,发现整体计算量与local size有关:整体计算量(着色器调用次数)= groupCount × local_size。如果不设置local_size,则三个分量默认都为1。
试验2:目的:了解推入常量。推入常量是用来设置kernel的一些共有参数,对于卷积而言,这些如kernel尺寸等参数还是需要设置的。
host端程序需要在kernel之前完成推入常量的设置:
// Value handed to the shader through a push constant.
int scale = 10;

// The layout built by InitVulkanInfo has no push-constant range, so it is
// destroyed and rebuilt with one *before* any constant is pushed.
vkDestroyPipelineLayout(vulkan_info.device, vulkan_info.pipeline_layout, NULL); // destroy the layout created earlier
VkPushConstantRange pushrange = {};
pushrange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
pushrange.offset = 0;
pushrange.size = sizeof(scale);
VkPipelineLayoutCreateInfo pipelayout_info = {};
pipelayout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipelayout_info.pNext = NULL;
pipelayout_info.flags = 0;
pipelayout_info.setLayoutCount = 1;
pipelayout_info.pSetLayouts = &vulkan_info.descriptor_layout;
pipelayout_info.pushConstantRangeCount = 1;
pipelayout_info.pPushConstantRanges = &pushrange;
CALL_VK(vkCreatePipelineLayout, (vulkan_info.device, &pipelayout_info, NULL, &vulkan_info.pipeline_layout));

// NOTE(review): the compute pipeline must be created with this new layout,
// i.e. call CreateVulkanPipeLine after this point.
// vkCmdPushConstants is a recorded command: it has to be issued while the
// command buffer is recording (after vkBeginCommandBuffer and before
// vkCmdDispatch), using the layout that declares the range.
vkCmdPushConstants(vulkan_info.command_buffer, vulkan_info.pipeline_layout,VK_SHADER_STAGE_COMPUTE_BIT,0,sizeof(scale),&scale);
然后再把kernel里面加入推入常量的读取
#version 450
// Scalar parameter supplied by the host through a push-constant block.
layout(push_constant) uniform PushConsts {
    int scale;
} pushConsts;
layout (binding = 0) buffer buf_in { int buffer_in[]; };
layout (binding = 1) buffer buf_in1 { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };

// buffer_out[i] = (buffer_in[i] * buffer_in1[i]) / scale.
// This shader declares no local_size.
void main()
{
    uint id = gl_GlobalInvocationID.x;
    // Skip invocations that fall outside any of the bound buffers, so a
    // larger dispatch cannot read or write out of range.
    if (id >= uint(buffer_in.length()) ||
        id >= uint(buffer_in1.length()) ||
        id >= uint(buffer_out.length())) {
        return;
    }
    int tmp = buffer_in[id] * buffer_in1[id];
    buffer_out[id] = tmp / pushConsts.scale;
}
这里注意这个scale参数是以结构体的方式传入进来的。
试验3:试一下uniform内存的使用。注意此处多了一个kernel参数(uniform buffer)的内存,所以需要把main函数里的参数个数param_size改为4。
对于uniform内存而言,使用方法和普通内存类似。但是uniform是只读的一片内存,创建和填充过程如下:
//创建uniform内存来设置参数,pushconstant内存有限,128字节
//uniform内存是只读着色器的缓冲区
VkBuffer uniform_buf;
VkDeviceMemory uniform_mem;
int scale = 10;
VkMemoryAllocateInfo memlloc_info = {};
memlloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memlloc_info.pNext = NULL;
memlloc_info.allocationSize = sizeof(scale);
memlloc_info.memoryTypeIndex = vulkan_info.mem_idx;
CALL_VK(vkAllocateMemory, (vulkan_info.device, &memlloc_info, NULL, &uniform_mem));
VkBufferCreateInfo uniform_info = {};
uniform_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
uniform_info.pNext = NULL;
uniform_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
uniform_info.size = sizeof(scale);
uniform_info.queueFamilyIndexCount = 0;
uniform_info.pQueueFamilyIndices = NULL;
uniform_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
uniform_info.flags = 0;
CALL_VK(vkCreateBuffer,(vulkan_info.device,&uniform_info,NULL,&uniform_buf));
CALL_VK(vkBindBufferMemory, (vulkan_info.device, uniform_buf, uniform_mem, 0));
VkDescriptorBufferInfo buffer_desc = {};
buffer_desc.buffer = uniform_buf;
buffer_desc.offset = 0;
buffer_desc.range = sizeof(float);
VkWriteDescriptorSet write_info;
write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write_info.pNext = NULL;
write_info.dstSet = vulkan_info.descriptor_set; //需要将descriptor_set的参数设置
write_info.dstBinding = 3;
write_info.dstArrayElement = 0;
write_info.descriptorCount = 1;
write_info.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
write_info.pImageInfo = NULL;
write_info.pBufferInfo = &buffer_desc;
write_info.pTexelBufferView = NULL;
vkUpdateDescriptorSets(vulkan_info.device, 1, &write_info, 0, NULL);
CALL_VK(vkMapMemory, (vulkan_info.device, uniform_mem, 0, sizeof(float), 0, &ptr));
tmp_ptr = (int*)ptr;
tmp_ptr[0] = scale;
vkUnmapMemory(vulkan_info.device, uniform_mem);
与普通内存相比,需要将Type改为UNIFORM的相关type。注意创建过程需要descriptor_set参数,因此修改下该参数设置(改为uniform buffer):
/*
 * Create a descriptor set layout with `size` compute-stage bindings:
 * bindings 0 .. size-2 are storage buffers and the last binding (size-1) is a
 * uniform buffer.
 *
 * device:            logical device.
 * size:              total number of bindings; must be at least 1 because the
 *                    last slot is always the uniform buffer.
 * descriptor_layout: receives the layout handle, or VK_NULL_HANDLE when size
 *                    is invalid or the temporary array cannot be allocated.
 */
void CreateVulkanDescSetLayout(VkDevice device, int size, VkDescriptorSetLayout *descriptor_layout)
{
    *descriptor_layout = VK_NULL_HANDLE;

    if (size < 1) {
        /* size <= 0 would index bindings[-1] and wrap the unsigned loop bound. */
        LOG_VULKAN("CreateVulkanDescSetLayout failed: size must be at least 1\n");
        return;
    }
    const uint32_t binding_count = (uint32_t)size;
    const uint32_t uniform_slot = binding_count - 1;

    VkDescriptorSetLayoutBinding *bindings =
        (VkDescriptorSetLayoutBinding *)malloc(sizeof(VkDescriptorSetLayoutBinding) * binding_count);
    if (bindings == NULL) {
        LOG_VULKAN("CreateVulkanDescSetLayout failed: out of memory\n");
        return;
    }

    for (uint32_t i = 0; i < binding_count; i++) {
        bindings[i].binding = i;
        bindings[i].descriptorType = (i == uniform_slot)
            ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
            : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        bindings[i].descriptorCount = 1;
        bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
        bindings[i].pImmutableSamplers = NULL;
    }

    VkDescriptorSetLayoutCreateInfo desc_layout_info = {};
    desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
    desc_layout_info.pNext = NULL;
    desc_layout_info.flags = 0;
    desc_layout_info.bindingCount = binding_count;
    desc_layout_info.pBindings = bindings;
    CALL_VK(vkCreateDescriptorSetLayout, (device, &desc_layout_info, NULL, descriptor_layout));

    free(bindings);
}
相应的kernel也得改一下:
#version 450
layout (binding = 0) buffer buf_in { int buffer_in[]; };
layout (binding = 1) buffer buf_in1 { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };
// Scalar parameter read from a uniform buffer at set 0, binding 3.
layout (set=0,binding = 3) uniform mem{
    int scale;
};

// buffer_out[i] = (buffer_in[i] * buffer_in1[i]) / scale.
// This shader declares no local_size.
void main()
{
    uint id = gl_GlobalInvocationID.x;
    // Skip invocations that fall outside any of the bound buffers, so a
    // larger dispatch cannot read or write out of range.
    if (id >= uint(buffer_in.length()) ||
        id >= uint(buffer_in1.length()) ||
        id >= uint(buffer_out.length())) {
        return;
    }
    int tmp = buffer_in[id] * buffer_in1[id];
    buffer_out[id] = tmp / scale;
}
关于vulkan调试:如何在kernel里面printf?下面这种方式我试过之后决定收回——目前对调试没有效果,还是把数据写到一个临时buffer里再读回来查看比较好。
// Enable the GL_EXT_debug_printf extension for this shader.
#extension GL_EXT_debug_printf:enable
// Print a fixed string from the shader. NOTE(review): presumably this call
// belongs inside a shader function body; the surrounding text reports that
// it had no visible effect in the author's setup.
debugPrintfEXT("hello\n");
注意:kernel里一定要做好内存保护(对下标做越界检查)。