vulkan通用计算学习

目前我的计算机图形学基础为0,对着色器、纹理等概念还不是很清楚,正在尝试使用vulkan完成通用计算。sample code参考自github,只不过自己照着重新抄了一份,代码如下:

#include <vulkan/vulkan.h>
#include <stdio.h>
#include <stdlib.h>
#include<windows.h>
unsigned long long GetTime(void)
{
	LARGE_INTEGER m_liPerfFreq = { 0 };
	LARGE_INTEGER m_liPerfStart = { 0 };
	QueryPerformanceFrequency(&m_liPerfFreq);
	QueryPerformanceCounter(&m_liPerfStart);
	return (unsigned long long)(m_liPerfStart.QuadPart * 1.0 * 1000 * 1000 / m_liPerfFreq.QuadPart);
}

// Logging sink used by the helpers in this file; currently plain printf.
#define LOG_VULKAN printf

// Reports the outcome of a Vulkan call on stderr.
//   funcname: text printed in front of the outcome
//   res:      result code returned by the call
// The function only logs; it neither aborts nor hands the result back.
static void check_vkresult(const char* funcname, VkResult res)
{
	if (res != VK_SUCCESS) {
		fprintf(stderr, "%s failed: %d\n", funcname, res);
		return;
	}
	fprintf(stderr, "%s success\n", funcname);
}

// Calls Func with the parenthesised argument list Param and logs its VkResult
// under the function's own name.
#define CALL_VK(Func, Param) check_vkresult(#Func, Func Param)

// Creates a Vulkan instance with no layers and no extensions enabled.
//   instance: receives the created handle
// The result of vkCreateInstance is only logged through CALL_VK.
void CreateVulkanInstance(VkInstance *instance)
{
	// `= {}` zero-fills both structures, so only the non-zero fields are assigned.
	VkApplicationInfo application = {};
	application.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
	application.pApplicationName = "hello world";
	application.applicationVersion = 1;
	application.pEngineName = "hello world";
	application.engineVersion = 1;
	application.apiVersion = VK_MAKE_VERSION(1, 1, 2);

	VkInstanceCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
	create_info.pApplicationInfo = &application;

	CALL_VK(vkCreateInstance, (&create_info, NULL, instance));
}

// Lists the physical devices visible to `instance`, prints each device name and
// stores the first one in *phy_device.
//   instance:   instance to enumerate
//   phy_device: receives the first device, or VK_NULL_HANDLE when no device is
//               reported, the temporary list cannot be allocated, or the
//               enumeration fails
void SelectVulkanPhyDevice(VkInstance instance, VkPhysicalDevice* phy_device)
{
	*phy_device = VK_NULL_HANDLE;

	unsigned int device_count = 0;
	CALL_VK(vkEnumeratePhysicalDevices, (instance, &device_count, NULL));
	if (device_count == 0)
	{
		LOG_VULKAN("SelectVulkanPhyDevice failed: no physical device found\n");
		return;
	}

	VkPhysicalDevice*devices = (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * device_count);
	if (devices == NULL)
	{
		LOG_VULKAN("SelectVulkanPhyDevice failed: out of memory\n");
		return;
	}

	// Keep the result: the array is only meaningful when the call succeeded
	// (VK_INCOMPLETE still fills the first device_count entries).
	VkResult res = vkEnumeratePhysicalDevices(instance, &device_count, devices);
	check_vkresult("vkEnumeratePhysicalDevices", res);
	if ((res == VK_SUCCESS || res == VK_INCOMPLETE) && device_count > 0)
	{
		for (unsigned int i = 0; i < device_count; i++)
		{
			VkPhysicalDeviceProperties props;
			vkGetPhysicalDeviceProperties(devices[i], &props);
			LOG_VULKAN("%s\n", props.deviceName);
		}
		*phy_device = devices[0];
	}
	free(devices);
}

// Finds the first queue family of `phy_device` whose capabilities include every
// bit set in `queueflag`.
//   phy_device: device whose queue families are inspected
//   queueflag:  required capability bits (e.g. VK_QUEUE_COMPUTE_BIT)
//   queue_idx:  receives the family index on success; left untouched when no
//               family matches, so the caller's initial value acts as sentinel
void SelectVulkanQueueCluster(VkPhysicalDevice phy_device, VkQueueFlags queueflag, int* queue_idx)
{
	unsigned int queue_count = 0;
	vkGetPhysicalDeviceQueueFamilyProperties(phy_device, &queue_count, NULL);
	if (queue_count == 0)
	{
		LOG_VULKAN("SelectQueueCluster failed\n");
		return;
	}

	VkQueueFamilyProperties *properties = (VkQueueFamilyProperties*)malloc(queue_count * sizeof(VkQueueFamilyProperties));
	if (properties == NULL)
	{
		LOG_VULKAN("SelectQueueCluster failed\n");
		return;
	}
	vkGetPhysicalDeviceQueueFamilyProperties(phy_device, &queue_count, properties);

	bool flag = false;
	for (unsigned int i = 0; i < queue_count; i++)
	{
		// Test with AND: an OR with a non-zero mask is always true and would
		// accept family 0 regardless of what it supports.
		if ((properties[i].queueFlags & queueflag) == queueflag)
		{
			*queue_idx = (int)i;
			flag = true;
			break;
		}
		// Print the capability bits of every family that was rejected.
		LOG_VULKAN("%u\n", (unsigned int)properties[i].queueFlags);
	}
	if (!flag)
		LOG_VULKAN("SelectQueueCluster failed\n");
	free(properties);
}

// Creates a logical device on `phy_device` that exposes a single queue from
// queue family `queue_idx`; no layers, extensions or optional features.
//   device: receives the created handle
// The result of vkCreateDevice is only logged through CALL_VK.
void CreateVulkanDevice(VkPhysicalDevice phy_device, int queue_idx, VkDevice *device)
{
	// One queue from the requested family, at priority 1.0.
	const float queue_priority[] = { 1.f };
	VkDeviceQueueCreateInfo queue_create = {};
	queue_create.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	queue_create.queueFamilyIndex = queue_idx;
	queue_create.queueCount = 1;
	queue_create.pQueuePriorities = queue_priority;

	// Device built from the physical device and that one queue request;
	// zero-filling covers every count and pointer that stays empty.
	VkDeviceCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	create_info.queueCreateInfoCount = 1;
	create_info.pQueueCreateInfos = &queue_create;

	CALL_VK(vkCreateDevice, (phy_device, &create_info, NULL, device));
}

// Fetches queue number `queue_idx` of family `family_idx` from `device` into
// *queue by calling vkGetDeviceQueue.
void CreateVulkanQueue(VkDevice device,int family_idx,int queue_idx, VkQueue*queue)
{
	const uint32_t family = (uint32_t)family_idx;
	const uint32_t index_in_family = (uint32_t)queue_idx;

	vkGetDeviceQueue(device, family, index_in_family, queue);
}

// Creates a command pool for queue family `family_idx` with no creation flags.
//   command_pool: receives the created handle
// The result of vkCreateCommandPool is only logged through CALL_VK.
void CreateVulkanCommandPool(VkDevice device,int family_idx, VkCommandPool *command_pool)
{
	// Zero-filled, so pNext and flags stay empty.
	VkCommandPoolCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	create_info.queueFamilyIndex = family_idx;

	CALL_VK(vkCreateCommandPool, (device, &create_info, NULL, command_pool));
}

void CreateVulkanDescPool(VkDevice device, int size, VkDescriptorPool *descriptor_pool)
{
	VkDescriptorPoolSize pool_size = {};
	pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
	pool_size.descriptorCount = size;
	VkDescriptorPoolCreateInfo desc_info = {};
	desc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
	desc_info.pNext = NULL;
	desc_info.flags = 0;
	desc_info.maxSets = pool_size.descriptorCount;
	desc_info.poolSizeCount = 1;
	desc_info.pPoolSizes = &pool_size;
	CALL_VK(vkCreateDescriptorPool, (device, &desc_info, NULL, descriptor_pool));
}

// Creates a descriptor set layout with `size` bindings numbered 0..size-1,
// each a single storage buffer visible to the compute stage.
//   descriptor_layout: receives the created handle
// The result of vkCreateDescriptorSetLayout is only logged through CALL_VK.
void CreateVulkanDescSetLayout(VkDevice device, int size, VkDescriptorSetLayout *descriptor_layout)
{
	VkDescriptorSetLayoutBinding *table = (VkDescriptorSetLayoutBinding *)malloc(sizeof(VkDescriptorSetLayoutBinding) * size);

	for (uint32_t slot = 0; slot < size; slot++) {
		VkDescriptorSetLayoutBinding *entry = &table[slot];
		entry->binding = slot;
		entry->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		entry->descriptorCount = 1;
		entry->stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
		entry->pImmutableSamplers = NULL;
	}

	// Zero-filled, so pNext and flags stay empty.
	VkDescriptorSetLayoutCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
	create_info.bindingCount = size;
	create_info.pBindings = table;
	CALL_VK(vkCreateDescriptorSetLayout, (device, &create_info, NULL, descriptor_layout));

	// The binding table is only read during the create call above.
	free(table);
}

// Allocates one descriptor set with layout `descriptor_layout` from
// `descriptor_pool`.
//   descriptor_set: receives the allocated handle
// The result of vkAllocateDescriptorSets is only logged through CALL_VK.
void CreateVulkanDescSets(VkDevice device, VkDescriptorPool descriptor_pool,VkDescriptorSetLayout descriptor_layout, VkDescriptorSet* descriptor_set)
{
	// Zero-filled, so pNext stays NULL.
	VkDescriptorSetAllocateInfo request = {};
	request.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
	request.descriptorPool = descriptor_pool;
	request.descriptorSetCount = 1;
	request.pSetLayouts = &descriptor_layout;

	CALL_VK(vkAllocateDescriptorSets, (device, &request, descriptor_set));
}

// Loads a SPIR-V binary from `filename` and creates a shader module from it.
//   device:   device that owns the module
//   filename: path of the SPIR-V file, opened in binary mode
//   module:   receives the created handle; set to VK_NULL_HANDLE when the file
//             cannot be opened or read, is empty, or its size is not a
//             multiple of 4 bytes (SPIR-V is a stream of 32-bit words)
// The result of vkCreateShaderModule itself is only logged through CALL_VK.
void CreateVulkanShaderModule(VkDevice device, char* filename, VkShaderModule* module)
{
	*module = VK_NULL_HANDLE;

	FILE* fp = fopen(filename, "rb");
	if (fp == NULL)
	{
		LOG_VULKAN("CreateVulkanShaderModule failed: cannot open %s\n", filename);
		return;
	}

	// Determine the file size by seeking to the end.
	long code_len = -1;
	if (fseek(fp, 0, SEEK_END) == 0)
		code_len = ftell(fp);
	if (code_len <= 0 || code_len % 4 != 0 || fseek(fp, 0, SEEK_SET) != 0)
	{
		LOG_VULKAN("CreateVulkanShaderModule failed: bad size for %s\n", filename);
		fclose(fp);
		return;
	}

	unsigned char *code = (unsigned char*)malloc((size_t)code_len);
	if (code == NULL)
	{
		LOG_VULKAN("CreateVulkanShaderModule failed: out of memory\n");
		fclose(fp);
		return;
	}

	size_t read_len = fread(code, sizeof(unsigned char), (size_t)code_len, fp);
	fclose(fp);
	if (read_len != (size_t)code_len)
	{
		LOG_VULKAN("CreateVulkanShaderModule failed: short read on %s\n", filename);
		free(code);
		return;
	}

	VkShaderModuleCreateInfo shader_info = {};
	shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
	shader_info.pNext = NULL;
	shader_info.flags = 0;
	shader_info.codeSize = (size_t)code_len;
	shader_info.pCode = (const uint32_t*)code;

	CALL_VK(vkCreateShaderModule, (device, &shader_info, NULL, module));
	// The code buffer is only read during the create call above.
	free(code);
}

// Creates a pipeline layout that uses the single descriptor set layout
// `layout` and declares no push-constant ranges.
//   pipeline_layout: receives the created handle
// The result of vkCreatePipelineLayout is only logged through CALL_VK.
void CreateVulkanPipeLineLayout(VkDevice device, VkDescriptorSetLayout layout, VkPipelineLayout* pipeline_layout)
{
	// Zero-filled, so pNext, flags and the push-constant fields stay empty.
	VkPipelineLayoutCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	create_info.setLayoutCount = 1;
	create_info.pSetLayouts = &layout;

	CALL_VK(vkCreatePipelineLayout, (device, &create_info, NULL, pipeline_layout));
}

// Creates a compute pipeline whose single stage runs entry point "main" of
// `module`, using pipeline layout `layout` and no pipeline cache.
//   pipeline: receives the created handle
// The result of vkCreateComputePipelines is only logged through CALL_VK.
void CreateVulkanPipeLine(VkDevice device,VkShaderModule module, VkPipelineLayout layout, VkPipeline*pipeline)
{
	// Zero-filled, so pNext, flags and basePipelineIndex stay 0/NULL.
	VkComputePipelineCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
	create_info.layout = layout;
	create_info.basePipelineHandle = VK_NULL_HANDLE;

	// Describe the compute stage directly inside the create info.
	create_info.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
	create_info.stage.module = module;
	create_info.stage.pName = "main";

	CALL_VK(vkCreateComputePipelines, (device, VK_NULL_HANDLE, 1, &create_info, NULL, pipeline));
}

// Picks a host-visible memory type of `phy_device` for buffers the CPU maps.
// A type that is also host-coherent is preferred, because the rest of this
// program maps and unmaps memory without flushing or invalidating ranges.
//   phy_device: device whose memory types are inspected
//   mem_idx:    receives the chosen memory type index; left untouched when no
//               host-visible type exists
// The property flags of every inspected type are printed while searching.
void SelectVulkanMemIdx(VkPhysicalDevice phy_device, int* mem_idx)
{
	VkPhysicalDeviceMemoryProperties props;
	vkGetPhysicalDeviceMemoryProperties(phy_device, &props);

	int visible_idx = -1;   // first host-visible type, coherent or not
	int coherent_idx = -1;  // first type that is host-visible and host-coherent
	for (uint32_t i = 0; i < props.memoryTypeCount; i++)
	{
		VkMemoryPropertyFlags flags = props.memoryTypes[i].propertyFlags;
		printf("%u\n", (unsigned int)flags);
		if (0 == (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
			continue;
		if (visible_idx < 0)
			visible_idx = (int)i;
		if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
			coherent_idx = (int)i;
			break;
		}
	}

	if (coherent_idx >= 0)
	{
		*mem_idx = coherent_idx;
	}
	else if (visible_idx >= 0)
	{
		// Usable, but host writes/reads then need explicit flush/invalidate.
		*mem_idx = visible_idx;
		LOG_VULKAN("SelectVulkanMemIdx: memory type %d is not host coherent\n", visible_idx);
	}
	else
	{
		LOG_VULKAN("SelectVulkanMemIdx failed\n");
	}
}
// Bundles one buffer with the device memory bound to it.
typedef struct tagVulkanMemInfo
{
	VkDeviceMemory deviceMem;	// memory allocation bound to `buff`
	VkBuffer buff;	// buffer handle created for this entry
	int width;	// element count; users size the buffer as width * sizeof(float) bytes
}stVulkanMemInfo;

// Creates a storage buffer of mem->width * sizeof(float) bytes and binds a
// fresh memory allocation to it.
//   device:  device that owns the buffer and the memory
//   mem_idx: memory type index used for the allocation
//   mem:     `width` must be set by the caller; `buff` and `deviceMem` receive
//            the new handles, or VK_NULL_HANDLE when buffer creation or the
//            allocation fails
// The buffer is created first so the allocation can be sized from
// vkGetBufferMemoryRequirements, which may ask for more than the raw byte count.
void CreateVulkanMemory(VkDevice device,int mem_idx, stVulkanMemInfo *mem)
{
	mem->buff = VK_NULL_HANDLE;
	mem->deviceMem = VK_NULL_HANDLE;

	VkBufferCreateInfo buffer_info = {};
	buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	buffer_info.pNext = NULL;
	buffer_info.flags = 0;
	buffer_info.size = mem->width * sizeof(float);
	buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
	buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	buffer_info.queueFamilyIndexCount = 0;
	buffer_info.pQueueFamilyIndices = NULL;
	VkResult res = vkCreateBuffer(device, &buffer_info, NULL, &mem->buff);
	check_vkresult("vkCreateBuffer", res);
	if (res != VK_SUCCESS)
	{
		mem->buff = VK_NULL_HANDLE;
		return;
	}

	// Ask the driver how much memory, and which memory types, this buffer needs.
	VkMemoryRequirements reqs;
	vkGetBufferMemoryRequirements(device, mem->buff, &reqs);
	if (mem_idx < 0 || mem_idx >= 32 || 0 == (reqs.memoryTypeBits & (1u << mem_idx)))
		LOG_VULKAN("CreateVulkanMemory: memory type %d is not supported by this buffer\n", mem_idx);

	VkMemoryAllocateInfo memlloc_info = {};
	memlloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	memlloc_info.pNext = NULL;
	memlloc_info.allocationSize = reqs.size;
	memlloc_info.memoryTypeIndex = mem_idx;
	res = vkAllocateMemory(device, &memlloc_info, NULL, &mem->deviceMem);
	check_vkresult("vkAllocateMemory", res);
	if (res != VK_SUCCESS)
	{
		mem->deviceMem = VK_NULL_HANDLE;
		return;
	}

	CALL_VK(vkBindBufferMemory, (device, mem->buff, mem->deviceMem, 0));
}

// Points binding `arg_idx` of `descriptor_set` at the whole buffer in `mem`
// (offset 0, mem.width * sizeof(float) bytes) as a storage buffer.
void UpdateVulkanDescSets(VkDevice device, VkDescriptorSet descriptor_set,int arg_idx,stVulkanMemInfo mem)
{
	VkDescriptorBufferInfo target = {};
	target.buffer = mem.buff;
	target.offset = 0;
	target.range = mem.width * sizeof(float);

	// Zero-filled, so pNext, dstArrayElement and the unused info pointers stay 0/NULL.
	VkWriteDescriptorSet write = {};
	write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	write.dstSet = descriptor_set;
	write.dstBinding = arg_idx;
	write.descriptorCount = 1;
	write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
	write.pBufferInfo = &target;

	vkUpdateDescriptorSets(device, 1, &write, 0, NULL);
}

// Allocates one primary command buffer from `cmdPool`.
//   command_buffer: receives the allocated handle
// The result of vkAllocateCommandBuffers is only logged through CALL_VK.
void CreateVulkanCmdBuffer(VkDevice device, VkCommandPool cmdPool, VkCommandBuffer *command_buffer)
{
	// Zero-filled, so pNext stays NULL.
	VkCommandBufferAllocateInfo request = {};
	request.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	request.commandPool = cmdPool;
	request.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	request.commandBufferCount = 1;

	CALL_VK(vkAllocateCommandBuffers, (device, &request, command_buffer));
}

// All Vulkan handles and indices the compute example works with.
// InitVulkanInfo fills every field except shader_module and pipeline, which
// main() creates separately; UninitVulkanInfo destroys the handles.
typedef struct tagVulkanInfo
{
	VkInstance instance;	// created by CreateVulkanInstance
	VkPhysicalDevice phy_device;	// chosen by SelectVulkanPhyDevice
	VkDevice device;	// logical device created on phy_device
	VkQueue queue;	// queue 0 of family comp_queue_family
	VkCommandPool command_pool;	// pool that command_buffer is allocated from
	VkDescriptorPool descriptor_pool;	// pool that descriptor_set is allocated from
	VkDescriptorSetLayout descriptor_layout;	// layout describing the kernel's buffer bindings
	VkDescriptorSet descriptor_set;	// set bound when the kernel is dispatched
	VkShaderModule shader_module;	// compute shader module (not set by InitVulkanInfo)
	VkPipelineLayout pipeline_layout;	// layout built from descriptor_layout
	VkPipeline pipeline;	// compute pipeline (not set by InitVulkanInfo)
	VkCommandBuffer command_buffer;	// command buffer recorded by ProcessVulkanKernel
	int comp_queue_family;	// queue family index from SelectVulkanQueueCluster; -1 until selected
	int mem_idx;	// memory type index from SelectVulkanMemIdx; -1 until selected
}stVulkanInfo;

// Records the compute dispatch into info->command_buffer, submits it to
// info->queue and waits (up to 5 seconds) for it to finish.
//   info:  context holding the pipeline, layout, descriptor set, command
//          buffer, queue and device to use
//   width: number of workgroups dispatched along X (Y and Z are 1)
// All Vulkan results are logged; none is returned to the caller.
void ProcessVulkanKernel(stVulkanInfo* info,int width)
{
	VkCommandBufferBeginInfo begin_info = {};
	begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	begin_info.pNext = NULL;
	begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	begin_info.pInheritanceInfo = NULL;
	CALL_VK(vkBeginCommandBuffer, (info->command_buffer, &begin_info));
	vkCmdBindPipeline(info->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, info->pipeline);
	vkCmdBindDescriptorSets(info->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
		info->pipeline_layout,
		0,
		1,
		&info->descriptor_set,
		0,
		NULL);
	vkCmdDispatch(info->command_buffer, width, 1, 1);

	// Make the shader's buffer writes available to host reads. Waiting on the
	// fence alone only orders device accesses; the host-side read of the
	// output needs this explicit dependency.
	VkMemoryBarrier host_barrier = {};
	host_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
	host_barrier.pNext = NULL;
	host_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
	host_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
	vkCmdPipelineBarrier(info->command_buffer,
		VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		VK_PIPELINE_STAGE_HOST_BIT,
		0,
		1, &host_barrier,
		0, NULL,
		0, NULL);
	CALL_VK(vkEndCommandBuffer, (info->command_buffer));

	VkSubmitInfo submit_info = {
		VK_STRUCTURE_TYPE_SUBMIT_INFO,
		NULL,
		0,
		NULL,
		NULL,
		1,
		&info->command_buffer,
		0,
		NULL
	};
	VkFence fence = VK_NULL_HANDLE;
	VkFenceCreateInfo fence_info = {
		VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
		NULL,
		0
	};

	VkResult res = vkCreateFence(info->device, &fence_info, NULL, &fence);
	check_vkresult("vkCreateFence", res);
	if (res != VK_SUCCESS)
		return;

	CALL_VK(vkQueueSubmit, (info->queue, 1, &submit_info, fence));

	// 5 seconds, expressed in nanoseconds as an integer.
	const uint64_t timeout_ns = 5ull * 1000ull * 1000ull * 1000ull;
	res = vkWaitForFences(info->device, 1, &fence, VK_TRUE, timeout_ns);
	check_vkresult("vkWaitForFences", res);
	if (res == VK_TIMEOUT)
	{
		// The fence may still be in use by the queue; drain the queue before
		// the fence is destroyed.
		CALL_VK(vkQueueWaitIdle, (info->queue));
	}

	vkDestroyFence(info->device, fence, NULL);
}



// Builds everything in `info` that does not depend on a particular shader:
// instance, physical device, queue family and memory type indices, logical
// device, queue, command pool and buffer, descriptor pool, set layout, set,
// and pipeline layout.
//   info:       context to fill; shader_module and pipeline are not touched
//   param_size: number of buffer bindings the kernel takes
void InitVulkanInfo(stVulkanInfo *info, int param_size)
{
	// Sentinels that stay in place when the selection helpers find nothing.
	info->comp_queue_family = -1;
	info->mem_idx = -1;

	// Instance and physical device.
	CreateVulkanInstance(&info->instance);
	SelectVulkanPhyDevice(info->instance, &info->phy_device);
	const VkPhysicalDevice gpu = info->phy_device;

	// Indices queried from the physical device.
	SelectVulkanQueueCluster(gpu, VK_QUEUE_COMPUTE_BIT, &info->comp_queue_family);
	SelectVulkanMemIdx(gpu, &info->mem_idx);
	const int family = info->comp_queue_family;

	// Logical device plus its queue and command objects.
	CreateVulkanDevice(gpu, family, &info->device);
	const VkDevice dev = info->device;
	CreateVulkanQueue(dev, family, 0, &info->queue);
	CreateVulkanCommandPool(dev, family, &info->command_pool);
	CreateVulkanCmdBuffer(dev, info->command_pool, &info->command_buffer);

	// Descriptor objects and the pipeline layout built on them.
	CreateVulkanDescPool(dev, param_size, &info->descriptor_pool);
	CreateVulkanDescSetLayout(dev, param_size, &info->descriptor_layout);
	CreateVulkanDescSets(dev, info->descriptor_pool, info->descriptor_layout, &info->descriptor_set);
	CreateVulkanPipeLineLayout(dev, info->descriptor_layout, &info->pipeline_layout);
}

// Destroys every object held in `info` and resets the two indices to -1.
//   info: context to tear down; shader_module and pipeline must hold handles
//         created by the caller (or VK_NULL_HANDLE)
// The descriptor set is not freed individually: the pool is created without
// VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, so vkFreeDescriptorSets
// is not allowed on it, and destroying the pool releases the set anyway.
void UninitVulkanInfo(stVulkanInfo *info)
{
	// Nothing may still be executing on the device while objects are destroyed.
	vkDeviceWaitIdle(info->device);

	// Pipeline first, then the objects it was built from.
	vkDestroyPipeline(info->device, info->pipeline, NULL);
	vkDestroyPipelineLayout(info->device, info->pipeline_layout, NULL);
	vkDestroyShaderModule(info->device, info->shader_module, NULL);

	vkDestroyDescriptorPool(info->device, info->descriptor_pool, NULL);
	vkDestroyDescriptorSetLayout(info->device, info->descriptor_layout, NULL);

	vkFreeCommandBuffers(info->device, info->command_pool, 1, &info->command_buffer);
	vkDestroyCommandPool(info->device, info->command_pool, NULL);

	vkDestroyDevice(info->device, NULL);
	vkDestroyInstance(info->instance, NULL);

	info->mem_idx = -1;
	info->comp_queue_family = -1;
}

int main()
{
	int width0 = 1024;
	int width1 = 1024;
	int param_size = 3;

	stVulkanInfo vulkan_info;
	InitVulkanInfo(&vulkan_info, param_size);

	CreateVulkanShaderModule(vulkan_info.device, "comp.spv",&vulkan_info.shader_module);

	CreateVulkanPipeLine(vulkan_info.device, vulkan_info.shader_module, vulkan_info.pipeline_layout, &vulkan_info.pipeline);

	stVulkanMemInfo input0, input1, output1;
	input0.width = width0;
	input1.width = width1;
	output1.width = width1;
	CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &input0);
	CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &input1);
	CreateVulkanMemory(vulkan_info.device, vulkan_info.mem_idx, &output1);
	
	//感觉是set参数
	UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 0, input0);
	UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 1, input1);
	UpdateVulkanDescSets(vulkan_info.device, vulkan_info.descriptor_set, 2, output1);
	
	//向GPU内存里写数据
	//input0
	void *ptr = NULL;
	CALL_VK(vkMapMemory, (vulkan_info.device, input0.deviceMem, 0, input0.width * sizeof(float), 0, &ptr));

	int* tmp_ptr = (int*)ptr;
	for (int i = 0; i < width0; i++)
	{
		tmp_ptr[i] = i;
	}
	vkUnmapMemory(vulkan_info.device, input0.deviceMem);

	//input1
	CALL_VK(vkMapMemory, (vulkan_info.device, input1.deviceMem, 0, input1.width * sizeof(float), 0, &ptr));

	tmp_ptr = (int*)ptr;
	for (int i = 0; i < width1; i++)
	{
		tmp_ptr[i] = i * i;
	}
	vkUnmapMemory(vulkan_info.device, input1.deviceMem);

	//output1
	CALL_VK(vkMapMemory, (vulkan_info.device, output1.deviceMem, 0, output1.width * sizeof(float), 0, &ptr));

	tmp_ptr = (int*)ptr;
	for (int i = 0; i < width1; i++)
	{
		tmp_ptr[i] = 0;
	}
	vkUnmapMemory(vulkan_info.device, output1.deviceMem);
	//执行kernel
	ProcessVulkanKernel(&vulkan_info,1);
	
	//查看输出结果
	CALL_VK(vkMapMemory, (vulkan_info.device, output1.deviceMem, 0, width0 * sizeof(float), 0, &ptr));
	vkUnmapMemory(vulkan_info.device, output1.deviceMem);

	vkFreeMemory(vulkan_info.device, input0.deviceMem, NULL);
	vkFreeMemory(vulkan_info.device, input1.deviceMem, NULL);
	vkFreeMemory(vulkan_info.device, output1.deviceMem, NULL);
	vkDestroyBuffer(vulkan_info.device, input0.buff, NULL);
	vkDestroyBuffer(vulkan_info.device, input1.buff, NULL);
	vkDestroyBuffer(vulkan_info.device, output1.buff, NULL);
	
	UninitVulkanInfo(&vulkan_info);
	printf("11\n");
	
}

kernel代码:

#version 450

// Workgroup of 8 invocations along X.
layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in;

// Two input buffers and one output buffer, all arrays of int.
layout (binding = 0) buffer buf_in  { int buffer_in[]; };
layout (binding = 1) buffer buf_in1  { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };

// Element-wise product of the two inputs; invocations whose global X index is
// 1024 or more do nothing.
void main()
{
    uint id = gl_GlobalInvocationID.x;
    if (id < 1024)
    {
        buffer_out[id] = buffer_in[id] * buffer_in1[id];
    }
}

试验1:目的:了解local size是干啥的。

把vkCmdDispatch(info->command_buffer, width, 1, 1)中的width设置为1,即该任务的groupCountX/Y/Z都为1,然后分别修改local size,发现整体计算量与local size的设置有关:总的调用次数(invocation)等于groupCount×local_size。如果不设置local_size,三个参数默认都为1。

试验2:目的:了解推入常量。推入常量是用来设置kernel的一些共有参数,对于卷积而言,这些如kernel尺寸等参数还是需要设置的。

host端程序需要在kernel之前完成推入常量的设置:

    int scale = 10;

	// The pipeline layout has to declare the push-constant range before
	// vkCmdPushConstants may reference it, so rebuild the layout first.
	vkDestroyPipelineLayout(vulkan_info.device, vulkan_info.pipeline_layout, NULL);  // destroy the layout created earlier
	VkPushConstantRange pushrange = {};
	pushrange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
	pushrange.offset = 0;
	pushrange.size = sizeof(scale);

	VkPipelineLayoutCreateInfo pipelayout_info = {};
	pipelayout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	pipelayout_info.pNext = NULL;
	pipelayout_info.flags = 0;
	pipelayout_info.setLayoutCount = 1;
	pipelayout_info.pSetLayouts = &vulkan_info.descriptor_layout;
	pipelayout_info.pushConstantRangeCount = 1;
	pipelayout_info.pPushConstantRanges = &pushrange;
	CALL_VK(vkCreatePipelineLayout, (vulkan_info.device, &pipelayout_info, NULL, &vulkan_info.pipeline_layout));

	// Push the value using the new layout.
	// NOTE: this command is only valid while the command buffer is recording
	// (after vkBeginCommandBuffer and before vkCmdDispatch), and the compute
	// pipeline must be created from this same layout.
	vkCmdPushConstants(vulkan_info.command_buffer, vulkan_info.pipeline_layout,VK_SHADER_STAGE_COMPUTE_BIT,0,sizeof(scale),&scale);

然后再把kernel里面加入推入常量的读取

#version 450

// Workgroup size written out explicitly instead of relying on the compiler's
// behaviour when no size is declared.
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Value supplied by the host through vkCmdPushConstants.
layout(push_constant) uniform PushConsts {
	int scale;
} pushConsts;
layout (binding = 0) buffer buf_in  { int buffer_in[]; };
layout (binding = 1) buffer buf_in1  { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };

// Writes (buffer_in[id] * buffer_in1[id]) / scale for every invocation that
// falls inside all three bound buffers.
void main()
{
    uint id = gl_GlobalInvocationID.x;

    // Guard against dispatches larger than the bound buffers.
    if (id >= uint(buffer_out.length()) ||
        id >= uint(buffer_in.length()) ||
        id >= uint(buffer_in1.length()))
        return;

    int tmp = buffer_in[id] * buffer_in1[id];
	buffer_out[id] = tmp / pushConsts.scale;
}

这里注意这个scale参数是以结构体的方式传入进来的。

试验3:试一下uniform内存的使用。注意此处增加了一块作为kernel参数的内存,所以需要在main函数的配置里将内存个数即param_size改为4。

对于uniform内存而言,使用方法和普通内存类似。但是uniform是只读的一片内存,创建和填充过程如下:

    //创建uniform内存来设置参数,pushconstant内存有限,128字节
	//uniform内存是只读着色器的缓冲区
	VkBuffer uniform_buf;
	VkDeviceMemory uniform_mem;
	int scale = 10;
	VkMemoryAllocateInfo memlloc_info = {};
	memlloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	memlloc_info.pNext = NULL;
	memlloc_info.allocationSize = sizeof(scale);
	memlloc_info.memoryTypeIndex = vulkan_info.mem_idx;
	CALL_VK(vkAllocateMemory, (vulkan_info.device, &memlloc_info, NULL, &uniform_mem));

	VkBufferCreateInfo uniform_info = {};
	uniform_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	uniform_info.pNext = NULL;
	uniform_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
	uniform_info.size = sizeof(scale);
	uniform_info.queueFamilyIndexCount = 0;
	uniform_info.pQueueFamilyIndices = NULL;
	uniform_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	uniform_info.flags = 0;
	CALL_VK(vkCreateBuffer,(vulkan_info.device,&uniform_info,NULL,&uniform_buf));
	CALL_VK(vkBindBufferMemory, (vulkan_info.device, uniform_buf, uniform_mem, 0));

	VkDescriptorBufferInfo buffer_desc = {};
	buffer_desc.buffer = uniform_buf;
	buffer_desc.offset = 0;
	buffer_desc.range = sizeof(float);

	VkWriteDescriptorSet write_info;
	write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	write_info.pNext = NULL;
	write_info.dstSet = vulkan_info.descriptor_set;  //需要将descriptor_set的参数设置
	write_info.dstBinding = 3;
	write_info.dstArrayElement = 0;
	write_info.descriptorCount = 1;
	write_info.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
	write_info.pImageInfo = NULL;
	write_info.pBufferInfo = &buffer_desc;
	write_info.pTexelBufferView = NULL;
	vkUpdateDescriptorSets(vulkan_info.device, 1, &write_info, 0, NULL);

	CALL_VK(vkMapMemory, (vulkan_info.device, uniform_mem, 0, sizeof(float), 0, &ptr));

	tmp_ptr = (int*)ptr;
	tmp_ptr[0] = scale;
	vkUnmapMemory(vulkan_info.device, uniform_mem);

与普通内存相比,需要将Type改为UNIFORM的相关type。注意创建过程需要descriptor_set参数,因此修改下该参数设置(改为uniform buffer):

// Creates a descriptor set layout with `size` compute-stage bindings numbered
// 0..size-1: the first size-1 are storage buffers, the last is a uniform buffer.
//   descriptor_layout: receives the created handle
// The result of vkCreateDescriptorSetLayout is only logged through CALL_VK.
void CreateVulkanDescSetLayout(VkDevice device, int size, VkDescriptorSetLayout *descriptor_layout)
{
	VkDescriptorSetLayoutBinding *table = (VkDescriptorSetLayoutBinding *)malloc(sizeof(VkDescriptorSetLayoutBinding) * size);

	// Every slot except the last one is a storage buffer.
	for (uint32_t slot = 0; slot < size - 1; slot++) {
		VkDescriptorSetLayoutBinding *entry = &table[slot];
		entry->binding = slot;
		entry->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		entry->descriptorCount = 1;
		entry->stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
		entry->pImmutableSamplers = NULL;
	}

	// The last slot carries the uniform buffer.
	VkDescriptorSetLayoutBinding *last = &table[size - 1];
	last->binding = size - 1;
	last->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
	last->descriptorCount = 1;
	last->stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
	last->pImmutableSamplers = NULL;

	// Zero-filled, so pNext and flags stay empty.
	VkDescriptorSetLayoutCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
	create_info.bindingCount = size;
	create_info.pBindings = table;
	CALL_VK(vkCreateDescriptorSetLayout, (device, &create_info, NULL, descriptor_layout));

	// The binding table is only read during the create call above.
	free(table);
}

相应的kernel也得改一下:

#version 450

// Workgroup size written out explicitly instead of relying on the compiler's
// behaviour when no size is declared.
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) buffer buf_in  { int buffer_in[]; };
layout (binding = 1) buffer buf_in1  { int buffer_in1[]; };
layout (binding = 2) buffer buf_out { int buffer_out[]; };
// Read-only parameter block supplied through a uniform buffer at binding 3.
layout (set=0,binding = 3) uniform mem{
	int scale;
};

// Writes (buffer_in[id] * buffer_in1[id]) / scale for every invocation that
// falls inside all three bound buffers.
void main()
{
    uint id = gl_GlobalInvocationID.x;

    // Guard against dispatches larger than the bound buffers.
    if (id >= uint(buffer_out.length()) ||
        id >= uint(buffer_in.length()) ||
        id >= uint(buffer_in1.length()))
        return;

    int tmp = buffer_in[id] * buffer_in1[id];
	buffer_out[id] = tmp / scale;
}

关于vulkan调试:如何在kernel里面printf?下面这种方式先收回,目前对调试无效,还是把数据写到一个临时buffer里、再拿回host端查看比较好。

#extension GL_EXT_debug_printf:enable
debugPrintfEXT("hello\n");

注意了,kernel一定要做好内存保护。

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值