OpenCL中kernel的循环调用

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u011028771/article/details/52764422

kernel的循环调用主要是涉及缓冲区的创建和主机端命令同步;很多例子中会在创建缓存对象时对缓存对象做初始化,例如:

cl_mem memObject1 = clCreateBuffer(context,CL_MEM_READ_ONLY |
                                   CL_MEM_COPY_HOST_PTR ,
                               sizeof(float) * MIXSIZE,&input1,&error);

在这里介绍两点:
其一
在本次实验中,我们需要将缓存对象的创建和数据传输分开执行,例如:

cl_mem memObject1 = clCreateBuffer(context,CL_MEM_READ_ONLY ,
                           sizeof(float) * MIXSIZE,NULL,&error);
error = clEnqueueWriteBuffer(queue, memObject1, CL_FALSE, 0, 
                              MIXSIZE * sizeof(float), a_in, 0, NULL, NULL);

因为在循环调用中我们并不需要每次循环都创建缓存对象,只需要一次创建,以后只需要向缓存对象中写入数据即可;
其二
由于是循环调用,所以涉及到同步问题:内核执行必须在输入数据写入缓存对象完成之后进行;下一次循环必须在上一次循环的内核执行结束之后才能开始(使用非阻塞写入时,主机端在写入完成之前也不能修改源数组)。关于主机端同步,在上一篇博客中有介绍:
http://blog.csdn.net/u011028771/article/details/52761136

host.c
#include<stdio.h>
#include<stdlib.h>
#include<windows.h>
#include<CL/cl.h>
#pragma warning( disable : 4996 )
#define MIXSIZE 8192

int main() {
    cl_int error;
    cl_platform_id platforms;
    cl_device_id devices;
    cl_context context;
    FILE *program_handle;
    size_t program_size;
    char *program_buffer;
    cl_program program;
    size_t log_size;
    char *program_log;
    char kernel_name[] = "createBuffer";
    cl_kernel kernel;
    cl_command_queue queue;
    //获取平台
    error = clGetPlatformIDs(1, &platforms, NULL);
    if (error != 0) {
        printf("Get platform failed!");
        return -1;
    }
    error = clGetDeviceIDs(platforms, CL_DEVICE_TYPE_GPU, 1, &devices, NULL);
    if (error != 0) {
        printf("Get device failed!");
        return -1;
    }
    //创建上下文
    context = clCreateContext(NULL,1,&devices,NULL,NULL,&error);
    if (error != 0) {
        printf("Creat context failed!");
        return -1;
    }
    //创建程序
    program_handle = fopen("kernel.cl","rb");
    if (program_handle == NULL) {
        printf("The kernle can not be opened!");
        return -1;
    }
    fseek(program_handle,0,SEEK_END);
    program_size = ftell(program_handle);
    rewind(program_handle);

    program_buffer = (char *)malloc(program_size+1);
    program_buffer[program_size] = '\0';
    error=fread(program_buffer,sizeof(char),program_size,program_handle);
    if (error == 0) {
        printf("Read kernel failed!");
        return -1;
    }
    fclose(program_handle);
    program = clCreateProgramWithSource(context,1,(const char **)&program_buffer,&program_size,&error);
    if (error < 0) {
        printf("Couldn't create the program!");
        return -1;
    }
    //编译程序
    error = clBuildProgram(program,1,&devices,NULL,NULL,NULL);
    if (error < 0) {
        //确定日志文件的大小
        clGetProgramBuildInfo(program,devices,CL_PROGRAM_BUILD_LOG,0,NULL,&log_size);
        program_log = (char *)malloc(log_size+1);
        program_log[log_size] = '\0';
        //读取日志
        clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, log_size+1, program_log, NULL);
        printf("%s\n",program_log);
        free(program_log);
        getchar();
        return -1;
    }
    //创建命令队列
    queue = clCreateCommandQueue(context, devices, CL_QUEUE_PROFILING_ENABLE, &error);
    if (error < 0) {
        printf("Coudn't create the command queue");
        return -1;
    }
    //创建内核
    kernel = clCreateKernel(program,kernel_name,&error);
    if (kernel==NULL) {
        printf("Couldn't create kernel!\n");
        return -1;
    }
    //创建缓存对象
    cl_mem memObject1 = clCreateBuffer(context,CL_MEM_READ_ONLY ,
                                                                    sizeof(float) * MIXSIZE,NULL,&error);
    if (error < 0) {
        printf("Creat memObject1 failed!\n");
        return -1;
    }
    cl_mem memObject2 = clCreateBuffer(context, CL_MEM_READ_ONLY , 
                                                                    sizeof(float) * MIXSIZE, NULL, &error);
    if (error < 0) {
        printf("Creat memObject2 failed!\n");
        return -1;
    }
    cl_mem memObject3 = clCreateBuffer(context, CL_MEM_READ_WRITE , sizeof(float) * MIXSIZE, NULL, &error);
    if (error < 0) {
        printf("Creat memObject3 failed!\n");
        return -1;
    }
    //设置内核参数
    error = clSetKernelArg(kernel,0,sizeof(cl_mem),&memObject1);
    error|= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObject2);
    error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObject3);
    if (error != CL_SUCCESS) {
        printf("Error setting kernel arguments!\n");
        return -1;
    }
    //该实验是kernel循环调用;将创建缓冲对象和数据写入缓冲对象分开执行
    //kernel有三个输入参数
    //实现的功能:
    //a和b更新10次,result用于将上一次运算的结果与自身数据加和,并负责输出最终结果;

    //初始化参数
    float result[MIXSIZE];
    float a_in[MIXSIZE];
    float b_in[MIXSIZE];
    float c_in[MIXSIZE];
    cl_int status = 0;
    cl_event evt1;
    cl_event evt2;
    cl_event evt3;
    for (int i = 0; i < MIXSIZE; i++) {
        a_in[i] = 1.0*i;
        b_in[i] = i*2.0;
        c_in[i] = 0.0;
        result[i] = 0.0;
    }
    //配置工作项
    size_t maxWorkGroupSize = 0;
    clGetDeviceInfo(devices, CL_DEVICE_MAX_WORK_GROUP_SIZE,
        sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
    size_t globalWorkSize = MIXSIZE;
    size_t localWorkSize = maxWorkGroupSize;
    for (int j = 0; j < 10; j++) {
        error = clEnqueueWriteBuffer(queue, memObject1, CL_FALSE, 0, MIXSIZE * sizeof(float), a_in, 0, NULL, &evt1);
        if (error != CL_SUCCESS) {
            printf("write data failed!\n");
            return -1;
        }
        error = clEnqueueWriteBuffer(queue, memObject2, CL_FALSE, 0, MIXSIZE * sizeof(float), b_in, 1, &evt1, &evt2);
        if (error != CL_SUCCESS) {
            printf("write data failed!\n");
            return -1;
        }

        for (int i = 0; i < MIXSIZE; i++) {
            c_in[i] += a_in[i] + b_in[i];
        }
        //执行内核
        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkSize, &localWorkSize, 1, &evt2, &evt3);
        if (error != CL_SUCCESS) {
            printf("Error queuing kernel for execution!\n");
            return -1;
        }
        //数据初始化
        for (int i = 0; i < MIXSIZE; i++) {
            a_in[i] = 1.0*i;
            b_in[i] = i*2.0;
        }
        //同步
        clWaitForEvents(1, &evt3);
        clReleaseEvent(evt1);
        clReleaseEvent(evt2);
        clReleaseEvent(evt3);
    }

    //读取执行结果
    error = clEnqueueReadBuffer(queue,memObject3,CL_TRUE,0,MIXSIZE*sizeof(float),result,0,NULL,NULL);
    if (error != CL_SUCCESS) {
        printf("Error reading result buffer!\n");
        return -1;
    }
    //显示结果
    for (int i = 0; i < MIXSIZE; i++) {
        if (result[i] != c_in[i]) { 
            printf("failed!\n");
            printf("%f,%f,%d\n",result[i],c_in[i],i);
            getchar();
            return 0;
        }
    }
    printf("successed!\n");

    clReleaseProgram(program);
    clReleaseContext(context);
    clReleaseCommandQueue(queue);
    clReleaseDevice(devices);
    clReleaseKernel(kernel);

    getchar();
    return 0;
}
kernel.cl
//数据加法
// Element-wise accumulation: each work-item adds a_in[i] + b_in[i] onto
// the value already stored in result[i], where i is its global id in
// dimension 0.
__kernel void createBuffer(__global const float *a_in,
    __global const float *b_in,
    __global float *result) {
    const int idx = get_global_id(0);
    const float sum = a_in[idx] + b_in[idx];
    result[idx] = result[idx] + sum;
}

注意:
由于是循环调用,内核每次都要先读取 result 中上一次的累加结果再写回(result[gid] += ...),所以 memObject3 的属性需要设置为读写(CL_MEM_READ_WRITE);

不足或者错误之处欢迎指正

这里写图片描述

没有更多推荐了,返回首页

私密
私密原因:
请选择设置私密原因
  • 广告
  • 抄袭
  • 版权
  • 政治
  • 色情
  • 无意义
  • 其他
其他原因:
120
出错啦
系统繁忙,请稍后再试

关闭