OpenCL入门Demo

最近负责的几个项目需要使用OpenCL进行编程,因此我对OpenCL进行了学习,并把学习过程中编写的主要Demo代码记录下来,供大家入门参考。

OpenCL的介绍、原理等这里就不赘述了,百度一下就能找到很多资料,下面直接切入主题。

这个demo实现两个数组的相加操作。

1.进行平台的初始化相关操作

/*
 * Initialize the OpenCL objects used by the demo.
 *
 * Selects the first reported platform and its first GPU device, then creates
 * a context and a command queue for that device. All four handles are stored
 * in tplatformObj.
 *
 * Returns 0 on success, otherwise the non-zero OpenCL error code of the step
 * that failed. When the command queue cannot be created the context created
 * just before it is released again, so nothing is left allocated on failure.
 */
int initPlatform(TPlatformObject* tplatformObj)
{
    cl_int err= CL_SUCCESS;
    cl_uint num_platforms=0;
    /* Zero-initialized so they are always valid strings when printed. */
    char platformInfo[100]="";
    char deviceInfo[100]="";

    if(tplatformObj == NULL){
        printErr("initPlatform: tplatformObj is NULL");
        return CL_INVALID_VALUE;
    }

    printInfo("InitPlatform\n");

    /* Query how many platforms exist before asking for one. */
    err=clGetPlatformIDs(0,NULL,&num_platforms);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatformIDS0 failed!err:%d",err);
        return err;
    }
    if(num_platforms == 0){
        /* Without this check the handle fetched below could be left unset. */
        printErr("initPlatform: no OpenCL platform available");
        return CL_INVALID_PLATFORM;
    }

    /* Take the first platform. */
    err=clGetPlatformIDs(1,&tplatformObj->platform,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatformIDS1 failed!err:%d",err);
        return err;
    }

    err=clGetPlatformInfo(tplatformObj->platform,CL_PLATFORM_VENDOR,
        sizeof(platformInfo),platformInfo,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetPlatforminfo failed!err:%d",err);
        return err;
    }
    printInfo("CL_PLATFORM_VENDOR:%s\n",platformInfo);

    /* Take the first GPU device of that platform. */
    err=clGetDeviceIDs(tplatformObj->platform,CL_DEVICE_TYPE_GPU,1,&tplatformObj->device,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetDeviceIDs failed!err:%d",err);
        return err;
    }

    /* The version string is informational only: log a failure but carry on. */
    err=clGetDeviceInfo(tplatformObj->device,CL_DEVICE_VERSION,
        sizeof(deviceInfo),deviceInfo,NULL);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clGetDeviceInfo failed!err:%d",err);
    }else{
        printInfo("initPlatform_CL_DEVICE_VERSION:%s\n",deviceInfo);
    }

    /* Create the context. */
    tplatformObj->context = clCreateContext(NULL,1,&tplatformObj->device,NULL,NULL,&err);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clCreateContext:%d",err);
        return err;
    }

    /* Create the command queue. */
    tplatformObj->queue = clCreateCommandQueue(tplatformObj->context,tplatformObj->device, 0, &err);
    if(err != CL_SUCCESS){
        printErr("initPlatform_clCreateCommandQueue:%d",err);
        /* Do not leak the context that was just created. */
        clReleaseContext(tplatformObj->context);
        tplatformObj->context=NULL;
        return err;
    }

    return 0;
}

2.加载kernel函数并执行(这里直接把整个源文件拷贝过来了)

#include "Rotate.h"

/*
 * Load the kernel source from "Operation.cl" (relative to the current
 * working directory), create a program object from it in
 * tplatformObj->context and build it.
 *
 * On success the built program is stored in tplatformObj->program and 0 is
 * returned. On any failure -1 is returned; if the program object had already
 * been created it is released and tplatformObj->program is set to NULL.
 */
int initProgram(TPlatformObject* tplatformObj)
{
    FILE* fp=NULL;                /* handle of the *.cl kernel source file */
    char* program_buffer=NULL;    /* in-memory copy of the kernel source */
    long file_size=0;             /* size reported by ftell(), may be -1 */
    size_t program_size=0;        /* number of source bytes actually read */
    cl_int errCode=0;


    fp=fopen("Operation.cl","r");
    if(fp == NULL){
        printErr("initProgram:fp is NULL\n");
        return -1;
    }

    /* Determine the file size by seeking to the end. */
    if(fseek(fp,0,SEEK_END) != 0){
        printErr("initProgram:fseek failed\n");
        fclose(fp);
        return -1;
    }
    file_size=ftell(fp);
    if(file_size < 0){
        printErr("initProgram:ftell failed\n");
        fclose(fp);
        return -1;
    }
    rewind(fp);

    program_buffer=(char*)malloc((size_t)file_size+1);
    if(program_buffer == NULL){
        printErr("initProgram:malloc failed\n");
        fclose(fp);
        return -1;
    }

    /*
     * In text mode fewer bytes than file_size may be delivered (newline
     * translation), so the length handed to OpenCL is the count returned by
     * fread(), never the raw file size.
     */
    program_size=fread(program_buffer,sizeof(char),(size_t)file_size,fp);
    if(ferror(fp)){
        printErr("initProgram:fread failed\n");
        fclose(fp);
        free(program_buffer);
        return -1;
    }
    fclose(fp);
    program_buffer[program_size]='\0';              /* terminate the source text */

    /* Create the program object from the source text. */
    tplatformObj->program=clCreateProgramWithSource(tplatformObj->context,
        1,(const char**) &program_buffer,&program_size,&errCode);
    /* The source buffer is no longer needed once the call has returned. */
    free(program_buffer);
    program_buffer=NULL;
    if(errCode != CL_SUCCESS){
        printErr("initProgram-Couldn't create the program,errcode=%d\n",errCode);
        return -1;
    }

    /* Build the program for the devices associated with it. */
    errCode=clBuildProgram(tplatformObj->program,0,NULL,NULL,NULL,NULL);
    if(errCode != CL_SUCCESS){
        printErr("initProgram_clBuildProgram failed!errcode is :%d\n",errCode);
        /* Do not leak the program object that failed to build. */
        clReleaseProgram(tplatformObj->program);
        tplatformObj->program=NULL;
        return -1;
    }

    return 0;
}

/*
 * Look up the kernel named "vecadd" in tplatformObj->program and store its
 * handle in tRotObj->VecAdd.
 *
 * Returns 0 when the kernel object was created, -1 otherwise.
 */
int CreateKernel(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    cl_int status=CL_SUCCESS;

    tRotObj->VecAdd=clCreateKernel(tplatformObj->program,"vecadd",&status);
    if(status == CL_SUCCESS){
        return 0;
    }

    printErr("CreateKernel failed! errCode:%d\n",status);
    return -1;
}

/*
 * Create the three device buffers used by the vector-add kernel.
 *
 * cl_A and cl_B are read-only inputs initialized from tRotObj->A and
 * tRotObj->B. cl_C receives the result, so it is created read-write (the
 * kernel must be able to write to it) and initialized from tRotObj->C.
 * Each buffer holds 1024 floats.
 *
 * Returns 0 on success. On failure returns -1 after releasing any buffer
 * that had already been created, so the caller has nothing to clean up.
 */
int CreateBuffer(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    const size_t bufSize=1024*sizeof(float);
    cl_int errCode=CL_SUCCESS;

    tRotObj->cl_A=clCreateBuffer(tplatformObj->context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        bufSize,(void*)tRotObj->A,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_A failed!!");
        return -1;
    }

    tRotObj->cl_B=clCreateBuffer(tplatformObj->context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        bufSize,(void*)tRotObj->B,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_B failed!!");
        clReleaseMemObject(tRotObj->cl_A);
        tRotObj->cl_A=NULL;
        return -1;
    }

    /* Output buffer: must not be CL_MEM_READ_ONLY, the kernel writes into it. */
    tRotObj->cl_C=clCreateBuffer(tplatformObj->context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        bufSize,(void*)tRotObj->C,&errCode);
    if(errCode != CL_SUCCESS){
        printErr("clCreateBuffer cl_C failed!!");
        clReleaseMemObject(tRotObj->cl_B);
        tRotObj->cl_B=NULL;
        clReleaseMemObject(tRotObj->cl_A);
        tRotObj->cl_A=NULL;
        return -1;
    }

    return 0;
}


/*
 * Bind the three device buffers to the arguments of the VecAdd kernel:
 * argument 0 = cl_A, argument 1 = cl_B, argument 2 = cl_C.
 *
 * Returns CL_SUCCESS (0) when every argument was set, otherwise the OpenCL
 * error code of the first call that failed. Each call is checked on its own
 * so an early failure cannot be hidden by a later successful call.
 */
int SetKernelArgs(TRotObj* tRotObj)
{
    cl_int errCode=CL_SUCCESS;

    errCode=clSetKernelArg(tRotObj->VecAdd,0,sizeof(cl_mem),&tRotObj->cl_A);
    if(errCode != CL_SUCCESS){
        printErr("SetKernelArgs arg 0 failed! errCode:%d\n",errCode);
        return errCode;
    }

    errCode=clSetKernelArg(tRotObj->VecAdd,1,sizeof(cl_mem),&tRotObj->cl_B);
    if(errCode != CL_SUCCESS){
        printErr("SetKernelArgs arg 1 failed! errCode:%d\n",errCode);
        return errCode;
    }

    errCode=clSetKernelArg(tRotObj->VecAdd,2,sizeof(cl_mem),&tRotObj->cl_C);
    if(errCode != CL_SUCCESS){
        printErr("SetKernelArgs arg 2 failed! errCode:%d\n",errCode);
        return errCode;
    }

    return CL_SUCCESS;
}


/*
 * Run the vector-add demo end to end.
 *
 * Builds the program, creates the "vecadd" kernel, fills two 1024-element
 * host arrays with A[i] = B[i] = i, uploads them, runs the kernel over 1024
 * work-items, reads the result back into C and prints it 16 values per line.
 *
 * Returns 0 on success and -1 on any failure.
 *
 * Resource handling:
 *  - Everything acquired inside this function (program, kernel, device
 *    buffers, host arrays) is released on every path that acquired it.
 *    tRotObj->A/B/C are reset to NULL once the host arrays are freed.
 *  - On success the command queue, context and device held by tplatformObj
 *    are released as well, so tplatformObj must not be used afterwards.
 *    On failure those three handles are left untouched.
 */
int RotateOpen(TPlatformObject* tplatformObj,TRotObj* tRotObj)
{
    const size_t elemCount=1024;
    const size_t byteCount=elemCount*sizeof(float);
    const cl_uint work_dim=1;       /* number of work-item dimensions */
    size_t global_work_size=elemCount;
    size_t i=0;
    float* A=NULL;
    float* B=NULL;
    float* C=NULL;
    cl_int err=CL_SUCCESS;
    int ret=-1;

    if(initProgram(tplatformObj) != 0){
        printErr("RotateOpen initProgram failed!!");
        return -1;
    }
    printInfo("initProgram done!!\n");

    /* Create the kernel object. */
    if(CreateKernel(tplatformObj,tRotObj) != 0){
        printErr("RotateOpen CreateKernel failed!!");
        goto release_program;
    }

    /* Host-side test arrays. */
    A=(float*)malloc(byteCount);
    B=(float*)malloc(byteCount);
    C=(float*)malloc(byteCount);
    if(A == NULL || B == NULL || C == NULL){
        printErr("RotateOpen malloc failed!!");
        goto release_host;
    }
    for(i=0;i<elemCount;i++){
        A[i]=(float)i;
        B[i]=(float)i;
    }
    memset(C,0,byteCount);
    tRotObj->A=A;
    tRotObj->B=B;
    tRotObj->C=C;

    /* Create the device buffers. */
    if(CreateBuffer(tplatformObj,tRotObj) != 0){
        printErr("RotateOpen CreateBuffer failed!!");
        /* No buffers to release here: CreateBuffer reported failure. */
        goto release_host;
    }

    /* Bind the buffers to the kernel arguments. */
    if(SetKernelArgs(tRotObj)){
        printErr("RotateOpen SetKernelArgs failed!!");
        goto release_buffers;
    }

    /* Launch the kernel and wait for it to complete. */
    err=clEnqueueNDRangeKernel(tplatformObj->queue,tRotObj->VecAdd,work_dim,
        NULL,&global_work_size,NULL,0,NULL,NULL);
    if(err != CL_SUCCESS){
        printErr("RotateOpen clEnqueueNDRangeKernel failed!!");
        goto release_buffers;
    }
    err=clFinish(tplatformObj->queue);
    if(err != CL_SUCCESS){
        printErr("RotateOpen clFinish failed!!");
        goto release_buffers;
    }

    /* Blocking read of the result; only print it when the read succeeded. */
    err=clEnqueueReadBuffer(tplatformObj->queue,tRotObj->cl_C,CL_TRUE,0,byteCount,
        tRotObj->C,0,NULL,NULL);
    if(err != CL_SUCCESS){
        printErr("RotateOpen clEnqueueReadBuffer failed!!");
        goto release_buffers;
    }

    /* Print the result, 16 values per line. */
    for(i=0;i<global_work_size;i++){
        printf("%f ",tRotObj->C[i]);
        if((i+1)%16==0){
            printf("\n");
        }
    }
    ret=0;

release_buffers:
    clReleaseMemObject(tRotObj->cl_A);
    clReleaseMemObject(tRotObj->cl_B);
    clReleaseMemObject(tRotObj->cl_C);
release_host:
    free(A);
    free(B);
    free(C);
    /* Do not leave dangling pointers behind in the caller's object. */
    tRotObj->A=NULL;
    tRotObj->B=NULL;
    tRotObj->C=NULL;
    clReleaseKernel(tRotObj->VecAdd);
release_program:
    clReleaseProgram(tplatformObj->program);
    if(ret == 0){
        /* Success path also tears down the platform objects. */
        clReleaseCommandQueue(tplatformObj->queue);
        clReleaseContext(tplatformObj->context);
        clReleaseDevice(tplatformObj->device);
    }
    return ret;
}
 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的 OpenCL 使用示例,它执行向量加法:

```c
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

#define LENGTH 1024

const char *kernelSource =
    "__kernel void vectorAdd(__global float *a, __global float *b, __global float *c) {\n"
    " int i = get_global_id(0);\n"
    " c[i] = a[i] + b[i];\n"
    "}\n";

int main() {
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel;
    cl_mem a, b, c;
    cl_int err;
    int i;
    float *A, *B, *C;

    // 初始化 A, B, C 数组
    A = (float*) malloc(sizeof(float) * LENGTH);
    B = (float*) malloc(sizeof(float) * LENGTH);
    C = (float*) malloc(sizeof(float) * LENGTH);
    for (i = 0; i < LENGTH; i++) {
        A[i] = (float) i;
        B[i] = (float) (LENGTH - i);
        C[i] = 0.0f;
    }

    // 获取平台和设备
    err = clGetPlatformIDs(1, &platform, NULL);
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    // 创建 OpenCL 上下文和命令队列
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    queue = clCreateCommandQueue(context, device, 0, &err);

    // 创建和编译内核程序
    program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);

    // 创建内核
    kernel = clCreateKernel(program, "vectorAdd", &err);

    // 创建和设置缓冲区
    a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
    b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
    c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH, NULL, &err);
    err = clEnqueueWriteBuffer(queue, a, CL_TRUE, 0, sizeof(float) * LENGTH, A, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(queue, b, CL_TRUE, 0, sizeof(float) * LENGTH, B, 0, NULL, NULL);

    // 设定内核参数
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c);

    // 执行内核
    size_t globalSize = LENGTH;
    size_t localSize = 64;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL);

    // 读取结果
    err = clEnqueueReadBuffer(queue, c, CL_TRUE, 0, sizeof(float) * LENGTH, C, 0, NULL, NULL);

    // 打印结果
    for (i = 0; i < LENGTH; i++) {
        printf("%f + %f = %f\n", A[i], B[i], C[i]);
    }

    // 释放内存和资源
    free(A);
    free(B);
    free(C);
    clReleaseMemObject(a);
    clReleaseMemObject(b);
    clReleaseMemObject(c);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return 0;
}
```

这个程序的作用是将两个向量相加,并打印结果。程序首先初始化了三个数组 A, B, C,然后获取 OpenCL 平台和设备,创建上下文和命令队列,创建和编译内核程序,创建内核,创建和设置缓冲区,设定内核参数,执行内核,读取结果,打印结果,最后释放内存和资源。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值