使用 OpenCV 从文件读取彩色的 png 图像,旋转一定角度以后写回文件
● 代码,核函数
// rotate.cl
// Optional device-side sampler: enable the line below and remove the `sampler`
// parameter of imageRotate if the sampler should not be passed in from the host.
//__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;

// Rotates inputImage about its centre and stores the result in outputImage.
//
// One work-item produces one output pixel: the work-item at (x, y) maps its
// own position, taken relative to the image centre, through the 2x2 rotation
// matrix built from `angle` and samples the input image at that coordinate.
//
//   inputImage   image to sample from
//   outputImage  image that receives the rotated pixels
//   angle        rotation angle in radians
//   sampler      sampler used for the read (created by the host)
__kernel void imageRotate(__read_only image2d_t inputImage, __write_only image2d_t outputImage, float angle, sampler_t sampler)
{
    const int x = get_global_id(0), y = get_global_id(1);

    // Writing outside the image is undefined, so ignore surplus work-items
    // (they appear when the host rounds the global size up).
    if (x >= get_image_width(outputImage) || y >= get_image_height(outputImage))
    {
        return;
    }

    const int width = get_image_width(inputImage), height = get_image_height(inputImage);
    const int halfWidth = width / 2, halfHeight = height / 2;
    const int xt = x - halfWidth, yt = y - halfHeight;      // position relative to the centre
    const float sinFactor = sin(angle), cosFactor = cos(angle);

    // Both components are computed directly in the initializer; the vector is
    // never referenced while it is still being constructed.
    const float2 readCoord = (float2)(halfWidth + cosFactor * xt - sinFactor * yt,
                                      halfHeight + sinFactor * xt + cosFactor * yt);
    const float4 value = read_imagef(inputImage, sampler, readCoord);
    write_imagef(outputImage, (int2)(x, y), value);
}
● 代码,分三通道分别旋转
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <cl.h> 4 #include <opencv.hpp> 5 #include <opencv2\core\cvstd.hpp> // namespace cv 的定义 6 7 #pragma warning(disable : 4996) // 解封OPenCL1.2 8 9 using namespace cv; 10 11 const char *sourceProgram = "D:/Code/OpenCL/rotate.cl";// 核函数文件 12 const char *imagePath = "D:\\input.png"; 13 const float angle = 3.14f / 4; 14 15 int readSource(const char* kernelPath, char **output)// 读取文本文件,存储为 char * 16 { 17 FILE *fp; 18 int size; 19 fopen_s(&fp, kernelPath, "rb"); 20 if (!fp) 21 { 22 printf("Open kernel file failed\n"); 23 exit(-1); 24 } 25 if (fseek(fp, 0, SEEK_END) != 0) 26 { 27 printf("Seek end of file faildd\n"); 28 exit(-1); 29 } 30 if ((size = ftell(fp)) < 0) 31 { 32 printf("Get file position failed\n"); 33 exit(-1); 34 } 35 rewind(fp); 36 if ((*output = (char *)malloc(size + 1)) == NULL) 37 { 38 printf("Allocate space failed\n"); 39 exit(-1); 40 } 41 fread((void*)*output, 1, size, fp); 42 fclose(fp); 43 (*output)[size] = '\0'; 44 printf("readSource succeed, program file: %s\n", kernelPath); 45 return size; 46 } 47 48 int main() 49 { 50 // 准备平台,设备,上下文,命令队列部分 51 cl_int status; 52 cl_uint nPlatform; 53 clGetPlatformIDs(0, NULL, &nPlatform); 54 cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id)); 55 clGetPlatformIDs(nPlatform, listPlatform, NULL); 56 cl_uint nDevice = 0; 57 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice); 58 cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id)); 59 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL); 60 cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status); 61 cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status); // OpenCL1.2 62 //cl_command_queue_properties queueProp[5] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT,// OpenCL2.0 63 // CL_QUEUE_SIZE, 
CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, 64 // 0}; 65 //cl_command_queue queue = clCreateCommandQueueWithProperties(context, listDevice[0], &queueProp, &status); // 第三个参数 queueProp 各种改都会报内存越界 0xC0000005 66 67 // 图片相关 68 Mat image, channel[3]; 69 image = imread(imagePath); 70 split(image, channel); // 拆分为三通道,分别旋转后拼合 71 const int imageHeight = image.rows, imageWidth = image.cols; 72 unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * imageHeight * imageWidth); 73 74 cl_image_format format; 75 format.image_channel_order = CL_R; // 单通道 76 format.image_channel_data_type = CL_UNORM_INT8; // 无符号 8 位整形,0 ~ 255 77 cl_image_desc desc; 78 desc.image_type = CL_MEM_OBJECT_IMAGE2D; // 可以 memset(desc,sizeof(cl_image_desc)); 后仅对前三项赋值 79 desc.image_width = imageWidth; 80 desc.image_height = imageHeight; 81 desc.image_depth = 0; 82 desc.image_array_size = 0; 83 desc.image_row_pitch = 0; 84 desc.image_slice_pitch = 0; 85 desc.num_mip_levels = 0; 86 desc.num_samples = 0; 87 desc.buffer = NULL; 88 cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, &status); 89 cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status); 90 91 // 采样器 92 cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); // OpenCL1.2 93 //cl_sampler_properties samplerProp[7] = {CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, // OpenCL2.0 94 // CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, 95 // CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, 96 // 0}; 97 //cl_sampler sampler = clCreateSamplerWithProperties(context, samplerProp, &status); // 也是内存越界,用不了 98 99 // 程序和内核 100 char* source = NULL; 101 const size_t lenSource = readSource(sourceProgram, &source); 102 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status); 103 clBuildProgram(program, 1, listDevice, NULL, NULL, NULL); 104 cl_kernel kernel = clCreateKernel(program, 
"imageRotate", &status); 105 clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); 106 clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); 107 clSetKernelArg(kernel, 2, sizeof(cl_float), &angle); 108 clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler); 109 size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷贝图片缓冲区时使用的起点和范围参数 110 size_t globalSize[2] = { imageWidth, imageHeight }; 111 112 for (int i = 0; i < 3; i++)// 分三个通道拷入缓冲区,执行旋转操作,拷回内存 113 { 114 memcpy(imageData, channel[i].data, sizeof(unsigned char) * imageHeight * imageWidth); 115 clEnqueueWriteImage(queue, d_inputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 116 clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); 117 clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 118 memcpy(channel[i].data, imageData, sizeof(unsigned char) * imageHeight * imageWidth); 119 } 120 121 merge(channel, 3, image);// 合并通道,结果写入文件,在窗口中展示结果 122 imwrite("D:/output.png", image); 123 imshow("Result", image); 124 waitKey(0); 125 126 free(listPlatform); 127 free(listDevice); 128 clReleaseContext(context); 129 clReleaseMemObject(d_inputImage); 130 clReleaseMemObject(d_outputImage); 131 clReleaseCommandQueue(queue); 132 clReleaseProgram(program); 133 clReleaseKernel(kernel); 134 //getchar(); 135 return 0; 136 }
● 代码,四个通道同时操作,注意图片读入和输出的时候只有三个通道,需要进行调整
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <cl.h> 4 #include <opencv.hpp> 5 #include <opencv2\core\cvstd.hpp> // namespace cv 的定义 6 7 #pragma warning(disable : 4996) // 解封OPenCL1.2 8 9 using namespace cv; 10 11 const char *sourceProgram = "D:/Code/OpenCL/rotate.cl";// 核函数文件 12 const char *imagePath = "D:/input.png"; 13 const float angle = 3.14f / 4; 14 15 int readSource(const char* kernelPath, char **output)// 读取文本文件,存储为 char * 16 { 17 FILE *fp; 18 int size; 19 fopen_s(&fp, kernelPath, "rb"); 20 if (!fp) 21 { 22 printf("Open kernel file failed\n"); 23 exit(-1); 24 } 25 if (fseek(fp, 0, SEEK_END) != 0) 26 { 27 printf("Seek end of file faildd\n"); 28 exit(-1); 29 } 30 if ((size = ftell(fp)) < 0) 31 { 32 printf("Get file position failed\n"); 33 exit(-1); 34 } 35 rewind(fp); 36 if ((*output = (char *)malloc(size + 1)) == NULL) 37 { 38 printf("Allocate space failed\n"); 39 exit(-1); 40 } 41 fread((void*)*output, 1, size, fp); 42 fclose(fp); 43 (*output)[size] = '\0'; 44 printf("readSource succeed, program file: %s\n", kernelPath); 45 return size; 46 } 47 48 int main() 49 { 50 // 准备平台,设备,上下文,命令队列部分 51 cl_int status; 52 cl_uint nPlatform; 53 clGetPlatformIDs(0, NULL, &nPlatform); 54 cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id)); 55 clGetPlatformIDs(nPlatform, listPlatform, NULL); 56 cl_uint nDevice = 0; 57 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice); 58 cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id)); 59 clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL); 60 cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status); 61 cl_command_queue queue = clCreateCommandQueue(context, listDevice[0], 0, &status);// OpenCL1.2 62 63 // 图片相关 64 Mat image = imread(imagePath); 65 const int imageHeight = image.rows, imageWidth = image.cols; 66 unsigned char *imageData = (unsigned char*)malloc(sizeof(unsigned char) * 
imageHeight * imageWidth * 4); 67 68 for (int i = 0; i < imageWidth * imageHeight; i++)// imread 读进来只有 RGB 三个通道(可能跟图片本身有关),要补成 4 个通道 69 { 70 imageData[4 * i + 0] = image.data[3 * i + 2];//R 71 imageData[4 * i + 1] = image.data[3 * i + 1];//G 72 imageData[4 * i + 2] = image.data[3 * i + 0];//B 73 imageData[4 * i + 3] = 255; //A 74 } 75 76 cl_image_format format; 77 format.image_channel_order = CL_RGBA; // 合并通道 78 format.image_channel_data_type = CL_UNORM_INT8; // 无符号 8 位整形,0 ~ 255 79 cl_image_desc desc; 80 desc.image_type = CL_MEM_OBJECT_IMAGE2D; // 可以 memset(desc,sizeof(cl_image_desc)); 后仅对前三项赋值 81 desc.image_width = imageWidth; 82 desc.image_height = imageHeight; 83 desc.image_depth = 0; 84 desc.image_array_size = 0; 85 desc.image_row_pitch = 0; 86 desc.image_slice_pitch = 0; 87 desc.num_mip_levels = 0; 88 desc.num_samples = 0; 89 desc.buffer = NULL; 90 cl_mem d_inputImage = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &format, &desc, imageData, &status);// 输入图片直接在主机上 91 cl_mem d_outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &status); 92 93 // 采样器 94 cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status); // OpenCL1.2 95 96 // 程序和内核 97 char* source = NULL; 98 const size_t lenSource = readSource(sourceProgram, &source); 99 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source, &lenSource, &status); 100 clBuildProgram(program, 1, listDevice, NULL, NULL, NULL); 101 cl_kernel kernel = clCreateKernel(program, "imageRotate", &status); 102 clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); 103 clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); 104 clSetKernelArg(kernel, 2, sizeof(cl_float), &angle); 105 clSetKernelArg(kernel, 3, sizeof(cl_sampler), &sampler); 106 size_t origin[3] = { 0, 0, 0 }, region[3] = { imageWidth, imageHeight, 1 };// 拷贝图片缓冲区时使用的起点和范围参数 107 size_t globalSize[2] = { imageWidth, imageHeight }; 108 109 
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); 110 clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); 111 112 for (int i = 0; i < imageWidth * imageHeight; i++)// 去掉第 4 个通道,返回 image 中 113 { 114 image.data[3 * i + 0] = imageData[4 * i + 2];//B 115 image.data[3 * i + 1] = imageData[4 * i + 1];//G 116 image.data[3 * i + 2] = imageData[4 * i + 0];//R 117 } 118 119 imwrite("D:/output.png", image); 120 imshow("Result", image); 121 waitKey(0); 122 123 free(listPlatform); 124 free(listDevice); 125 clReleaseContext(context); 126 clReleaseMemObject(d_inputImage); 127 clReleaseMemObject(d_outputImage); 128 clReleaseCommandQueue(queue); 129 clReleaseProgram(program); 130 clReleaseKernel(kernel); 131 //getchar(); 132 return 0; 133 }
● 输入、输出结果,顺时针转 45 度,因为使用了最近邻采样,结果中锯齿比较严重
● 另一种解封旧 API 的方法,在 包含头文件 <cl.h> 前使用 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS ,其中 1_2 可以改成 1_0,1_1 等(https://stackoverflow.com/questions/28500496/opencl-function-found-deprecated-by-visual-studio/28500846#28500846)
● 使用 cl_command_queue_properties 和函数 clCreateCommandQueueWithProperties 来创建命令队列,或是用 cl_sampler_properties 和函数 clCreateSamplerWithProperties 来创建采样器都失败了,报内存访问越界错误(0xC0000005),无论是按格式书写还是把 queueProp 改成 0,创建时第三个参数写成 &queueProp 都不行;有人说更新显卡驱动以后就好了(https://stackoverflow.com/questions/39864947/opencl-cl-out-of-host-memory-on-clcreatecommandqueuewithproperties-with-minima)。最后解决了,用 AMD APP SDK 下面的动态库 amdocl64.dll 替换掉 C:\Windows\System32 里边那个相同库就好了,可以完全使用 OpenCL2.0 的 API,不再报错。
● cv::imread 读入的彩色图片是按照 [ B, G, R, B, G, R, B, G, R, ...] 交错存放的(OpenCV 默认的通道顺序是 BGR 而不是 RGB),在用 OpenCL 处理之前需要进行一定的预处理,要么用 split 分解各通道为单独的图片,要么手工拆解,算完以后也要按照这种存放方式转回图像数据中。在发现通道个数和顺序的问题前,要么在调用函数 clCreateImage 的时候返回 -37,-38,-39,要么直接旋转得到像下面这样的图片。以后记得,如果出现这种交叉条纹的图像,有可能是通道交错导致的。
● 吐槽一下,网上能找到的 OpenCL + OpenCV 做图片旋转的基本上有几个版本(https://blog.csdn.net/c602273091/article/details/45418223,https://blog.csdn.net/icamera0/article/details/71598323,https://blog.csdn.net/jaccen2012/article/details/51367388)都是用 FreeImage 库把图像处理成灰度图来旋转的(参考了 刘文志等(2016). OpenCL 异构并行计算[M]. 的代码?),输出肯定是灰度图了,然后大家博客就相互抄吧,全是垃圾。好不容易找到一个彩色的(https://blog.csdn.net/Bob_Dong/article/details/64906734)代码还看不了。