/*
 * Sampler used by the rotation kernel when reading the source image:
 *   - coordinates are unnormalized (expressed in pixels),
 *   - reads are linearly filtered between neighbouring texels,
 *   - out-of-range coordinates use the CLAMP addressing mode.
 */
__constant sampler_t sampler =
    CLK_ADDRESS_CLAMP |
    CLK_FILTER_LINEAR |
    CLK_NORMALIZED_COORDS_FALSE;
/*
 * Rotate a 2D image about its centre.
 *
 * Each work-item produces one output pixel (x, y): it rotates that pixel's
 * offset from the image centre by `theta` and samples the input image at the
 * resulting location (inverse mapping), so every output pixel is written
 * exactly once.
 *
 * inputImage   source image, read through the file-scope `sampler`
 * outputImage  destination image
 * imageWidth   image width in pixels
 * imageHeight  image height in pixels
 * theta        rotation angle in degrees
 */
__kernel
void rotation(__read_only image2d_t inputImage, __write_only image2d_t outputImage, int imageWidth, int imageHeight, float theta)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    /* The global size may be padded past the image; writing outside the
     * image is undefined, so surplus work-items do nothing. */
    if (x >= imageWidth || y >= imageHeight) {
        return;
    }

    /* Centre of rotation in continuous pixel coordinates. */
    const float x0 = imageWidth / 2.0f;
    const float y0 = imageHeight / 2.0f;

    /* Offset of this pixel's centre from the rotation centre. Kept in float:
     * truncating to int skews the result for odd image sizes. The +0.5f
     * addresses the texel centre, which is where a linear-filtering sampler
     * with unnormalized coordinates returns the unblended texel value. */
    const float xprime = (x + 0.5f) - x0;
    const float yprime = (y + 0.5f) - y0;

    const float angle = M_PI_F / 180.0f * theta;
    const float sinTheta = sin(angle);
    const float cosTheta = cos(angle);

    float2 readCoor;
    readCoor.x = xprime * cosTheta - yprime * sinTheta + x0;
    readCoor.y = xprime * sinTheta + yprime * cosTheta + y0;

    const float4 value = read_imagef(inputImage, sampler, readCoor);
    write_imagef(outputImage, (int2)(x, y), value);
}
float * rotation(float * data, int width, int height, float theta)
{
cl_kernel * kernelList = getKernelList();
cl_env* env = getCLEnv();
float * hInputImage = NULL;
float * hOutputImage = NULL;
cl_mem inputImage, outputImage;
cl_image_desc desc;
cl_image_format format;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
int err = 0;
size_t globalWorkSize[2];
size_t localWorkSize[2];
globalWorkSize[0] = width;
globalWorkSize[1] = height;
localWorkSize[0] = 8;
localWorkSize[1] = 8;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = width;
desc.image_height = height;
desc.image_depth = 0;
desc.image_array_size = 0;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = NULL;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_FLOAT;
hOutputImage = (float *)calloc(width * height * 4, sizeof(float));
inputImage = clCreateImage(env->ctx, CL_MEM_READ_ONLY, &format, &desc, NULL, NULL);
outputImage = clCreateImage(env->ctx, CL_MEM_WRITE_ONLY, &format, & desc, NULL, NULL);
clEnqueueWriteImage(env->cmd_queue[0], inputImage, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
err = clSetKernelArg(kernelList[KERNEL_rotation], 0, sizeof(cl_mem), &inputImage);
err += clSetKernelArg(kernelList[KERNEL_rotation], 1, sizeof(cl_mem), &outputImage);
err += clSetKernelArg(kernelList[KERNEL_rotation], 2, sizeof(int), &width);
err += clSetKernelArg(kernelList[KERNEL_rotation], 3, sizeof(int), &height);
err += clSetKernelArg(kernelList[KERNEL_rotation], 4, sizeof(float), &theta);
err += clEnqueueNDRangeKernel(env->cmd_queue[0], kernelList[KERNEL_rotation], 2, NULL, globalWorkSize, NULL,
0, NULL, NULL);
err += clEnqueueReadImage(env->cmd_queue[0], outputImage, CL_TRUE, origin, region, 0, 0, hOutputImage, 0, NULL, NULL);
return hOutputImage;
}