import pyopencl as cl
import numpy as np
import time
# ---------------------------------------------------------------------------
# OpenCL 3x3 mean-filter benchmark.
#
# Builds a random float32 image, runs a box filter over it on an OpenCL
# device, and prints the elapsed kernel time plus the mean of the result.
# ---------------------------------------------------------------------------

# Fixed seed so the input image (and therefore the printed means) is
# reproducible from run to run.
np.random.seed(1)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# The kernel below treats the array as row-major with shape[0] rows and
# shape[1] columns (it indexes with `y * width + x`).
image = np.random.rand(1920, 1080).astype(np.float32)
print(f"input numpy mean = {image.mean()}")
output = np.zeros_like(image)

buf_image = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=image)
# The kernel writes every element of the output (one work-item per pixel),
# so the device buffer only needs to be allocated, not initialised from the
# host: uploading the zero-filled `output` array would be a wasted transfer.
buf_output = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

# Kernel notes:
#   * `k_size` is the window half-width (3 / 2 == 1 in integer arithmetic),
#     so the window is 3x3 and the divisor 9 is hard-coded to match.
#   * Neighbours that fall outside the image are mirrored about the edge
#     pixel (index -1 -> 1, index `size` -> `size - 2`), which is the same
#     rule as np.pad(..., mode='reflect').
#   * Image height/width are taken from the global work size, so the kernel
#     must be launched with global size == image.shape.
#   * NOTE(review): NDRange dimension 0 is mapped to rows here; mapping it to
#     the contiguous column axis instead may give better memory access on
#     some devices - worth measuring before changing.
kernel_source = """
__kernel void mean_filter(__global const float *input,
__global float *output_arr) {
int k_size = 3 / 2; // 滤波核大小
int height = get_global_size(0);
int width = get_global_size(1);
int y = get_global_id(0);
int x = get_global_id(1);
float sum = 0.0f;
for (int m = -k_size; m <= k_size; ++m) {
int bias_y = y + m;
bias_y = select(bias_y, -bias_y, bias_y < 0);
bias_y = select(bias_y, (height * 2 - bias_y - 2), bias_y >= height);
for (int n = -k_size; n <= k_size; ++n) {
int bias_x = x + n;
bias_x = select(bias_x, -bias_x, bias_x < 0);
bias_x = select(bias_x, (width * 2 - bias_x - 2), bias_x >= width);
sum += input[bias_y * width + bias_x];
}
}
output_arr[y * width + x] =sum/9; // output[y + x * N] = sum * (1.0 / (M * N))
}
"""
# `kernel` holds the built cl.Program (the name is kept for compatibility);
# the actual kernel is looked up by attribute on the next statement.
kernel = cl.Program(ctx, kernel_source).build()

# perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short intervals; time.time() can jump with system clock changes
# and is coarse on some platforms.
start = time.perf_counter()
# One work-item per pixel; local work-group size is left to the runtime.
kernel.mean_filter(queue, image.shape, None, buf_image, buf_output)
# Kernel launches are asynchronous: wait for completion so the interval
# covers execution rather than just the enqueue call.
queue.finish()
end = time.perf_counter()
t = (end - start) * 1000  # seconds -> milliseconds
print(f"opencl time = {t:.0f}ms")

# Read the filtered image back into the host array (outside the timed region).
cl.enqueue_copy(queue, output, buf_output)
print(f"output opencl mean = {output.mean()}")
def mean_filter(image_array, kernel_size):
    """Apply a box (mean) filter with reflected borders to an image.

    Each output pixel is the arithmetic mean of the square window centred
    on it. Pixels outside the image are supplied by ``np.pad`` with
    ``mode='reflect'`` (mirror about the edge pixel, edge not repeated).

    Args:
        image_array: NumPy array of shape ``(H, W)`` or ``(H, W, ...)``.
            Only the first two axes are filtered; any trailing axes
            (e.g. colour channels) are processed independently.
        kernel_size: Nominal window side length. The effective window is
            ``2 * (kernel_size // 2) + 1`` pixels wide, so an even value
            behaves like the next odd value (4 -> 5), and 0 or 1 returns a
            copy of the input.

    Returns:
        A new array with the same shape and dtype as ``image_array``. For
        integer dtypes the mean is truncated when cast back to the input
        dtype.

    Raises:
        ValueError: If ``image_array`` has fewer than two dimensions, or if
            ``np.pad`` rejects the padding (e.g. negative ``kernel_size``
            or an empty spatial axis).
    """
    height, width = image_array.shape[:2]
    pad = kernel_size // 2

    # Pad only the two spatial axes; trailing axes are left untouched.
    pad_width = [(pad, pad), (pad, pad)] + [(0, 0)] * (image_array.ndim - 2)
    padded_image = np.pad(image_array, pad_width, mode='reflect')

    # Accumulate in floating point: keep the precision of float/complex
    # inputs (but at least single precision) and use float64 for everything
    # else, so integer sums cannot overflow or wrap.
    if np.issubdtype(image_array.dtype, np.inexact):
        acc_dtype = np.result_type(image_array.dtype, np.float32)
    else:
        acc_dtype = np.float64

    # Sum the window as (window * window) shifted views of the padded image.
    # This replaces a Python-level loop over every pixel with a loop over the
    # (few) window offsets, each of which is one vectorised array addition.
    window = 2 * pad + 1
    total = np.zeros(image_array.shape, dtype=acc_dtype)
    for dy in range(window):
        for dx in range(window):
            total += padded_image[dy:dy + height, dx:dx + width]
    total /= window * window

    return total.astype(image_array.dtype, copy=False)
# Time the NumPy mean_filter() implementation on the same input image with
# a 3x3 window. perf_counter() is the monotonic, highest-resolution clock
# intended for measuring intervals; time.time() can jump with system clock
# adjustments and is coarse on some platforms.
start = time.perf_counter()
np_output = mean_filter(image, 3)
end = time.perf_counter()
t = (end - start) * 1000  # seconds -> milliseconds
print(f"numpy time = {t:.0f}ms")
print(f"output numpy mean = {np_output.mean()}")