直接上代码。经测试,转换四路 1080P 流数据时比 OpenCV 更快。
#include <stdint.h>
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#endif

/**
 * Convert a packed 4-bytes-per-pixel image into a packed 3-bytes-per-pixel
 * image, dropping the 4th byte of every pixel and reversing the order of the
 * first three.
 *
 * For every pixel p:
 *     rgb[3*p + 0] = rgba[4*p + 2]
 *     rgb[3*p + 1] = rgba[4*p + 1]
 *     rgb[3*p + 2] = rgba[4*p + 0]
 *
 * NOTE: despite the function name, the colour channels are swapped, not just
 * copied (i.e. RGBA input yields BGR output, BGRA input yields RGB output).
 *
 * @param rgba   Source buffer; at least height * width * 4 bytes are read.
 * @param rgb    Destination buffer; height * width * 3 bytes are written.
 * @param height Number of rows.
 * @param width  Number of pixels per row. Rows are treated as contiguous
 *               (no padding between rows).
 *
 * The call is a no-op when either pointer is null or either dimension is
 * not positive. When built with NEON support, blocks of 16 pixels are
 * processed with vld4q_u8 / vst3q_u8; the remaining pixels (or all pixels
 * on targets without NEON) are handled by a scalar loop.
 */
void rgba2rgb_with_neon(const uint8_t *rgba, uint8_t *rgb,
                        int height, int width)
{
    if (!rgba || !rgb || height <= 0 || width <= 0) {
        return;
    }

    /* 64-bit pixel count: height * width may not fit in an int. */
    int64_t remaining = (int64_t)height * (int64_t)width;

    /* Walk both buffers with pointers so no index is ever multiplied by the
     * bytes-per-pixel factor in (possibly overflowing) int arithmetic. */
    const uint8_t *src = rgba;
    uint8_t *dst = rgb;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    enum { STRIDE_PIXELS = 16 };

    for (; remaining >= STRIDE_PIXELS; remaining -= STRIDE_PIXELS) {
        /* De-interleaving load: in.val[k] holds byte k of 16 pixels. */
        const uint8x16x4_t in = vld4q_u8(src);
        uint8x16x3_t out;

        out.val[0] = in.val[2];
        out.val[1] = in.val[1];
        out.val[2] = in.val[0];

        /* Interleaving store: writes 16 pixels * 3 bytes. */
        vst3q_u8(dst, out);

        src += STRIDE_PIXELS * 4;
        dst += STRIDE_PIXELS * 3;
    }
#endif

    /* Scalar path: tail pixels that do not fill a 16-pixel block, or the
     * whole image when NEON is unavailable. */
    for (; remaining > 0; --remaining) {
        dst[0] = src[2];
        dst[1] = src[1];
        dst[2] = src[0];
        src += 4;
        dst += 3;
    }
}