摘要
图像深度的转化, 主要是neon移位和乘法的操作。
一、移位和相乘操作
VSHR_N: 右移动 right shifts each element in a vector by an immediate value, and places the results in the destination vector。
VMLA (Vector Multiply Accumulate): 相乘 multiplies corresponding elements in two vectors and adds the results to the corresponding element of the destination vector.
二、RGB到GRAY的转化
对于彩色转灰度,心理学公式:
gray = R*0.299 + G*0.587 + B*0.114
在此0.299+0.587+0.114=1,刚好是满偏,这是通过不同的敏感度以及经验总结出来的公式。在实际应用时,希望避免低速的浮点运算,所以需要整数算法。注意到系数都是小数点后3位,我们可以将它们缩放1000倍来实现整数运算算法,如下
gray = (R*299 + G*587 + B*114) / 1000
但是涉及到除法,为了能在后续实现移位,将1000扩展到1024,得到式子如下:
gray = (R* 306 + G*601 + B*117) / 1024 = (R* 306 + G*601 + B*117) >>10
适当地还可以再精简,把系数压缩到8位以内(三个系数之和为256),现在变成这样子:
gray = (R*77 + G*151 + B*28) >>8
程序使用C语言和neon内联函数进行rgb转灰度算法优化。
/*
 * Scalar (non-NEON) RGB-to-gray conversion.
 *
 * src  : n packed pixels, 3 bytes each, read in the order R, G, B.
 * n    : number of pixels to convert.
 * dest : receives n gray bytes, one per pixel.
 *
 * Uses the fixed-point weights 77/151/28 (they sum to 256):
 *     gray = (77*R + 151*G + 28*B) >> 8
 */
void rgb2gray (const char* src, int n, char* dest)
{
    /*
     * Plain char may be signed. Read the samples as unsigned char so that
     * values above 127 are not sign-extended into negative numbers.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned char *out = (unsigned char *)dest;
    int i;

    for (i = 0; i < n; i++)
    {
        int r = *in++; // load red
        int g = *in++; // load green
        int b = *in++; // load blue
        // per-channel weights; the sum is at most 255 * 256
        int tmp = (r * 77) + (g * 151) + (b * 28);
        // shift right by 8 (divide by 256) to scale back to one byte
        *out++ = (unsigned char)(tmp >> 8);
    }
}
/*
 * RGB-to-gray conversion using NEON intrinsics, 8 pixels per iteration.
 *
 * src  : n packed pixels, 3 bytes each, read in the order R, G, B.
 * n    : number of pixels to convert.
 * dest : receives n gray bytes, one per pixel.
 *
 * gray = (77*R + 151*G + 28*B) >> 8. Pixels that do not fill a whole group
 * of 8 are converted by a scalar loop using the same formula.
 */
void neon_rgb2gray (const char* src, int n, char* dest)
{
    /* The u8 load/store intrinsics take uint8_t pointers; convert once. */
    const uint8_t *in = (const uint8_t *)src;
    uint8_t *out = (uint8_t *)dest;
    int i;
    uint8x8_t rdup = vdup_n_u8 (77);  // red weight copied into all 8 lanes
    uint8x8_t gdup = vdup_n_u8 (151); // green weight
    uint8x8_t bdup = vdup_n_u8 (28);  // blue weight

    for (i = 0; n - i >= 8; i += 8)
    {
        uint16x8_t temp; // 8 lanes of 16-bit sums (max 255 * 256 fits)
        // de-interleaving load: val[0] = 8 reds, val[1] = 8 greens, val[2] = 8 blues
        uint8x8x3_t rgb = vld3_u8 (in);
        uint8x8_t result;

        temp = vmull_u8 (rgb.val[0], rdup);        // widening multiply
        temp = vmlal_u8 (temp, rgb.val[1], gdup);  // widening multiply-accumulate
        temp = vmlal_u8 (temp, rgb.val[2], bdup);
        result = vshrn_n_u16 (temp, 8);            // shift right by 8 and narrow to 8 bits
        vst1_u8 (out, result);                     // store 8 gray bytes
        in += 8 * 3;
        out += 8;
    }

    /* Scalar tail: the remaining n % 8 pixels. */
    for (; i < n; i++)
    {
        unsigned int sum = in[0] * 77u + in[1] * 151u + in[2] * 28u;
        *out++ = (uint8_t)(sum >> 8);
        in += 3;
    }
}
三、RGB565与RGB888互相转
图像是以RGB888格式显示的,24位图的每个像素通常按32bit保存,即RGB888+Alpha,Alpha就是半透明填充字节……但是对于真彩的图像而言,肉眼在16bit的时候已经难以分辨了。因此,有些时候,可以将RGB888转换为RGB565来存储,在减少存储器占用的同时,也降低了数据量;在后端显示的时候,再把RGB565转换为RGB888,实现数据宽度的匹配。
RGB888转RGB565,只要提取相应单色的高位即可(R5 G6 B5),但会导致低位的缺失,影响精度,而且无法恢复;RGB565转RGB888,只要补充相应单色的低位即可(R3 G2 B3)。
RGB888用32位的unsigned int存储
0 0 0 0 0 0 0 0 R7 R6 R5 R4 R3 R2 R1 R0 G7 G6 G5 G4 G3 G2 G1 G0 B7 B6 B5 B4 B3 B2 B1 B0
RGB565用16位的unsigned short存储
R7 R6 R5 R4 R3 G7 G6 G5 G4 G3 G2 B7 B6 B5 B4 B3
/*
 * Pack a 32-bit RGB888 colour value into a 16-bit RGB565 value.
 *
 * Each channel is isolated with its RGB888_* mask and shifted right, then the
 * three fields are joined as red << 11, green << 5, blue << 0.
 * NOTE(review): the shift counts 19 / 10 / 3 presumably assume the masks
 * select bits 23..16, 15..8 and 7..0 -- confirm against the mask definitions.
 */
unsigned short RGB888ToRGB565(unsigned int n888Color)
{
    /* red: shift right by 19 */
    unsigned char red5 = (n888Color & RGB888_RED) >> 19;
    /* green: shift right by 10 */
    unsigned char green6 = (n888Color & RGB888_GREEN) >> 10;
    /* blue: shift right by 3 */
    unsigned char blue5 = (n888Color & RGB888_BLUE) >> 3;

    /* join the three fields into one 16-bit value */
    unsigned short packed = (red5 << 11) + (green6 << 5) + (blue5 << 0);

    return packed;
}
/*
 * Expand a 16-bit RGB565 colour value into a 32-bit RGB888 value.
 *
 * Each channel is isolated with its RGB565_* mask and shifted into one byte;
 * the bytes are then joined as red << 16, green << 8, blue << 0.
 * NOTE(review): the shift counts presumably assume the masks select bits
 * 15..11, 10..5 and 4..0, which would leave the low bits of each output byte
 * zero -- confirm against the mask definitions.
 */
unsigned int RGB565ToRGB888(unsigned short n565Color)
{
    /* red: net right shift of 8 */
    unsigned char red8 = (n565Color & RGB565_RED) >> 8;
    /* green: net right shift of 3 */
    unsigned char green8 = (n565Color & RGB565_GREEN) >> 3;
    /* blue: left shift of 3 */
    unsigned char blue8 = (n565Color & RGB565_BLUE) << 3;

    /* join the three bytes */
    return (red8 << 16) + (green8 << 8) + (blue8 << 0);
}
VMOVL (Vector Move Long) takes each element in a doubleword vector and sign- or zero-extends them to twice their original length. The results are stored in a quadword vector.
int i;
const int pixelsPerLoop = 8;
for(i = 0; i < pixelCount; i += pixelsPerLoop, inPixel32 += pixelsPerLoop, outPixel16 += pixelsPerLoop) {
//Read all r,g,b pixels into 3 registers
uint8x8x4_t rgba = vld4_u8(inPixel32);
//Right-shift
uint8x8_t r = vshr_n_u8(rgba.val[0], 3);
uint8x8_t g = vshr_n_u8(rgba.val[1], 2);
uint8x8_t b = vshr_n_u8(rgba.val[2], 3);
//b channel
uint16x8_t r5_g6_b5 = vmovl_u8(b);
//r channel
uint16x8_t r16 = vmovl_u8(r);
//排列565即移动11位, 取r
r16 = vshlq_n_u16(r16, 11);
r5_g6_b5 |= r16;
//gchannel
uint16x8_t g16 = vmovl_u8(g);
//排列565,即移动5
g16 = vshlq_n_u16(g16, 5);
r5_g6_b5 |= g16;
//Now write back to memory
vst1q_u16(outPixel16, r5_g6_b5);
}
VSRI_N right shifts each element in the second input vector by an immediate value, and inserts the results in the destination vector. It does not affect the highest n significant bits of the elements in the destination register. Bits shifted out of the right of each element are lost.
/*
 * Pack 3-byte pixels from image_src into 16-bit 5-6-5 values in image_dst,
 * 8 pixels per pass, using shift-right-and-insert (VSRI).
 *
 * Starts from PIXEL_NUMBER pixels; once fewer than 8 remain the loop stops
 * and those pixels are not converted.
 * NOTE(review): byte 0 of each pixel ends up in bits 15..11 and byte 2 in
 * bits 4..0. The original comments called byte 0 the "b" channel -- confirm
 * the source byte order matches the intended 565 layout.
 */
void setBgr888ToRgb565()
{
    uint8_t *in = image_src;
    uint16_t *out = image_dst;
    int remaining;

    for (remaining = PIXEL_NUMBER; remaining >= 8; remaining -= 8) {
        // de-interleaving load: val[k] holds byte k of each of the 8 pixels
        uint8x8x3_t px = vld3_u8(in);

        // widen each plane to 16 bits with its 8 data bits in the upper byte
        uint16x8_t plane0 = vshll_n_u8(px.val[0], 8);
        uint16x8_t plane1 = vshll_n_u8(px.val[1], 8);
        uint16x8_t plane2 = vshll_n_u8(px.val[2], 8);

        // keep the top 5 bits of plane0, insert plane1 shifted right by 5
        uint16x8_t packed = vsriq_n_u16(plane0, plane1, 5);
        // keep the top 11 bits so far, insert plane2 shifted right by 11
        packed = vsriq_n_u16(packed, plane2, 11);

        vst1q_u16(out, packed);
        in += 8 * 3;
        out += 8;
    }
}
总结
移位和相乘的操作