YUV420p快速转RGB

/*
 * Fixed-width integer names used by this file.
 * NOTE(review): UCHAR and ULONGLONG are not defined in this file; presumably
 * they come from the Windows SDK headers - confirm they are included first.
 */
typedef   UCHAR    uint8_t;
typedef   ULONGLONG   uint64_t;

/*
 * Widest luma line, in pixels, that fits the on-stack line buffer
 * (3*MAXIMUM_Y_WIDTH+1 bytes) YUV_TO_RGB24 uses for the last output row.
 */
#define MAXIMUM_Y_WIDTH 2000

/*
 * 64-bit memory operands for the MMX code in YUV_TO_RGB24. Each constant
 * repeats a single 16-bit value (mmw_ prefix) or 8-bit value (mmb_ prefix)
 * across the whole quadword.
 *
 * The mmw_mult_* values are applied with pmulhw (signed, high 16 bits of the
 * product) to inputs that were first shifted left by 3, so each one acts as
 * a signed coefficient scaled by 2^13 (8192).
 */
/* luma gain: 0x2568 / 8192 ~= 1.169 */
static uint64_t mmw_mult_Y    = 0x2568256825682568;
/* U contribution to green: 0xf36e is negative as a signed word, ~= -0.393 */
static uint64_t mmw_mult_U_G  = 0xf36ef36ef36ef36e;
/* U contribution to blue: 0x40cf / 8192 ~= 2.025 */
static uint64_t mmw_mult_U_B  = 0x40cf40cf40cf40cf;
/* V contribution to red: 0x3343 / 8192 ~= 1.602 */
static uint64_t mmw_mult_V_R  = 0x3343334333433343;
/* V contribution to green: 0xe5e2 is negative as a signed word, ~= -0.816 */
static uint64_t mmw_mult_V_G  = 0xe5e2e5e2e5e2e5e2;
/* subtracted from every luma byte with unsigned saturation (offset 16) */
static uint64_t mmb_0x10      = 0x1010101010101010;
/* subtracted from the widened U and V words (bias 128) */
static uint64_t mmw_0x0080    = 0x0080008000800080;
/* keeps the low byte of each word, i.e. the even-numbered luma samples */
static uint64_t mmw_0x00ff    = 0x00ff00ff00ff00ff;
/*
 * Per-word masks selecting bits 10-14, 5-9 and 0-4. Not referenced by
 * YUV_TO_RGB24; presumably intended for a 15-bit (5-5-5) output routine that
 * is not part of this file - TODO confirm or remove.
 */
static uint64_t mmw_cut_red   = 0x7c007c007c007c00;
static uint64_t mmw_cut_green = 0x03e003e003e003e0;
static uint64_t mmw_cut_blue  = 0x001f001f001f001f;

/*
 * Convert a planar YUV 4:2:0 picture to packed 24-bit pixels with MMX
 * (MSVC x86 inline assembly).
 *
 *   puc_y, stride_y   luma plane and its line pitch in bytes
 *   puc_u, puc_v      chroma planes; every chroma sample covers a 2x2 block
 *                     of luma samples
 *   stride_uv         line pitch in bytes of both chroma planes
 *   puc_out           first byte of the first output row
 *   stride_out        distance in bytes between consecutive output rows
 *   width_y           picture width in pixels. Pixels are converted in groups
 *                     of 8; a trailing partial group is left unconverted.
 *                     Must lie in [8, MAXIMUM_Y_WIDTH], otherwise the call
 *                     returns without writing anything.
 *   height_y          number of rows. A negative value converts -height_y
 *                     rows and reads the source bottom-up, so the output is
 *                     flipped vertically.
 *
 * Every pixel is stored as three bytes in B, G, R order.
 *
 * Caveat: the stores are 4 bytes wide, so each row except the last also
 * writes one zero byte directly after its last converted pixel. The last row
 * is staged in a local buffer so that nothing is written past its end.
 */
void YUV_TO_RGB24(uint8_t *puc_y, int stride_y,
                  uint8_t *puc_u, uint8_t *puc_v, int stride_uv,
                  uint8_t *puc_out, int width_y, int height_y, int stride_out)
{
    /* Declarations are kept at the top of the block for C89-only compilers. */
    /* Function scope on purpose: the asm and the copy-back below both use it. */
    uint8_t temp_buff[3*MAXIMUM_Y_WIDTH+1];
    uint8_t *plane_y = puc_y;
    uint8_t *plane_u = puc_u;
    uint8_t *plane_v = puc_v;
    uint8_t *puc_dest = puc_out;      /* real destination of the current row */
    int flip = 0;
    int y, x, src_row, horiz_count, converted_bytes;

    /*
     * The asm loop counts a negative counter up to zero, so fewer than 8
     * pixels (counter == 0) would wrap around and run ~2^32 iterations.
     * Wider lines than MAXIMUM_Y_WIDTH would overflow temp_buff.
     */
    if (width_y < 8 || width_y > MAXIMUM_Y_WIDTH) {
        return;
    }

    if (height_y < 0) {
        /* we are flipping our output upside-down */
        height_y = -height_y;
        flip = 1;
    }

    horiz_count     = -(width_y >> 3);       /* negative count of 8-pixel groups */
    converted_bytes = 24 * (width_y >> 3);   /* bytes the asm produces per row   */

    for (y = 0; y < height_y; y++) {
        /*
         * Derive the source row explicitly; the chroma row is always
         * src_row / 2, which also stays inside the planes for odd heights
         * when flipping.
         */
        src_row = flip ? (height_y - 1 - y) : y;
        puc_y = plane_y + src_row * stride_y;
        puc_u = plane_u + (src_row / 2) * stride_uv;
        puc_v = plane_v + (src_row / 2) * stride_uv;

        /* Last row: convert into temp_buff so the 4-byte stores cannot overrun. */
        puc_out = (y == height_y - 1) ? temp_buff : puc_dest;

        _asm {
            push eax
            push ebx
            push ecx
            push edx
            push edi

            mov eax, puc_out
            mov ebx, puc_y
            mov ecx, puc_u
            mov edx, puc_v
            mov edi, horiz_count

        horiz_loop:

            // load 4 U, 4 V and 8 Y samples; widen U and V to words
            movd mm2, [ecx]
            pxor mm7, mm7

            movd mm3, [edx]
            punpcklbw mm2, mm7

            movq mm0, [ebx]
            punpcklbw mm3, mm7

            movq mm1, mmw_0x00ff

            // Y -= 16 (saturating), U -= 128, V -= 128
            psubusb mm0, mmb_0x10

            psubw mm2, mmw_0x0080
            // mm1 = even-numbered Y samples as words
            pand mm1, mm0

            psubw mm3, mmw_0x0080
            psllw mm1, 3

            // mm0 = odd-numbered Y samples as words
            psrlw mm0, 8
            psllw mm2, 3

            // inputs are pre-shifted by 3; pmulhw keeps the high product word
            pmulhw mm1, mmw_mult_Y
            psllw mm0, 3

            psllw mm3, 3
            movq mm5, mm3

            // mm5 = V contribution to red
            pmulhw mm5, mmw_mult_V_R
            movq mm4, mm2

            pmulhw mm0, mmw_mult_Y
            movq mm7, mm1

            pmulhw mm2, mmw_mult_U_G
            // mm7 = red for even pixels
            paddsw mm7, mm5

            pmulhw mm3, mmw_mult_V_G
            packuswb mm7, mm7

            // mm4 = U contribution to blue
            pmulhw mm4, mmw_mult_U_B
            // mm5 = red for odd pixels
            paddsw mm5, mm0

            packuswb mm5, mm5
            // mm2 = combined U and V contribution to green
            paddsw mm2, mm3

            movq mm3, mm1
            movq mm6, mm1

            // mm3 = blue (even), mm6 = green (even)
            paddsw mm3, mm4
            paddsw mm6, mm2

            // mm7 = 8 red bytes in pixel order
            punpcklbw mm7, mm5
            // mm2 = green (odd)
            paddsw mm2, mm0

            packuswb mm6, mm6
            packuswb mm2, mm2

            packuswb mm3, mm3
            // mm4 = blue (odd)
            paddsw mm4, mm0

            packuswb mm4, mm4
            // mm6 = 8 green bytes in pixel order
            punpcklbw mm6, mm2

            // mm3 = 8 blue bytes in pixel order
            punpcklbw mm3, mm4

            // 32-bit shuffle: build B,G,R,0 dwords for pixels 0..3
            pxor mm0, mm0

            movq mm1, mm6
            punpcklbw mm1, mm0

            movq mm0, mm3
            punpcklbw mm0, mm7

            movq mm2, mm0

            punpcklbw mm0, mm1
            punpckhbw mm2, mm1

            // 24-bit shuffle and save: each dword store is placed 3 bytes
            // after the previous one, so its 4th (zero) byte is overwritten
            // by the next pixel
            movd   [eax], mm0
            psrlq mm0, 32

            movd  3[eax], mm0

            movd  6[eax], mm2

            psrlq mm2, 32

            movd  9[eax], mm2

            // 32-bit shuffle: build B,G,R,0 dwords for pixels 4..7
            pxor mm0, mm0

            movq mm1, mm6
            punpckhbw mm1, mm0

            movq mm0, mm3
            punpckhbw mm0, mm7

            movq mm2, mm0

            punpcklbw mm0, mm1
            punpckhbw mm2, mm1

            // 24-bit shuffle and save
            movd 12[eax], mm0
            psrlq mm0, 32

            movd 15[eax], mm0
            add ebx, 8

            movd 18[eax], mm2
            psrlq mm2, 32

            add ecx, 4
            add edx, 4

            // this store covers bytes 21..24: one byte past the 24 produced
            movd 21[eax], mm2
            add eax, 24

            inc edi
            jne horiz_loop

            pop edi
            pop edx
            pop ecx
            pop ebx
            pop eax

            emms
        }

        if (y == height_y - 1) {
            /*
             * Last row was staged in temp_buff: copy back only the bytes the
             * asm actually produced, never the uninitialised remainder.
             */
            for (x = 0; x < converted_bytes; x++) {
                puc_dest[x] = temp_buff[x];
            }
        }

        puc_dest += stride_out;
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值