BGRA转灰度数据

使用 NEON 优化,在 iPhone 4s 上测试耗时约 3 ms。直接上代码,需要的朋友拿去。

#include <arm_neon.h>
/**
 * Convert packed 8-bit BGRA pixels to 8-bit grayscale.
 *
 * Every output byte is computed as
 *
 *     gray = (14 * B + 76 * G + 38 * R + 64) >> 7
 *
 * i.e. a weighted sum whose weights add up to 128, rounded to nearest.
 * The fourth (alpha) byte of each pixel is read but does not contribute.
 *
 * Groups of 8 pixels are handled by a NEON loop when the target supports it
 * (AArch64, or AArch32 with NEON enabled); any remaining pixels, and every
 * pixel on other targets, go through a scalar loop that uses the same
 * formula, so the result does not depend on which path ran.
 *
 * @param dest       Output buffer; exactly numPixels bytes are written.
 * @param src        Input buffer; exactly 4 * numPixels bytes are read, laid
 *                   out as B, G, R, A per pixel.
 * @param numPixels  Number of pixels to convert. Values <= 0 are a no-op.
 */
void neon_asm_convert_BGRA_to_gray(uint8_t * __restrict dest, uint8_t * __restrict src, int numPixels)
{
    if (numPixels <= 0) {
        return;
    }

    /* Pixels left for the scalar loop once the vector path (if any) is done. */
    int tail = numPixels;

#if defined(__aarch64__) || defined(__arm64__)
    /* The asm loop consumes exactly 8 pixels per iteration and always runs at
     * least once, so only hand it a positive multiple of 8. */
    int blocks = numPixels - (numPixels % 8);
    tail -= blocks;
    if (blocks > 0) {
        __asm__ volatile (
            "movi       v4.8b, #14                     \n" /* B weight */
            "movi       v5.8b, #76                     \n" /* G weight */
            "movi       v6.8b, #38                     \n" /* R weight */
            "1:                                        \n"
            /* De-interleave 8 pixels: v0 = B, v1 = G, v2 = R, v3 = A. */
            "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n"
            "subs       %w2, %w2, #8                   \n"
            "umull      v16.8h, v0.8b, v4.8b           \n"
            "umlal      v16.8h, v1.8b, v5.8b           \n"
            "umlal      v16.8h, v2.8b, v6.8b           \n"
            /* Rounding shift by 7 and narrow to 8 bits; v3 (alpha) is reused
             * as the destination because alpha is not needed. */
            "sqrshrun   v3.8b, v16.8h, #7              \n"
            "st1        {v3.8b}, [%0], #8              \n"
            "b.gt       1b                             \n"
            : "+r"(dest),
              "+r"(src),
              "+r"(blocks)
            :
            : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16"
        );
    }
#elif defined(__arm__) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
    /* Same constraint as above: 8 pixels per iteration, at least one pass. */
    int blocks = numPixels - (numPixels % 8);
    tail -= blocks;
    if (blocks > 0) {
        __asm__ volatile (
            "vmov.u8    d4, #14                        \n" /* B weight */
            "vmov.u8    d5, #76                        \n" /* G weight */
            "vmov.u8    d6, #38                        \n" /* R weight */
            ".p2align   2                              \n"
            "1:                                        \n"
            /* De-interleave 8 pixels: d0 = B, d1 = G, d2 = R, d3 = A. */
            "vld4.8     {d0, d1, d2, d3}, [%1]!        \n"
            "subs       %2, %2, #8                     \n"
            "vmull.u8   q8, d0, d4                     \n"
            "vmlal.u8   q8, d1, d5                     \n"
            "vmlal.u8   q8, d2, d6                     \n"
            /* Rounding shift by 7 and narrow to 8 bits into d3 (alpha slot). */
            "vqrshrun.s16 d3, q8, #7                   \n"
            "vst1.8     {d3}, [%0]!                    \n"
            "bgt        1b                             \n"
            : "+r"(dest),
              "+r"(src),
              "+r"(blocks)
            :
            : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q8"
        );
    }
#endif

    /* Scalar path: leftover pixels after the vector loop, or all pixels when
     * no NEON path was compiled in. The "+ 64" reproduces the rounding of the
     * vector narrowing shift; the maximum sum (255 * 128 + 64) >> 7 is 255,
     * so no extra clamping is required. */
    for (; tail > 0; --tail) {
        unsigned int sum = 14u * src[0] + 76u * src[1] + 38u * src[2] + 64u;
        *dest++ = (uint8_t)(sum >> 7);
        src += 4;
    }
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值