Assembly x64 Intro - SSE2 IDCT


void WelsIDctFourT4Rec_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct)

{
  int32_t iDstStridex4  = iStride << 2;
  int32_t iPredStridex4 = iPredStride << 2;
  WelsIDctT4Rec_c (pRec,                  iStride, pPred,      iPredStride, pDct);
  WelsIDctT4Rec_c (&pRec[4],              iStride, &pPred[4],     iPredStride, pDct + 16);
  WelsIDctT4Rec_c (&pRec[iDstStridex4  ], iStride, &pPred[iPredStridex4  ], iPredStride, pDct + 32);
  WelsIDctT4Rec_c (&pRec[iDstStridex4 + 4], iStride, &pPred[iPredStridex4 + 4], iPredStride, pDct + 48);

}


;***********************************************************************
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
;***********************************************************************
WELS_EXTERN WelsIDctFourT4Rec_sse2
    %assign push_num 0
    LOAD_5_PARA
    PUSH_XMM 8
    SIGN_EXTENSION r1, r1d
    SIGN_EXTENSION r3, r3d
    ;Load 4x8
    SSE2_Load4x8p  r4, xmm0, xmm1, xmm4, xmm2, xmm5

    SSE2_TransTwo4x4W   xmm0, xmm1, xmm4, xmm2, xmm3
    SSE2_IDCT           xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
    SSE2_TransTwo4x4W   xmm1, xmm4, xmm0, xmm2, xmm3
    SSE2_IDCT           xmm4, xmm2, xmm3, xmm0, xmm5, xmm6, xmm1

    WELS_Zero           xmm7
    WELS_DW32           xmm6

    SSE2_StoreDiff8p   xmm4, xmm5, xmm6, xmm7, [r0      ],  [r2]
    SSE2_StoreDiff8p   xmm0, xmm5, xmm6, xmm7, [r0 + r1 ],  [r2 + r3]
    lea     r0, [r0 + 2 * r1]
    lea     r2, [r2 + 2 * r3]
    SSE2_StoreDiff8p   xmm1, xmm5, xmm6, xmm7, [r0],            [r2]
    SSE2_StoreDiff8p   xmm2, xmm5, xmm6, xmm7, [r0 + r1 ],  [r2 + r3]

    add     r4, 64
    lea     r0, [r0 + 2 * r1]
    lea     r2, [r2 + 2 * r3]
    SSE2_Load4x8p  r4, xmm0, xmm1, xmm4, xmm2, xmm5

    SSE2_TransTwo4x4W   xmm0, xmm1, xmm4, xmm2, xmm3
    SSE2_IDCT           xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
    SSE2_TransTwo4x4W   xmm1, xmm4, xmm0, xmm2, xmm3
    SSE2_IDCT           xmm4, xmm2, xmm3, xmm0, xmm5, xmm6, xmm1

    WELS_Zero           xmm7
    WELS_DW32           xmm6

    SSE2_StoreDiff8p   xmm4, xmm5, xmm6, xmm7, [r0      ],  [r2]
    SSE2_StoreDiff8p   xmm0, xmm5, xmm6, xmm7, [r0 + r1 ],  [r2 + r3]
    lea     r0, [r0 + 2 * r1]
    lea     r2, [r2 + 2 * r3]
    SSE2_StoreDiff8p   xmm1, xmm5, xmm6, xmm7, [r0],            [r2]
    SSE2_StoreDiff8p   xmm2, xmm5, xmm6, xmm7, [r0 + r1],   [r2 + r3]
    POP_XMM
    LOAD_5_PARA_POP
    ; pop        esi
    ; pop        ebx
    ret



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值