;-----------------------------------------------------------------------
; void Mean4X4_SA(unsigned char *_pSrc, unsigned char *_pDst,
;                 int rect_w, int rect_h, int img_w);
;
; TMS320C6000 linear assembly (uses BDEC / DOTPU4 / LDNDW / LDNW).
;
; For every 4x4 block of source bytes, stores one byte equal to the sum
; of the 16 bytes shifted right by 4 (truncating mean, no rounding term).
;
;   _pSrc   address of the first source byte of the rectangle
;   _pDst   output; rows are packed back to back, rect_w/4 bytes each
;   rect_w  rectangle width in bytes  (rect_w >> 2 blocks per row)
;   rect_h  rectangle height in rows  (rect_h >> 2 block rows)
;   img_w   byte distance between two consecutive source rows
;
; Nothing is written when rect_w < 4 or rect_h < 4.
; All loads are the non-aligned forms, so no alignment is required here.
; .no_mdep declares that memory accesses in this procedure are
; independent, so the source and destination must not overlap.
;
; Loop counters: BDEC branches while its counter is >= 0 and then
; decrements it, so a loop that must run N times (N >= 1) is entered
; with the counter set to N - 2.
;
; NOTE(review): this file defines Mean4X4_SA twice under the same global
; label; only one definition can be kept - confirm and drop the other.
; NOTE(review): the even/odd output order assumes the low register of an
; LDNDW pair holds the four lowest-addressed bytes (little-endian
; configuration) - confirm for the target build.
;-----------------------------------------------------------------------
        .global Mean4X4_SA
Mean4X4_SA: .cproc _pSrc,_pDst,rect_w,rect_h,img_w

        .no_mdep

        .reg    pSrc0,pSrc1,pSrc2,pSrc3,pDst    ; row bases of the current block row / output row
        .reg    p0,p1,p2,p3,q0,q1               ; running column pointers
        .reg    line0_7654:line0_3210
        .reg    line1_7654:line1_3210
        .reg    line2_7654:line2_3210
        .reg    line3_7654:line3_3210
        .reg    V1_01010101,V0_01010101
        .reg    sum0_3,sum0_2,sum0_1,sum0_0
        .reg    sum1_3,sum1_2,sum1_1,sum1_0
        .reg    sum0_10,sum0_32,sum1_10,sum1_32
        .reg    sum0,sum1
        .reg    stepX4,dstStep
        .reg    cntY,cntX,cnt,save_0,empty

        ; DOTPU4 against 0x01010101 adds the four unsigned bytes of a word.
        ; Both halfwords of the constant are 0x0101; V1 is a second copy
        ; used by the .2-side DOTPU4s.
        MVKL            0x01010101,V0_01010101
        MVKLH           0x01010101,V0_01010101
        MV              V0_01010101,V1_01010101

        SHL             img_w,2,stepX4          ; source advance per block row (4 rows)
        SHR             rect_w,2,dstStep        ; blocks per row = output bytes per row
        SHR             rect_h,2,cntY           ; number of block rows

        ; Both loops are do-while shaped, so an empty rectangle has to be
        ; rejected before entering them.
        CMPLT           dstStep,1,empty
 [empty] B              Mean4X4_SA_DONE
        CMPLT           cntY,1,empty
 [empty] B              Mean4X4_SA_DONE

        SUB             cntY,2,cntY             ; BDEC bias (see header)
        AND             dstStep,1,save_0        ; 1 when a row ends with a single unpaired block
        SHR             dstStep,1,cntX          ; block pairs per row

        MV              _pDst,pDst
        MV      .1      _pSrc,pSrc0
        ADD     .1      pSrc0,img_w,pSrc1
        ADD     .2      pSrc1,img_w,pSrc2
        ADD     .2      pSrc2,img_w,pSrc3

Mean4X4_SA_LOOPY:
        ; Restart the column pointers at the beginning of this block row.
        MV      .1      pSrc0,p0
        MV      .1      pSrc1,p1
        MV      .2      pSrc2,p2
        MV      .2      pSrc3,p3
        MV      .1      pDst,q0                 ; even output bytes
        ADD     .2x     pDst,1,q1               ; odd output bytes

 [!cntX] B              Mean4X4_SA_TAIL         ; row holds a single block: no pair to process
        SUB             cntX,2,cnt              ; BDEC bias: the pair loop runs exactly cntX times

Mean4X4_SA_LOOPX: .trip 1
        ; Two horizontally adjacent blocks per pass: 8 bytes from each of 4 rows.
        LDNDW   .1      *p0++,line0_7654:line0_3210
        LDNDW   .1      *p1++,line1_7654:line1_3210
        LDNDW   .2      *p2++,line2_7654:line2_3210
        LDNDW   .2      *p3++,line3_7654:line3_3210

        DOTPU4  .1      line0_3210,V0_01010101,sum0_0
        DOTPU4  .1      line1_3210,V0_01010101,sum0_1
        DOTPU4  .2      line2_3210,V1_01010101,sum0_2
        DOTPU4  .2      line3_3210,V1_01010101,sum0_3

        DOTPU4  .1      line0_7654,V0_01010101,sum1_0
        DOTPU4  .1      line1_7654,V0_01010101,sum1_1
        DOTPU4  .2      line2_7654,V1_01010101,sum1_2
        DOTPU4  .2      line3_7654,V1_01010101,sum1_3

        ADD     .1      sum0_0,sum0_1,sum0_10
        ADD     .2      sum0_2,sum0_3,sum0_32
        ADD     .1      sum1_0,sum1_1,sum1_10
        ADD     .2      sum1_2,sum1_3,sum1_32

        ADD     .1x     sum0_10,sum0_32,sum0
        ADD     .2x     sum1_10,sum1_32,sum1

        SHR     .1      sum0,4,sum0             ; 16-byte sum / 16
        SHR     .2      sum1,4,sum1

        STB     .1      sum0,*q0++(2)
        STB     .2      sum1,*q1++(2)

        BDEC            Mean4X4_SA_LOOPX,cnt

Mean4X4_SA_TAIL:
 [!save_0] B            Mean4X4_SA_XEND

        ; Unpaired last block of the row. It is read through the running
        ; column pointers (which now sit just past the last pair) without
        ; post-increment, so the row bases pSrc0..pSrc3 stay untouched.
        LDNW    .1      *p0,line0_3210
        LDNW    .1      *p1,line1_3210
        LDNW    .2      *p2,line2_3210
        LDNW    .2      *p3,line3_3210

        DOTPU4  .1      line0_3210,V0_01010101,sum0_0
        DOTPU4  .1      line1_3210,V0_01010101,sum0_1
        DOTPU4  .2      line2_3210,V1_01010101,sum0_2
        DOTPU4  .2      line3_3210,V1_01010101,sum0_3

        ADD     .1      sum0_0,sum0_1,sum0_10
        ADD     .2      sum0_2,sum0_3,sum0_32
        ADD             sum0_10,sum0_32,sum0
        SHR             sum0,4,sum0
        STB     .1      sum0,*q0                ; q0 already addresses the last output byte of the row

Mean4X4_SA_XEND:
        ; Move down four source rows and one packed output row.
        ADD     .1      pSrc0,stepX4,pSrc0
        ADD     .1      pSrc1,stepX4,pSrc1
        ADD     .2      pSrc2,stepX4,pSrc2
        ADD     .2      pSrc3,stepX4,pSrc3
        ADD     .1      pDst,dstStep,pDst

        BDEC            Mean4X4_SA_LOOPY,cntY

Mean4X4_SA_DONE:
        .return
        .endproc
;-----------------------------------------------------------------------
; void Mean4X4_SA(unsigned char *_pSrc, unsigned char *_pDst,
;                 int rect_w, int rect_h, int img_w);
;
; TMS320C6000 linear assembly (uses BDEC / DOTPU4 / LDNDW / LDNW).
;
; For every 4x4 block of source bytes, stores one byte equal to the sum
; of the 16 bytes shifted right by 4 (truncating mean, no rounding term).
;
;   _pSrc   address of the first source byte of the rectangle
;   _pDst   output; rows are packed back to back, rect_w/4 bytes each
;   rect_w  rectangle width in bytes  (rect_w >> 2 blocks per row)
;   rect_h  rectangle height in rows  (rect_h >> 2 block rows)
;   img_w   byte distance between two consecutive source rows
;
; Nothing is written when rect_w < 4 or rect_h < 4.
; All loads are the non-aligned forms, so no alignment is required here.
; .no_mdep declares that memory accesses in this procedure are
; independent, so the source and destination must not overlap.
;
; Loop counters: BDEC branches while its counter is >= 0 and then
; decrements it, so a loop that must run N times (N >= 1) is entered
; with the counter set to N - 2.
;
; NOTE(review): this file defines Mean4X4_SA twice under the same global
; label; only one definition can be kept - confirm and drop the other.
; NOTE(review): the even/odd output order assumes the low register of an
; LDNDW pair holds the four lowest-addressed bytes (little-endian
; configuration) - confirm for the target build.
;-----------------------------------------------------------------------
        .global Mean4X4_SA
Mean4X4_SA: .cproc _pSrc,_pDst,rect_w,rect_h,img_w

        .no_mdep

        .reg    pSrc0,pSrc1,pSrc2,pSrc3,pDst    ; row bases of the current block row / output row
        .reg    p0,p1,p2,p3,q0,q1               ; running column pointers
        .reg    line0_7654:line0_3210
        .reg    line1_7654:line1_3210
        .reg    line2_7654:line2_3210
        .reg    line3_7654:line3_3210
        .reg    V1_01010101,V0_01010101
        .reg    sum0_3,sum0_2,sum0_1,sum0_0
        .reg    sum1_3,sum1_2,sum1_1,sum1_0
        .reg    sum0_10,sum0_32,sum1_10,sum1_32
        .reg    sum0,sum1
        .reg    stepX4,dstStep
        .reg    cntY,cntX,cnt,save_0,empty

        ; DOTPU4 against 0x01010101 adds the four unsigned bytes of a word.
        ; Both halfwords of the constant are 0x0101; V1 is a second copy
        ; used by the .2-side DOTPU4s.
        MVKL            0x01010101,V0_01010101
        MVKLH           0x01010101,V0_01010101
        MV              V0_01010101,V1_01010101

        SHL             img_w,2,stepX4          ; source advance per block row (4 rows)
        SHR             rect_w,2,dstStep        ; blocks per row = output bytes per row
        SHR             rect_h,2,cntY           ; number of block rows

        ; Both loops are do-while shaped, so an empty rectangle has to be
        ; rejected before entering them.
        CMPLT           dstStep,1,empty
 [empty] B              Mean4X4_SA_DONE
        CMPLT           cntY,1,empty
 [empty] B              Mean4X4_SA_DONE

        SUB             cntY,2,cntY             ; BDEC bias (see header)
        AND             dstStep,1,save_0        ; 1 when a row ends with a single unpaired block
        SHR             dstStep,1,cntX          ; block pairs per row

        MV              _pDst,pDst
        MV      .1      _pSrc,pSrc0
        ADD     .1      pSrc0,img_w,pSrc1
        ADD     .2      pSrc1,img_w,pSrc2
        ADD     .2      pSrc2,img_w,pSrc3

Mean4X4_SA_LOOPY:
        ; Restart the column pointers at the beginning of this block row.
        MV      .1      pSrc0,p0
        MV      .1      pSrc1,p1
        MV      .2      pSrc2,p2
        MV      .2      pSrc3,p3
        MV      .1      pDst,q0                 ; even output bytes
        ADD     .2x     pDst,1,q1               ; odd output bytes

 [!cntX] B              Mean4X4_SA_TAIL         ; row holds a single block: no pair to process
        SUB             cntX,2,cnt              ; BDEC bias: the pair loop runs exactly cntX times

Mean4X4_SA_LOOPX: .trip 1
        ; Two horizontally adjacent blocks per pass: 8 bytes from each of 4 rows.
        LDNDW   .1      *p0++,line0_7654:line0_3210
        LDNDW   .1      *p1++,line1_7654:line1_3210
        LDNDW   .2      *p2++,line2_7654:line2_3210
        LDNDW   .2      *p3++,line3_7654:line3_3210

        DOTPU4  .1      line0_3210,V0_01010101,sum0_0
        DOTPU4  .1      line1_3210,V0_01010101,sum0_1
        DOTPU4  .2      line2_3210,V1_01010101,sum0_2
        DOTPU4  .2      line3_3210,V1_01010101,sum0_3

        DOTPU4  .1      line0_7654,V0_01010101,sum1_0
        DOTPU4  .1      line1_7654,V0_01010101,sum1_1
        DOTPU4  .2      line2_7654,V1_01010101,sum1_2
        DOTPU4  .2      line3_7654,V1_01010101,sum1_3

        ADD     .1      sum0_0,sum0_1,sum0_10
        ADD     .2      sum0_2,sum0_3,sum0_32
        ADD     .1      sum1_0,sum1_1,sum1_10
        ADD     .2      sum1_2,sum1_3,sum1_32

        ADD     .1x     sum0_10,sum0_32,sum0
        ADD     .2x     sum1_10,sum1_32,sum1

        SHR     .1      sum0,4,sum0             ; 16-byte sum / 16
        SHR     .2      sum1,4,sum1

        STB     .1      sum0,*q0++(2)
        STB     .2      sum1,*q1++(2)

        BDEC            Mean4X4_SA_LOOPX,cnt

Mean4X4_SA_TAIL:
 [!save_0] B            Mean4X4_SA_XEND

        ; Unpaired last block of the row. It is read through the running
        ; column pointers (which now sit just past the last pair) without
        ; post-increment, so the row bases pSrc0..pSrc3 stay untouched.
        LDNW    .1      *p0,line0_3210
        LDNW    .1      *p1,line1_3210
        LDNW    .2      *p2,line2_3210
        LDNW    .2      *p3,line3_3210

        DOTPU4  .1      line0_3210,V0_01010101,sum0_0
        DOTPU4  .1      line1_3210,V0_01010101,sum0_1
        DOTPU4  .2      line2_3210,V1_01010101,sum0_2
        DOTPU4  .2      line3_3210,V1_01010101,sum0_3

        ADD     .1      sum0_0,sum0_1,sum0_10
        ADD     .2      sum0_2,sum0_3,sum0_32
        ADD             sum0_10,sum0_32,sum0
        SHR             sum0,4,sum0
        STB     .1      sum0,*q0                ; q0 already addresses the last output byte of the row

Mean4X4_SA_XEND:
        ; Move down four source rows and one packed output row.
        ADD     .1      pSrc0,stepX4,pSrc0
        ADD     .1      pSrc1,stepX4,pSrc1
        ADD     .2      pSrc2,stepX4,pSrc2
        ADD     .2      pSrc3,stepX4,pSrc3
        ADD     .1      pDst,dstStep,pDst

        BDEC            Mean4X4_SA_LOOPY,cntY

Mean4X4_SA_DONE:
        .return
        .endproc