C语言函数:
/*
 * Image_TransAffine - resample an 8-bit image through a fixed-point
 * coordinate map, using bilinear interpolation.
 *
 * For the destination pixel in row i, column j the source position is
 * evaluated in fixed point with 10 fractional bits:
 *
 *     srcRow = pCoef[0]*i + pCoef[1]*j + pCoef[2]*i*j + pCoef[3]
 *     srcCol = pCoef[4]*i + pCoef[5]*j + pCoef[6]*i*j + pCoef[7]
 *
 * The integer parts (value >> 10) select the top-left pixel of a 2x2
 * source neighbourhood, the low 10 bits are the blend weights, and the
 * weighted sum is scaled back by >> 20 and stored as one byte.
 *
 * Parameters:
 *   DstData   - output bytes; pixel (i, j) is written to
 *               DstData[i * iDstPitch + j].
 *   pSrcData  - input bytes; pixel (r, c) is read from
 *               pSrcData[r * iSrcPitch + c].
 *   pCoef     - eight coefficients, laid out as shown above.
 *   iWidth    - number of destination pixels per row.
 *   iHeight   - number of destination rows.
 *   iDstPitch - distance, in bytes, between destination rows.
 *   iSrcPitch - distance, in bytes, between source rows.
 *
 * Caller obligations:
 *   - No bounds checking is done.  All four neighbours (r, c), (r, c+1),
 *     (r+1, c) and (r+1, c+1) are read for every output pixel, even when
 *     their weight is zero, so each of them must be readable.
 *   - The MUST_ITERATE pragma below promises a compiler that honours it
 *     (TI toolchains) that the row loop runs at least 400 times and a
 *     multiple of 2 times; iHeight has to satisfy that on such builds.
 *     Other compilers ignore the pragma.
 */
void Image_TransAffine(unsigned char *DstData,const unsigned char *pSrcData,const int *pCoef,int iWidth,int iHeight,int iDstPitch,int iSrcPitch)
{
    enum { FRAC_BITS = 10, FRAC_ONE = 1 << FRAC_BITS, FRAC_MASK = FRAC_ONE - 1 };

    const int rowOffset = pCoef[3];   /* constant term of srcRow */
    const int colOffset = pCoef[7];   /* constant term of srcCol */
    int rowBase = 0;                  /* running pCoef[0] * i */
    int colBase = 0;                  /* running pCoef[4] * i */
    int dstOffset = 0;                /* running i * iDstPitch */
    int i;

#pragma MUST_ITERATE (400,,2);
    for (i = 0; i < iHeight; i++)
    {
        /* Per-column increments; the cross terms depend only on the row,
           so they are computed once here instead of once per pixel. */
        const int rowStep  = pCoef[1];
        const int rowCross = pCoef[2] * i;
        const int colStep  = pCoef[5];
        const int colCross = pCoef[6] * i;
        int rowLin = 0;               /* running pCoef[1] * j     */
        int rowMix = 0;               /* running pCoef[2] * i * j */
        int colLin = 0;               /* running pCoef[5] * j     */
        int colMix = 0;               /* running pCoef[6] * i * j */
        int j;

        for (j = 0; j < iWidth; j++)
        {
            const int srcRow = rowBase + rowLin + rowMix + rowOffset;
            const int srcCol = colBase + colLin + colMix + colOffset;

            /* Top-left pixel of the 2x2 neighbourhood and the row below. */
            const unsigned char *pTop =
                pSrcData + (srcRow >> FRAC_BITS) * iSrcPitch + (srcCol >> FRAC_BITS);
            const unsigned char *pBottom = pTop + iSrcPitch;

            /* Fractional parts.  Masking replaces v - ((v >> 10) << 10),
               which left-shifted a negative value for negative coordinates. */
            const int fracRow = srcRow & FRAC_MASK;
            const int fracCol = srcCol & FRAC_MASK;
            const int invRow  = FRAC_ONE - fracRow;
            const int invCol  = FRAC_ONE - fracCol;

            /* The four weights sum to 2^20, hence the final shift. */
            const int blended = invRow  * invCol  * pTop[0]
                              + invRow  * fracCol * pTop[1]
                              + fracRow * invCol  * pBottom[0]
                              + fracRow * fracCol * pBottom[1];

            DstData[dstOffset + j] = (unsigned char)(blended >> (2 * FRAC_BITS));

            rowLin += rowStep;
            rowMix += rowCross;
            colLin += colStep;
            colMix += colCross;
        }

        rowBase   += pCoef[0];
        colBase   += pCoef[4];
        dstOffset += iDstPitch;
    }
}
汇编语言:
;******************************************************************************
;* FUNCTION NAME: _Image_TransAffine *
;* *
;* Regs Modified : A3,A4,A5,A6,A7,A8,A9,B0,B4,B5,B6,B7,B8,B9,SP,A16,B16,*
;* B17,B18,B19,B20,B21,B22,B23 *
;* Regs Used : A3,A4,A5,A6,A7,A8,A9,A10,B0,B3,B4,B5,B6,B7,B8,B9,SP, *
;* A16,B16,B17,B18,B19,B20,B21,B22,B23 *
;* Local Frame Size : 0 Args + 92 Auto + 0 Save = 92 byte *
;******************************************************************************
_Image_TransAffine:
;** --------------------------------------------------------------------------*
.line 2
.sym _DstData,4, 28, 17, 32
.sym _pSrcData,20, 28, 17, 32
.sym _pCoef,6, 20, 17, 32
.sym _iWidth,22, 4, 17, 32
.sym _iHeight,8, 4, 17, 32
.sym _iDstPitch,24, 4, 17, 32
.sym _iSrcPitch,10, 4, 17, 32
.sym _DstData,4, 28, 1, 32
.sym _pSrcData,8, 28, 1, 32
.sym _pCoef,12, 20, 1, 32
.sym _iWidth,16, 4, 1, 32
.sym _iHeight,20, 4, 1, 32
.sym _iDstPitch,24, 4, 1, 32
.sym _iSrcPitch,28, 4, 1, 32
.sym _pDstData,32, 28, 1, 32
.sym _i,36, 4, 1, 32
.sym _j,40, 4, 1, 32
.sym _x,44, 4, 1, 32
.sym _y,48, 4, 1, 32
.sym _a,52, 4, 1, 32
.sym _b,56, 4, 1, 32
.sym _c0,60, 4, 1, 32
.sym _c1,64, 4, 1, 32
.sym _c2,68, 4, 1, 32
.sym _c3,72, 4, 1, 32
.sym _c4,76, 4, 1, 32
.sym _c5,80, 4, 1, 32
.sym _c6,84, 4, 1, 32
.sym _c7,88, 4, 1, 32
.sym _TempPitch,92, 4, 1, 32
ADDK .S2 -96,SP ; |6|
STW .D2T1 A10,*+SP(28) ; |6|
|| MV .D1X B8,A3 ; |6|
STW .D2T1 A3,*+SP(24) ; |6|
STW .D2T1 A8,*+SP(20) ; |6|
STW .D2T2 B6,*+SP(16) ; |6|
STW .D2T1 A6,*+SP(12) ; |6|
STW .D2T2 B4,*+SP(8) ; |6|
STW .D2T1 A4,*+SP(4) ; |6|
NOP 2
.line 3
MV .D2X A4,B4
STW .D2T2 B4,*+SP(32) ; |7|
NOP 2
.line 6
ZERO .D1 A3 ; |10|
ZERO .D1 A3 ; |10|
|| STW .D2T1 A3,*+SP(36) ; |10|
STW .D2T1 A3,*+SP(40) ; |10|
|| ZERO .S2 B4 ; |10|
STW .D2T2 B4,*+SP(44) ; |10|
LDW .D2T1 *+SP(12),A4 ; |10|
STW .D2T2 B4,*+SP(48) ; |10|
STW .D2T1 A3,*+SP(52) ; |10|
STW .D2T1 A3,*+SP(56) ; |10|
STW .D2T1 A3,*+SP(60) ; |10|
LDW .D1T1 *+A4(12),A3 ; |10|
NOP 2
ZERO .S2 B5 ; |10|
MV .S2X A4,B4 ; |10|
STW .D2T1 A3,*+SP(72) ; |10|
STW .D2T2 B5,*+SP(76) ; |10|
LDW .D2T2 *+B4(28),B4 ; |10|
NOP 4
ZERO .S2 B4 ; |10|
|| STW .D2T2 B4,*+SP(88) ; |10|
STW .D2T2 B4,*+SP(92) ; |10|
NOP 2
.line 8
LDW .D2T2 *+SP(20),B4 ; |12|
LDW .D2T2 *+SP(36),B5 ; |12|
NOP 4
CMPLT .L2 B5,B4,B0 ; |12|
[!B0] BNOP .S1 L4,5 ; |12|
; BRANCH OCCURS ; |12|
;** --------------------------------------------------------------------------*
;** BEGIN LOOP L1
;** --------------------------------------------------------------------------*
L1:
.line 10
ZERO .D1 A3 ; |14|
STW .D2T1 A3,*+SP(84) ; |14|
STW .D2T1 A3,*+SP(80) ; |14|
STW .D2T1 A3,*+SP(68) ; |14|
STW .D2T1 A3,*+SP(64) ; |14|
NOP 2
.line 11
ZERO .D2 B4 ; |15|
STW .D2T2 B4,*+SP(40) ; |15|
LDW .D2T2 *+SP(40),B5 ; |15|
LDW .D2T2 *+SP(16),B4 ; |15|
NOP 4
CMPLT .L2 B5,B4,B0 ; |15|
[!B0] BNOP .S1 L3,5 ; |15|
; BRANCH OCCURS ; |15|
;*----------------------------------------------------------------------------*
;* SOFTWARE PIPELINE INFORMATION
;* Disqualified loop: Software pipelining disabled
;*----------------------------------------------------------------------------*
L2:
.line 13
LDW .D2T2 *+SP(60),B6 ; |17|
LDW .D2T2 *+SP(64),B5 ; |17|
LDW .D2T2 *+SP(68),B4 ; |17|
LDW .D2T2 *+SP(72),B7 ; |17|
NOP 2
ADD .D2 B5,B6,B5 ; |17|
ADD .D2 B4,B5,B4 ; |17|
ADD .D2 B7,B4,B4 ; |17|
STW .D2T2 B4,*+SP(52) ; |17|
NOP 2
.line 14
LDW .D2T2 *+SP(76),B6 ; |18|
LDW .D2T2 *+SP(80),B4 ; |18|
LDW .D2T2 *+SP(84),B5 ; |18|
LDW .D2T2 *+SP(88),B7 ; |18|
NOP 2
ADD .D2 B4,B6,B4 ; |18|
ADD .D2 B5,B4,B4 ; |18|
ADD .D2 B7,B4,B4 ; |18|
STW .D2T2 B4,*+SP(56) ; |18|
NOP 2
.line 15
LDW .D2T2 *+SP(52),B5 ; |19|
NOP 1
LDW .D2T2 *+SP(28),B4 ; |19|
NOP 2
SHR .S1X B5,10,A3 ; |19|
ADD .D1 1,A3,A3 ; |19|
MPYLH .M1X A3,B4,A4 ; |19|
MPYLH .M2X B4,A3,B5 ; |19|
MPYU .M2X B4,A3,B4 ; |19|
ADD .D2X B5,A4,B5 ; |19|
SHL .S2 B5,16,B5 ; |19|
ADD .D2 B4,B5,B4 ; |19|
STW .D2T2 B4,*+SP(44) ; |19|
NOP 2
.line 16
LDW .D2T2 *+SP(56),B4 ; |20|
NOP 4
SHR .S2 B4,10,B4 ; |20|
ADD .D2 1,B4,B4 ; |20|
STW .D2T2 B4,*+SP(48) ; |20|
NOP 2
.line 17
LDW .D2T2 *+SP(52),B4 ; |21|
NOP 4
SHR .S2 B4,10,B4 ; |21|
|| MV .D2 B4,B5 ; |21|
SHL .S2 B4,10,B4 ; |21|
SUB .D2 B5,B4,B4 ; |21|
STW .D2T2 B4,*+SP(52) ; |21|
NOP 2
.line 18
LDW .D2T2 *+SP(56),B4 ; |22|
NOP 4
SHR .S2 B4,10,B4 ; |22|
|| MV .D2 B4,B5 ; |22|
SHL .S2 B4,10,B4 ; |22|
SUB .D2 B5,B4,B4 ; |22|
STW .D2T2 B4,*+SP(56) ; |22|
NOP 2
.line 19
LDW .D2T2 *+SP(8),B19 ; |23| 把pSrcData存放到B19
LDW .D2T2 *+SP(44),B18 ; |23| 把x存放到B18
LDW .D2T2 *+SP(28),B9 ; |23| 把iSrcpitch存放到B9
MV .S2 B4,B7 ; |23| 把b放置B7
MV .D1X B4,A7 ; |23| 把b放置A7
|| MV .L2 B4,B5 ; |23| 把b放置B5
|| LDW .D2T2 *+SP(48),B4 ; |23| 把y放置B4
LDW .D2T2 *+SP(52),B8 ; |23| 把a放置B8
MV .D2 B19,B23 ; |23| 把pSrcData放置B23
MV .D2 B9,B6 ; |23| 把iSrcpitch放置B6
|| ADD .S2 B18,B19,B19 ; |23| 把x + PSrcData的结果放置B19
|| MV .L2 B19,B21 ; |23| 把pSrcData放置B21
MVK .S1 1024,A3 ; |23| 把1024放入A3
|| SUB .D2 B19,B9,B9 ; |23| 把x + pSrcData - iSrcpitch放置B9
MVK .S2 1024,B7 ; |23| 给1024赋值为B7
|| MV .L2 B4,B22 ; |23| 把y放置B22
|| LDBU .D2T2 *+B9[B4],B20 ; |23| 取x + pSrcData - iSrcpitch + y地址的值放入B20
|| SUB .S1X A3,B7,A8 ; |23| 把1024 - b的值放入A8
SUB .D2 B7,B8,B21 ; |23| 把1024 - a的值放入B21
|| ADD .S2 B18,B21,B4 ; |23| 把x + pSrcData的结果放入B4
|| MV .L2 B21,B19 ; |23| 把pSrcData放置B19
|| MV .D1X B4,A9 ; |23| 把y值放入A9
SUB .D2 B4,B6,B4 ; |23| 把x + pSrcData - iSrcptich放置B4
MPYLH .M1X B21,A7,A5 ; |23| 把(1024 - a)LSB和(b)MSB的乘积放入A5
MPYLH .M1X A7,B21,A16 ; |23| 把(b)LSB和(1024 - a)MSB的乘积放入A16
|| MV .D2X A7,B4 ; |23| 把b放入B4
|| ADD .S2 B22,B4,B9 ; |23| 把x + pSrcData - iSrcpitch + y放入B9
MPYLH .M2X A8,B8,B17 ; |23| 把(1024 - b)LSB和(a)MSB的乘积放入B17
|| MPYU .M1X A7,B21,A7 ; |23| 把(b)LSB和(1024 - a)LSB的乘积放入A7
|| MVK .S1 1024,A4 ; |23| 把1024常数放入A4
MPYLH .M1X B8,A8,A3 ; |23| 把(a)LSB和(1024 - b)MSB的乘积放入A3
|| LDBU .D2T2 *-B9(1),B7 ; |23| 取x + pSrcData - iSrcpitch + y - 1地址的值放入B7
|| SUB .S2 B7,B5,B5 ; |23| 把1024 - b放入B5
ADD .D2 B18,B23,B18 ; |23| 把x + pSrcData放入B18
|| SUB .S2X A4,B8,B9 ; |23| 把1024 - a放入B9
|| MV .D1X B18,A6 ; |23| 把x放入A6
ADD .D1X B17,A3,A3 ; |23| 把(1024 - b)LSB和(a)MSB的乘积 + (a)LSB和(1024 - b)MSB的乘积的和放入A3
|| MPYLH .M2 B5,B9,B16 ; |23| 把(1024 - b)LSB和(1024 - a)MSB的乘积放入B16
ADD .D1X A9,B18,A9 ; |23| 把y + x + pSrcData放入A9
|| MPYLH .M2 B9,B5,B17 ; |23| 把(1024 - a)LSB和(1024 - b)MSB的乘积放入B17
MV .D2 B8,B6 ; |23| 把a放入B6
|| ADD .D1 A16,A5,A5 ; |23| 把(b)LSB和(1024 - a)MSB的乘积 + (1024 - a)LSB和(b)MSB的乘积 放入A5
|| MPYU .M2 B5,B9,B5 ; |23| 把(1024 - b)LSB和(1024 - a)LSB的乘积放入B5
|| MV .S1X B22,A4 ; |23| 把y值放入A4
MPYLH .M2 B6,B4,B9 ; |23| 把(a)LSB 和 (b)MSb的乘积放入B9
|| LDBU .D1T1 *-A9(1),A5 ; |23| 取y + x + pSrcData - 1地址的值放入A5
|| SHL .S1 A5,16,A9 ; |23| 把(b)LSB和(1024 - a)MSB的乘积 + (1024 - a)LSB和(b)MSB的乘积 的和左移16位放入A9
|| ADD .D2 B16,B17,B16 ; |23| 把(1024 - b)LSB和(1024 - a)MSB的乘积 + (1024 - a)LSB和(1024 - b)MSB的乘积放入B16
|| MPYU .M1X A8,B8,A8 ; |23| 把(a)LSB和(1024 - b)LSB的乘积放入A8
MPYLH .M2 B4,B6,B8 ; |23| 把(b)LSB和(a)MSB的乘积放入B8
|| ADD .D1 A7,A9,A7 ; |23| 把b * (1024 - a)放入A7
|| SHL .S2 B16,16,B16 ; |23| 把(1024 - b)LSB和(1024 - a)MSB的乘积 + (1024 - a)LSB和(1024 - b)MSB的乘积的和左移16位放入B16
|| ADD .S1X A6,B19,A6 ; |23| 把x + pSrcData放入A6
MPYLHU .M1X B20,A7,A6 ; |23| 把(x + pSrcData - iSrcpitch + y地址的值)LSB和 (b * (1024 - a))MSB的乘积放入A6
|| MPYU .M2 B4,B6,B6 ; |23| 把(b)LSB和(a)LSB的乘积放入B6
|| LDBU .D1T1 *+A6[A4],A4 ; |23| 取x + pSrcData + y地址的值放入A4
|| ADD .D2 B5,B16,B16 ; |23| 把(1024 - b) * (1024 - a)放入B16
SHL .S1 A3,16,A3 ; |23| 把(1024 - b)LSB和(a)MSB的乘积 + (a)LSB和(1024 - b)MSB的乘积的和左移16位放入A3
|| MPYLHU .M2 B7,B16,B4 ; |23| 把(x + pSrcData - iSrcpitch + y - 1地址的值)LSB和((1024 - b) * (1024 - a))MSB的乘积放入B4
|| ADD .D2 B8,B9,B5 ; |23| 把(b)LSB和(a)MSB的乘积 + (a)LSB 和 (b)MSb的乘积的和放入B5
MPYU .M2X B20,A7,B8 ; |23| 把(x + pSrcData - iSrcpitch + y地址的值)LSB和 (b * (1024 - a))LSB的乘积放入B8
|| SHL .S1 A6,16,A3 ; |23| 把(x + pSrcData - iSrcpitch + y地址的值)LSB和 (b * (1024 - a))MSB的乘积 左移16位放入A3
|| ADD .D1 A8,A3,A6 ; |23| 把(1024 - b) * a放入A6
|| SHL .S2 B5,16,B5 ; |23| 把(b)LSB和(a)MSB的乘积 + (a)LSB 和 (b)MSb的乘积的和左移16位放入B5
SHL .S2 B4,16,B5 ; |23| 把(x + pSrcData - iSrcpitch + y - 1地址的值)LSB和((1024 - b) * (1024 - a))MSB的乘积 左移16放入B5
|| ADD .D2 B6,B5,B4 ; |23| 把b * a放入B4
MPYU .M2 B7,B16,B7 ; |23| 把(x + pSrcData - iSrcpitch + y - 1地址的值)LSB和((1024 - b) * (1024 - a))LSB的乘积放入B7
|| ADD .D2X B8,A3,B6 ; |23| 把(x + pSrcData - iSrcpitch + y地址的值)* (b * (1024 - a))放入B6
MPYLHU .M1X A4,B4,A4 ; |23| 把(x + pSrcData + y地址的值)LSB 和(b * a)MSB的乘积放入A4
|| MPYU .M2X A4,B4,B8 ; |23| 把(x + pSrcData + y地址的值)LSB 和(b * a)LSB的乘积放入B8
LDW .D2T2 *+SP(92),B5 ; |23| 把TempPitch放入B5
|| ADD .S2 B7,B5,B7 ; |23| 把(x + pSrcData - iSrcpitch + y - 1地址的值)和((1024 - b) * (1024 - a))的乘积放入B7
|| MPYLHU .M1 A5,A6,A7 ; |23| 把(y + x + pSrcData - 1地址的值)LSB和((1024 - b) * a)MSB的乘积放入A7
LDW .D2T2 *+SP(32),B6 ; |23| 把pDstData放入B6
|| ADD .S2 B6,B7,B7 ; |23| 把(x + pSrcData - iSrcpitch + y - 1地址的值)和((1024 - b) * (1024 - a))的乘积放入B7
|| MPYU .M1 A5,A6,A5 ; |23| 把(y + x + pSrcData - 1地址的值)LSB和((1024 - b) * a)LSB的乘积放入A7
|| SHL .S1 A4,16,A4 ; |23| 把(x + pSrcData + y地址的值)LSB 和(b * a)MSB的乘积左移16位放入A4
LDW .D2T2 *+SP(40),B4 ; |23| 把j的值放入B4
|| SHL .S1 A7,16,A3 ; |23| 把(y + x + pSrcData - 1地址的值)LSB和((1024 - b) * a)MSB的乘积左移16位放入A3
ADD .D1X B8,A4,A4 ; |23| 把(x + pSrcData + y地址的值) 和(b * a)的乘积放入A4
|| ADD .S1 A5,A3,A3 ; |23| 把(y + x + pSrcData - 1地址的值)和((1024 - b) * a)的乘积放入A3
ADD .D1X A3,B7,A3 ; |23|
ADD .D1 A4,A3,A3 ; |23|
ADD .D2 B5,B6,B5 ; |23|
|| SHR .S1 A3,20,A3 ; |23|
STB .D2T1 A3,*+B5[B4] ; |23|
;** --------------------------------------------------------------------------*
.line 25
LDW .D2T1 *+SP(12),A3 ; |29|
NOP 3
LDW .D2T2 *+SP(64),B4 ; |29|
LDW .D1T1 *+A3(4),A3 ; |29|
NOP 4
ADD .D2X A3,B4,B4 ; |29|
STW .D2T2 B4,*+SP(64) ; |29|
NOP 2
.line 26
LDW .D2T1 *+SP(12),A3 ; |30|
NOP 3
LDW .D2T2 *+SP(36),B4 ; |30|
LDW .D1T1 *+A3(8),A3 ; |30|
NOP 4
MPYLH .M2X B4,A3,B5 ; |30|
MPYLH .M1X A3,B4,A5 ; |30|
LDW .D2T2 *+SP(68),B4 ; |30|
|| MPYU .M1X B4,A3,A4 ; |30|
ADD .D1X B5,A5,A3 ; |30|
SHL .S1 A3,16,A3 ; |30|
ADD .D1 A4,A3,A3 ; |30|
NOP 1
ADD .D2X A3,B4,B4 ; |30|
STW .D2T2 B4,*+SP(68) ; |30|
NOP 2
.line 27
LDW .D2T1 *+SP(12),A3 ; |31|
NOP 3
LDW .D2T2 *+SP(80),B4 ; |31|
LDW .D1T1 *+A3(20),A3 ; |31|
NOP 4
ADD .D2X A3,B4,B4 ; |31|
STW .D2T2 B4,*+SP(80) ; |31|
NOP 2
.line 28
LDW .D2T2 *+SP(12),B4 ; |32|
NOP 2
LDW .D2T2 *+SP(36),B6 ; |32|
LDW .D2T2 *+SP(84),B5 ; |32|
LDW .D2T2 *+B4(24),B4 ; |32|
NOP 4
MPYLH .M2 B6,B4,B8 ; |32|
MPYLH .M2 B4,B6,B7 ; |32|
MPYU .M2 B6,B4,B6 ; |32|
ADD .D2 B8,B7,B4 ; |32|
SHL .S2 B4,16,B4 ; |32|
ADD .D2 B6,B4,B4 ; |32|
ADD .D2 B4,B5,B4 ; |32|
STW .D2T2 B4,*+SP(84) ; |32|
NOP 2
.line 29
LDW .D2T2 *+SP(40),B4 ; |33|
NOP 4
ADD .D2 1,B4,B4 ; |33|
STW .D2T2 B4,*+SP(40) ; |33|
LDW .D2T2 *+SP(16),B5 ; |33|
NOP 4
CMPLT .L2 B4,B5,B0 ; |33|
[ B0] BNOP .S1 L2,5 ; |33|
; BRANCH OCCURS ; |33|
;** --------------------------------------------------------------------------*
L3:
.line 30
LDW .D2T1 *+SP(12),A3 ; |34|
NOP 3
LDW .D2T2 *+SP(60),B4 ; |34|
LDW .D1T1 *A3,A3 ; |34|
NOP 4
ADD .D2X A3,B4,B4 ; |34|
STW .D2T2 B4,*+SP(60) ; |34|
NOP 2
.line 31
LDW .D2T2 *+SP(12),B5 ; |35|
NOP 3
LDW .D2T2 *+SP(76),B4 ; |35|
LDW .D2T2 *+B5(16),B5 ; |35|
NOP 4
ADD .D2 B5,B4,B4 ; |35|
STW .D2T2 B4,*+SP(76) ; |35|
NOP 2
.line 32
LDW .D2T2 *+SP(24),B4 ; |36|
LDW .D2T2 *+SP(92),B5 ; |36|
NOP 4
ADD .D2 B4,B5,B4 ; |36|
STW .D2T2 B4,*+SP(92) ; |36|
NOP 2
.line 33
LDW .D2T2 *+SP(36),B4 ; |37|
NOP 4
ADD .D2 1,B4,B4 ; |37|
STW .D2T2 B4,*+SP(36) ; |37|
LDW .D2T2 *+SP(36),B5 ; |37|
LDW .D2T2 *+SP(20),B4 ; |37|
NOP 4
CMPLT .L2 B5,B4,B0 ; |37|
[ B0] BNOP .S1 L1,5 ; |37|
; BRANCH OCCURS ; |37|