汇编语言的FFT
用C语言实现的FFT在单片机系统上运行效率较低,改用汇编实现可以大幅提高速度。
定点DSP,C28x定点库,
此版本包含TI定点库的FFT和滤波器模块,其他模块可能会在未来的版本中添加。FFT、FIR和IIR类别下的函数是为每个模块定义的结构体的成员函数,应该通过结构体对象调用,而不是直接调用。位反转函数是个例外,它只能直接调用。默认初始化宏用于帮助用户初始化模块对象,并确保将正确的值写入对象的每个元素。这一点很重要,尤其是对于复数FFT和实数FFT模块:它们都使用复数FFT函数FFT32_calc(),不同之处在于,N点实数FFT是先运行N/2点复数FFT,再执行拆分(split)操作来完成的。因此,用户必须用正确的FFT大小、旋转因子跳过率和级数来实例化RFFT对象。使用初始化宏可以使这项工作简单且不易出错。下表列出了各函数及其原型:
实数FFT
// A highlighted block
; DESCRIPTION:
;
; This function computes a real FFT. The input buffer must be aligned to
; a multiple of the FFT size. If it is not aligned then the output buffer
; will yield invalid results. If you do not wish to align the input buffer
; then use the RFFT_f32u function instead. Note that RFFT_f32u (the
; unaligned variant) reduces the cycle performance of the algorithm.
;
; FUNCTIONS:
;
; void RFFT_f32 (RFFT_F32_STRUCT *)
; void RFFT_f32_mag (RFFT_F32_STRUCT *)
; void RFFT_f32_phase (RFFT_F32_STRUCT *)
;
; Where RFFT_F32_STRUCT is a structure defined as:
;
; typedef struct {
; float32 *InBuf;
; float32 *OutBuf;
; float32 *CosSinBuf;
; float32 *MagBuf;
; float32 *PhaseBuf;
; Uint16 FFTSize;
; Uint16 FFTStages;
; } RFFT_F32_STRUCT;
;
; ASSUMPTIONS:
;
; * FFTSize must be a power of 2 (32, 64, 128, etc)
; * FFTSize must be greater than or equal to 32
; * FFTStages must be log2(FFTSize)
; * InBuf, OutBuf, CosSinBuf are FFTSize in length
; * MagBuf and PhaseBuf are FFTSize/2 in length
; * MagBuf and PhaseBuf are not used by this function.
; They are only used by the magnitude and phase calculation functions.
;
; ALGORITHM:
;
; 1) Bit reverse input data and calculate stages 1, 2 & 3:
;
; In Buf (read in bit reverse order) Out Buf
; +----+ +----+
; | I1 | (((I1 + I2) + (I3 + I4)) + ((I5 + I6) + (I7 + I8)))/8 -> | I1'|
; | I2 | ((I1 - I2) + COS*((I5 - I6) + (I8 - I7)) )/8 -> | I2'|
; | I3 | ((I1 + I2) - (I3 + I4) )/8 -> | I3'|
; | I4 | ((I1 - I2) - COS*((I5 - I6) + (I8 - I7)) )/8 -> | I4'|
; | I5 | (((I1 + I2) + (I3 + I4)) - ((I5 + I6) + (I7 + I8)))/8 -> | I5'|
; | I6 | (COS*((I8 - I7) - (I5 - I6)) - (I4 - I3) )/8 -> | I6'|
; | I7 | ((I7 + I8) - (I5 + I6) )/8 -> | I7'|
; | I8 | (COS*((I8 - I7) - (I5 - I6)) + (I4 - I3) )/8 -> | I8'|
; .
; .
; \|/
; Repeat above FFTSize/8 (i.e. if FFTSize = 1024, Repeat = 128 times)
;
; Note: COS = COS( 1*2*PI/8) = SIN( 1*2*PI/8)
;
; 2) Calculate stages 4 and up:
;
;
; Out Buf 4 5 6 7 8 9 10 <- Stages
; +- +---------+ - - - - - - --
; | | Y1 | 0 0 0 0 0 0 0 <- Y1 + Y3
; | |---------|
; | | X(I1) | 1 1 1 1 1 1 1 <- X(I1) + [X(I3)*COS + X(I4)*SIN]
; | |---------|
; | | . |
; | | . | 3 7 15 31 63 127 255 <- Inner Loop Repeat Times
; | | \./ |
; | I |---------|
; | N | Y2 | 4 8 16 32 64 128 256 <- Y2
; | N |---------|
; | E | /.\ |
; | R | . |
; | | . |
; | L |---------|
; | O | X(I2) | 7 15 31 63 127 255 511 <- X(I1) - [X(I3)*COS + X(I4)*SIN]
; | O |---------|
; | P | Y3 | 8 16 32 64 128 256 512 <- Y1 - Y3
; | |---------|
; | | X(I3) | 9 17 33 65 129 257 513 <- [X(I4)*COS - X(I3)*SIN] - X(I2)
; | |---------|
; | | . |
; | | . |
; | | \./ |
; | |---------|
; | | Y4 |12 24 48 96 192 384 768 <- -Y4
; | |---------|
; | | /.\ |
; | | . |
; | | . |
; | |---------|
; | | X(I4) |15 31 63 127 255 511 1023 <- [X(I4)*COS - X(I3)*SIN] + X(I2)
; +- |---------|
; | |16 32 64 128 256 512 1024
; -- --- --- --- --- --- ----
; . 1 <- Outer Loop Repeat Times (16 FFT)
; . 2 1 <- Outer Loop Repeat Times (32 FFT)
; . 4 2 1 <- Outer Loop Repeat Times (64 FFT)
; . 8 4 2 1 <- Outer Loop Repeat Times (128 FFT)
; . 16 8 4 2 1 <- Outer Loop Repeat Times (256 FFT)
; . 32 16 8 4 2 1 <- Outer Loop Repeat Times (512 FFT)
; . 64 32 16 8 4 2 1 <- Outer Loop Repeat Times (1024 FFT)
; .
; .
; \|/
; ###########################################################################
; $TI Release: C28x Floating Point Unit Library V1.31 $
; $Release Date: Sep 10, 2012 $
; ###########################################################################
;===========================================================================
; Function: void RFFT_f32(FFT_REAL *fft)
;===========================================================================
; RFFT_f32_Stages1and2and3andBitReverse(RFFT_F32_STRUCT *);
; RFFT_f32_Stages4andUp(RFFT_F32_STRUCT *);
;
;---------------------------------------------------------------------------
; _RFFT_f32 - top-level real FFT entry point (TI C28x assembler syntax)
;
; Runs the two passes of the transform back to back, handing the same
; structure pointer to each:
;   1) _rfft_f32_Stages1and2and3andBitReverse
;   2) _rfft_f32_Stages4andUp
;
; In:    XAR4 = address of the FFT structure, forwarded to both passes
; Out:   nothing is returned in registers by this wrapper itself
; Stack: one 32-bit slot (2 words), used to keep XAR4 across the first call
;
; Only the label starts in column 1; directives and instructions are
; indented because the assembler treats a token in column 1 as a label.
;---------------------------------------------------------------------------
        .global _RFFT_f32
        .text
_RFFT_f32:
        ADDB    SP,#2                   ; reserve a 2-word slot for the pointer
        MOVL    *-SP[2],XAR4            ; keep the structure pointer: XAR4 is not
                                        ; assumed to survive the first pass
        LCR     _rfft_f32_Stages1and2and3andBitReverse
        MOVL    XAR4,*-SP[2]            ; reload the pointer as the argument of pass 2
        LCR     _rfft_f32_Stages4andUp
        SUBB    SP,#2                   ; release the slot reserved on entry
        LRETR
;===========================================================================
; Function: void rfft_f32_Stages1and2and3andBitReverse(RFFT_F32_STRUCT *rfft)
;===========================================================================
;---------------------------------------------------------------------------
;
; DESCRIPTION:
;
; This function bit reverses the input and computes stages 1, 2 and 3
;
; ON ENTRY:
;
; XAR4 = Starting address of the RFFT_F32_STRUCT structure
;
; REGISTER USAGE:
;
; AR0, XAR1, XAR2, XAR4, XAR5, AR3, AR6, XAR7, ACC,
; R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H
;
; On Exit:
;
; RFFT_F32_STRUCT OutBuf contains the computed result
;
;---------------------------------------------------------------------------
;---------------------------------------------------------------------------
; Offsets into the stack frame that hold the bit-reversed input elements.
; They are used with *-SP[...] addressing by the stage 1-3 routine below:
; each of the eight values fetched from the input buffer with *BR0++ is
; stored to *-SP[I1] .. *-SP[I8] before the butterfly arithmetic reads them
; back. Consecutive offsets are 2 apart, one slot per 32-bit MOVL/MOV32
; access. The largest offset (I8 = 14h) equals the frame size the routine
; reserves with "ADDB SP,#14h".
;---------------------------------------------------------------------------
I1 .set 06H
I2 .set 08H
I3 .set 0AH
I4 .set 0CH
I5 .set 0EH
I6 .set 10H
I7 .set 12H
I8 .set 14H
; NOTE(review): cossinBuf is not referenced in the part of the routine
; visible here; by its name and value it presumably names a stack slot for
; the CosSinBuf pointer - confirm against the rest of the file.
cossinBuf .set 04H
;--------------------------------------------------------------------------------
; Offset to the stack which holds intermediate results of FFT computation
; NOTE(review): no symbol definitions follow this heading in the visible
; source; it may be a leftover from an earlier revision - confirm.
;-------------------------------------------------------------------------------
.global _rfft_f32_Stages1and2and3andBitReverse
.text
_rfft_f32_Stages1and2and3andBitReverse:
;----------------------------------------------------------------------
; Save all save-on-entry registers used
;----------------------------------------------------------------------
PUSH XAR1
PUSH XAR2
PUSH XAR3
MOV32 *SP++,R4H
MOV32 *SP++,R5H
MOV32 *SP++,R6H
MOV32 *SP++,R7H
ADDB SP,#14h
MOVL XAR2,*+XAR4[0] ; &Inbuf
MOVL XAR5,*+XAR4[2] ; &Outbuf
MOVL XAR3,*+XAR4[2]
ADDB XAR3,#8 ; &Outbuf[4]
MOVL XAR7,*+XAR4[4] ; &CosSinbuf
MOV AR0,#0Ah
MOV AH,*+XAR4[AR0] ; FFT SIZE
MOV AR0,AH
LSR AH,3
SUBB AH,#1 ; (Size / 8) - 1
MOVL XAR1,#0000h ; index if memory is not aligned
RPTB _rfft_32_Last, AH
;--------------------------------------------------------------------------------
; Input buffer must be aligned for this code
;--------------------------------------------------------------------------------
NOP *,ARP2
MOVL ACC,*BR0++
MOVL *-SP[I1],ACC ;I1
MOVL ACC,*BR0++
MOVL *-SP[I2],ACC ;I2
MOVL ACC,*BR0++
MOVL *-SP[I3],ACC ;I3
MOVL ACC,*BR0++
MOVL *-SP[I4],ACC ;I4
MOVL ACC,*BR0++
MOVL *-SP[I5],ACC ;I5
MOVL ACC,*BR0++
MOVL *-SP[I6],ACC ;I6
MOVL ACC,*BR0++
MOVL *-SP[I7],ACC ;I7
MOVL ACC,*BR0++
MOVL *-SP[I8],ACC ;I8
;-------------------------------------------------------------------------------
; Computations for stages 1 2, and 3
; OutBufIndex++ = (I1 + I2) + (I3 + I4) + (I5 + I6) + (I7 + I8); (A) <- XAR5
; OutBufIndex++ = (I1 - I2) + COS x ((I5 - I6) + (I8 - I7)); (B)
; OutBufIndex++ = (I1 + I2) - (I3 + I4); (C)
; OutBufIndex++ = (I1 - I2) - COS x ((I5 - I6) + (I8 - I7)); (D)
; OutBufIndex++ = ((I1 + I2) + (I3 + I4)) - ((I5 + I6) + (I7 + I8)); (E) <- XAR3
; OutBufIndex++ = COS x ((I8 - I7) - (I5 - I6)) - (I4 - I3); (F)
; OutBufIndex++ = (I7 + I8) - (I5 + I6); (G)
; OutBufIndex++ = COS x ((I8 - I7) - (I5 - I6)) + (I4 - I3); (H)
;-------------------------------------------------------------------------------
MOV32 R0H, *-SP[I8] ; R0H = I8
MOV32 R1H, *-SP[I7] ; R1H = I7
SUBF32 R2H, R0H, R1H ; R2H = I8-I7
|| MOV32 R3H, *-SP[I5] ; R3H = I5
ADDF32 R0H, R1H, R0H ; R0H = I7+I8
|| MOV32 R4H, *-SP[I6] ; R4H = I6
SUBF32 R1H, R3H, R4H ; R1H = I5-I6
|| MOV32 R5H, *-SP[I1] ; R5H = I1
ADDF32 R3H, R3H, R4H ; R3H = I5+I6
ADDF32 R4H, R1H, R2H ; R4H = (I5-I6) + (I8-I7)
|| MOV32 R6H,*XAR7 ; R6H = COS
SUBF32 R7H, R0H, R3H ; R7H = (I7+I8) - (I5+I6) (G)
MPYF32 R4H, R6H, R4H ; R4H = COS x ((I5-I6) + (I8-I7))
|| SUBF32 R2H, R2H, R1H ; R2H = (I8-I7) - (I5-I6)
ADDF32 R0H, R3H, R0H ; R0H = (I5+I6) + (I7+I8)
|| MOV32 *+XAR3[4], R7H ; store G
MPYF32 R2H, R6H, R2H ; R2H = COS x ((I8-I7) - (I5-I6))
|| MOV32 R7H, *-SP[I2] ; R7H = I2
SUBF32