FFT的汇编

汇编语言的FFT

C语言实现的FFT在单片机系统上运行效率较低,改用汇编实现可以大大提高速度。

定点DSP:C28x定点库。

此版本包含TI定点库的FFT和滤波器模块。其他模块可能会在未来的版本中添加。FFT、FIR和IIR类别下的函数是为每个模块定义的结构的成员函数,应该通过结构对象调用,而不是直接调用函数。位反转函数是个例外,它只能直接调用。默认宏用于帮助用户初始化模块对象,并确保将正确的值写入对象的每个元素。这一点很重要,尤其是对于复数FFT和实数FFT模块。它们都使用复数FFT函数FFT32_calc(),不同之处在于,N点实数FFT是通过先运行N/2点复数FFT,然后执行拆分操作来完成的。因此,用户必须用正确的FFT大小、旋转因子跳过率和级数实例化RFFT对象。使用初始化宏可以使任务简单无误。下表列出了各函数及其原型:(原文此处为图片,未能提取)

实数FFT

// A highlighted block
; DESCRIPTION:
;
;    This function computes a real FFT.  The input buffer must be aligned to
;    a multiple of the FFT size.  If it is not aligned then the output buffer
;    will yield invalid results.  If you do not wish to align the input buffer
;    then use the RFFT_f32u function instead, at the cost of reduced cycle
;    performance.
;
; FUNCTIONS:
;
;    void RFFT_f32 (FFT_REAL *)
;    void RFFT_f32_mag (FFT_REAL *)
;    void RFFT_f32_phase (FFT_REAL *)
;
;    Where the argument (FFT_REAL * above) points to an RFFT_F32_STRUCT,
;    a structure defined as:
;
;    typedef struct {
;      float32  *InBuf;
;      float32  *OutBuf;
;      float32  *CosSinBuf;
;      float32  *MagBuf;
;      float32  *PhaseBuf;
;      Uint16 FFTSize;
;      Uint16 FFTStages;
;    } RFFT_F32_STRUCT;
;
; ASSUMPTIONS:
;
;     * FFTSize must be a power of 2 (32, 64, 128, etc)
;     * FFTSize must be greater or equal to 32
;     * FFTStages must be log2(FFTSize)
;     * InBuf, OutBuf, CosSinBuf are FFTSize in length
;     * MagBuf and PhaseBuf are FFTSize/2 in length
;     * MagBuf and PhaseBuf are not used by this function.
;       They are only used by the magnitude and phase calculation functions.
;
; ALGORITHM:
;
; 1) Bit reverse input data and calculate stages 1, 2 & 3:
;
;  In Buf (read in bit reverse order)                             Out Buf
;  +----+                                                          +----+
;  | I1 | (((I1 + I2) + (I3 + I4)) + ((I5 + I6) + (I7 + I8)))/8 -> | I1'|
;  | I2 | ((I1 - I2) + COS*((I5 - I6) + (I8 - I7))          )/8 -> | I2'|
;  | I3 | ((I1 + I2) - (I3 + I4)                            )/8 -> | I3'|
;  | I4 | ((I1 - I2) - COS*((I5 - I6) + (I8 - I7))          )/8 -> | I4'|
;  | I5 | (((I1 + I2) + (I3 + I4)) - ((I5 + I6) + (I7 + I8)))/8 -> | I5'|
;  | I6 | (COS*((I8 - I7) - (I5 - I6)) - (I4 - I3)          )/8 -> | I6'|
;  | I7 | ((I7 + I8) - (I5 + I6)                            )/8 -> | I7'|
;  | I8 | (COS*((I8 - I7) - (I5 - I6)) + (I4 - I3)          )/8 -> | I8'|
;     .
;     .
;    \|/
; Repeat above FFTSize/8 (i.e. if FFTSize = 1024, Repeat = 128 times)
;
; Note: COS = COS( 1*2*PI/8) = SIN( 1*2*PI/8)
;
; 2) Calculate stages 4 and up:
;
;
;             Out Buf   4   5   6   7   8   9   10  <- Stages
; +-        +---------+ -   -   -   -   -   -   --
; |         |    Y1   | 0   0   0   0   0   0    0  <-   Y1 + Y3
; |         |---------|
; |         |  X(I1)  | 1   1   1   1   1   1    1  <-   X(I1) + [X(I3)*COS + X(I4)*SIN]
; |         |---------|
; |         |    .    |
; |         |    .    | 3   7  15  31  63 127  255  <-   Inner Loop Repeat Times
; |         |   \./   |
; |  I      |---------|
; |  N      |    Y2   | 4   8  16  32  64 128  256  <-   Y2
; |  N      |---------|
; |  E      |   /.\   |
; |  R      |    .    |
; |         |    .    |
; |  L      |---------|
; |  O      |  X(I2)  | 7  15  31  63 127 255  511 <-   X(I1) - [X(I3)*COS + X(I4)*SIN]
; |  O      |---------|
; |  P      |    Y3   | 8  16  32  64 128 256  512 <-   Y1 - Y3
; |         |---------|
; |         |  X(I3)  | 9  17  33  65 129 257  513 <-   [X(I4)*COS - X(I3)*SIN] - X(I2)
; |         |---------|
; |         |    .    |
; |         |    .    |
; |         |   \./   |
; |         |---------|
; |         |    Y4   |12  24  48  96 192 384  768 <-   -Y4
; |         |---------|
; |         |   /.\   |
; |         |    .    |
; |         |    .    |
; |         |---------|
; |         |  X(I4)  |15  31  63 127 255 511 1023 <-   [X(I4)*COS - X(I3)*SIN] + X(I2)
; +-        |---------|
;           |         |16  32  64 128 256 512 1024
;                      -- --- --- --- --- --- ----
;                .      1                          <-   Outer Loop Repeat Times (16   FFT)
;                .      2   1                      <-   Outer Loop Repeat Times (32   FFT)
;                .      4   2   1                  <-   Outer Loop Repeat Times (64   FFT)
;                .      8   4   2   1              <-   Outer Loop Repeat Times (128  FFT)
;                .     16   8   4   2   1          <-   Outer Loop Repeat Times (256  FFT)
;                .     32  16   8   4   2   1      <-   Outer Loop Repeat Times (512  FFT)
;                .     64  32  16   8   4   2    1 <-   Outer Loop Repeat Times (1024 FFT)
;                .
;                .
;               \|/
; ###########################################################################
; $TI Release: C28x Floating Point Unit Library V1.31 $
; $Release Date: Sep 10, 2012 $
; ###########################################################################


;===========================================================================
; Function: void RFFT_f32(FFT_REAL *fft)
;===========================================================================
; Calls, in order:
;   rfft_f32_Stages1and2and3andBitReverse(RFFT_F32_STRUCT *);
;   rfft_f32_Stages4andUp(RFFT_F32_STRUCT *);
;
       .global      _RFFT_f32
       .sect       .text

;---------------------------------------------------------------------------
; _RFFT_f32
;
;   On entry XAR4 holds the structure pointer.  It is parked in a two-word
;   stack slot, handed to the bit-reverse / stages 1-3 routine, reloaded
;   from the slot, and then handed to the stages 4-and-up routine.  The
;   slot is released just before returning.
;---------------------------------------------------------------------------
_RFFT_f32:
       PUSH     XAR4                    ; reserve 2 words and save the pointer
       LCR      _rfft_f32_Stages1and2and3andBitReverse
       MOVL     XAR4,*-SP[2]            ; first stage lists XAR4 in its register
                                        ; usage, so reload the saved pointer
       LCR      _rfft_f32_Stages4andUp
       SUBB     SP,#2                   ; drop the saved-pointer slot
       LRETR

;===========================================================================
; Function: void rfft_f32_Stages1and2and3andBitReverse(RFFT_F32_STRUCT *rfft)
;===========================================================================
;---------------------------------------------------------------------------
;
; DESCRIPTION:
;
;     This function bit reverses the input and computes stages 1, 2 and 3
;
; ON ENTRY:
;
;     XAR4 = Starting address of the RFFT_F32_STRUCT structure
;
; REGISTER USAGE:
;
;     AR0, XAR1, XAR2, XAR4, XAR5, AR3, AR6, XAR7, ACC,
;     R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H
;
; On Exit:
;
;     RFFT_F32_STRUCT OutBuf contains the computed result
;
;---------------------------------------------------------------------------
;offsets to the stack which holds the bit reversed elements
;---------------------------------------------------------------------------

; Frame offsets used as *-SP[Ix] by the bit-reverse routine below, after it
; has reserved 14h words of scratch space on the stack.  Each of the eight
; bit-reversed input values is stored with a 32-bit move, so consecutive
; slots are two words apart.
I1         .set 06H
I2         .set 08H
I3         .set 0AH
I4         .set 0CH
I5         .set 0EH
I6         .set 10H
I7         .set 12H
I8         .set 14H
; NOTE(review): cossinBuf is not referenced in the visible part of this file;
; presumably a frame slot for the twiddle-table pointer - confirm before use.
cossinBuf  .set 04H
;--------------------------------------------------------------------------------
;Offset to the stack which holds intermediate results of FFT computation
;-------------------------------------------------------------------------------
           .global  _rfft_f32_Stages1and2and3andBitReverse
           .text

_rfft_f32_Stages1and2and3andBitReverse:
;----------------------------------------------------------------------
;     Save all save-on-entry registers used
;----------------------------------------------------------------------
        PUSH     XAR1
        PUSH     XAR2
        PUSH     XAR3
        MOV32    *SP++,R4H
        MOV32    *SP++,R5H
        MOV32    *SP++,R6H
        MOV32    *SP++,R7H
        ADDB     SP,#14h

        MOVL     XAR2,*+XAR4[0]         ; &Inbuf
        MOVL     XAR5,*+XAR4[2]         ; &Outbuf
        MOVL     XAR3,*+XAR4[2]
        ADDB     XAR3,#8                ; &Outbuf[4]
        MOVL     XAR7,*+XAR4[4]         ; &CosSinbuf

        MOV      AR0,#0Ah
        MOV      AH,*+XAR4[AR0]         ; FFT SIZE
        MOV      AR0,AH
        LSR      AH,3
        SUBB     AH,#1                  ; (Size / 8) - 1
        MOVL     XAR1,#0000h            ; index if memory is not aligned

        RPTB    _rfft_32_Last, AH
;--------------------------------------------------------------------------------
;  Input buffer must be aligned for this code
;--------------------------------------------------------------------------------
        NOP     *,ARP2

        MOVL    ACC,*BR0++
        MOVL   *-SP[I1],ACC             ;I1

        MOVL    ACC,*BR0++
        MOVL    *-SP[I2],ACC            ;I2

        MOVL    ACC,*BR0++
        MOVL    *-SP[I3],ACC            ;I3

        MOVL    ACC,*BR0++
        MOVL    *-SP[I4],ACC            ;I4

        MOVL    ACC,*BR0++
        MOVL    *-SP[I5],ACC            ;I5

        MOVL    ACC,*BR0++
        MOVL    *-SP[I6],ACC            ;I6

        MOVL    ACC,*BR0++
        MOVL    *-SP[I7],ACC            ;I7

        MOVL    ACC,*BR0++
        MOVL    *-SP[I8],ACC            ;I8

;-------------------------------------------------------------------------------
; Computations for stages 1 2, and 3
;   OutBufIndex++ = (I1 + I2) + (I3 + I4) + (I5 + I6) + (I7 + I8);      (A)  <- XAR5
;   OutBufIndex++ = (I1 - I2) + COS x ((I5 - I6) + (I8 - I7));          (B)
;   OutBufIndex++ = (I1 + I2) - (I3 + I4);                              (C)
;   OutBufIndex++ = (I1 - I2) - COS x ((I5 - I6) + (I8 - I7));          (D)
;   OutBufIndex++ = ((I1 + I2) + (I3 + I4)) - ((I5 + I6) + (I7 + I8));  (E)  <- XAR3
;   OutBufIndex++ = COS x ((I8 - I7) - (I5 - I6)) - (I4 - I3);          (F)
;   OutBufIndex++ = (I7 + I8) - (I5 + I6);                              (G)
;   OutBufIndex++ = COS x ((I8 - I7) - (I5 - I6)) + (I4 - I3);          (H)
;-------------------------------------------------------------------------------
        MOV32       R0H, *-SP[I8]           ; R0H   = I8
        MOV32       R1H, *-SP[I7]           ; R1H   = I7
        SUBF32      R2H, R0H, R1H           ; R2H   = I8-I7
     || MOV32       R3H, *-SP[I5]           ; R3H   = I5
        ADDF32      R0H, R1H, R0H           ; R0H   = I7+I8
     || MOV32       R4H, *-SP[I6]           ; R4H   = I6
        SUBF32      R1H, R3H, R4H           ; R1H   = I5-I6
     || MOV32       R5H, *-SP[I1]           ; R5H   = I1
        ADDF32      R3H, R3H, R4H           ; R3H   = I5+I6
        ADDF32      R4H, R1H, R2H           ; R4H   = (I5-I6) + (I8-I7)
     || MOV32       R6H,*XAR7               ; R6H   = COS
        SUBF32      R7H, R0H, R3H           ; R7H   = (I7+I8) - (I5+I6)  (G)
        MPYF32      R4H, R6H, R4H           ; R4H   = COS x ((I5-I6) + (I8-I7))
     || SUBF32      R2H, R2H, R1H           ; R2H   = (I8-I7) - (I5-I6)
        ADDF32      R0H, R3H, R0H           ; R0H   = (I5+I6) + (I7+I8)
     || MOV32       *+XAR3[4], R7H          ; store G
        MPYF32      R2H, R6H, R2H           ; R2H   = COS x ((I8-I7) - (I5-I6))
     || MOV32       R7H, *-SP[I2]           ; R7H   = I2
        SUBF32      
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
FFT(快速傅里叶变换)是一种复杂的算法,用汇编实现需要耗费大量时间和精力。下面是一个简单的例子,可以实现8点FFT:

```assembly
;=============================================
; 8-point FFT in assembly language
;=============================================
.model small
.stack 100h

.data
x_real dw 4, 0, 4, 0, 4, 0, 4, 0
x_imag dw 0, 0, 0, 0, 0, 0, 0, 0
w_real dw 8, 0, -1, 7, 0, -8, 1, -7
w_imag dw 0, -8, 7, -1, 8, 0, -7, 1

.code
main PROC
    mov ax, @data
    mov ds, ax
    ; do the FFT
    call fft_8
    mov ax, 4c00h
    int 21h
main ENDP

;---------------------------------------------
; subroutines
;---------------------------------------------
fft_8 PROC
    push ax
    push bx
    push cx
    push dx
    ; first pass
    mov cx, 4
    mov dx, 2
L1:
    mov bx, cx
L2:
    mov ax, x_real[bx]
    add ax, x_real[bx+dx]
    mov x_real[bx+dx], ax
    sub ax, x_real[bx]
    mov x_real[bx], ax
    mov ax, x_imag[bx]
    add ax, x_imag[bx+dx]
    mov x_imag[bx+dx], ax
    sub ax, x_imag[bx]
    mov x_imag[bx], ax
    add bx, dx
    cmp bx, 8
    jle L2
    sub cx, 1
    cmp cx, 0
    jne L1
    ; second pass
    mov cx, 2
    mov dx, 4
L3:
    mov bx, cx
    mov ax, x_real[bx]
    mov si, w_real[bx]
    mov di, w_imag[bx]
    mov bx, dx
L4:
    mov bp, x_real[bx]
    mov bx, x_imag[bx]
    mov bl, dil
    mov bh, sih
    mov si, x_real[bx]
    mov di, x_imag[bx]
    mov cl, dil
    mov ch, sih
    mov ax, bp
    mul cl
    mov bp, ax
    mov ax, bx
    mul bh
    sub bp, ax
    mov ax, si
    mul ch
    add bp, ax
    mov ax, di
    mul bl
    add bp, ax
    mov x_real[bx], bp
    mov ax, bp
    mov bx, si
    mul dil
    mov bp, ax
    mov ax, di
    mul sih
    sub bp, ax
    mov ax, bp
    mov bx, si
    mul sih
    add ax, di
    mov x_imag[bx], ax
    add bx, dx
    cmp bx, 8
    jle L4
    add cx, 1
    cmp cx, 4
    jle L3
    pop dx
    pop cx
    pop bx
    pop ax
    ret
fft_8 ENDP
END main
```

上述代码实现了一个简单的8点FFT。实现FFT需要了解复数运算、旋转因子等一系列概念,因此需要有一定的数学和计算机基础。同时,汇编语言的代码相对于高级语言更加底层,需要更加细致的调试和优化。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值