vivado hls fft设计

最新推荐文章于 2024-09-27 19:15:09 发布

shichaog

最新推荐文章于 2024-09-27 19:15:09 发布

阅读量1.3w

点赞数 5

分类专栏： fpga 文章标签： matlab fft vivado hls

本文链接：https://blog.csdn.net/shichaog/article/details/50811449

版权

fpga 专栏收录该内容

19 篇文章 6 订阅

订阅专栏

HLS的FFT设计步骤

本文基于HLS自带的设计example：FFT > fft_single，其为1024点pipelined streaming I/O算法。

大体代码流程如下：

1.包含hls_fft.h库

#include "hls_fft.h"

2.设置hls::ip_fft命名空间中预先定义的params_t结构体的成员

struct hls::ip_fft::params_t

3.定义运行时设置，这是axis-4接口的一部分，用于动态调节FFT的一些参数

4.调用FFT函数

hls::fft<param1> (xn1, xk1, &fft_status1, &fft_config1);

5.检查运行结果，该过程可选，用于检测是否溢出。

详细过程如下：

首先定义一个头文件fft_top.h，该文件include进FFT头文件。

前几行定义了输入/输出数据位宽为16bit，FFT点数为（1<<10）即1024点。

紧接着对params_t的ordering_opt和config_width进行了重新设置，一个是输出顺序，一个是axis-4接口配置数据的位宽。

typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;

将数据变成了以1位表示整数，15bit表示小数的定点数。

然后声明了三个函数。第一个函数用于声明axis-4参数设置，第二个函数是fft结果的status判断，第三个函数是vivado中fft ip调用。

#include "ap_fixed.h"
#include "hls_fft.h"

// configurable params
// Bit width of each real/imaginary input sample.
const char FFT_INPUT_WIDTH                     = 16;
// Output samples use the same bit width as the input samples.
const char FFT_OUTPUT_WIDTH                    = FFT_INPUT_WIDTH;
// Bit width assigned to config1::config_width below.
const char FFT_CONFIG_WIDTH                    = 16;
// log2 of the transform length.
const char FFT_NFFT_MAX                        = 10; 
// Number of points per transform: 1 << 10 = 1024.
const int  FFT_LENGTH                          = 1 << FFT_NFFT_MAX; 


#include <complex>
using namespace std;

// FFT parameter set used by this design. It inherits every default from
// hls::ip_fft::params_t and overrides only the two members listed here.
struct config1 : hls::ip_fft::params_t {
    // Output ordering option: natural order.
    static const unsigned ordering_opt = hls::ip_fft::natural_order;
    // Configuration word width, taken from FFT_CONFIG_WIDTH.
    static const unsigned config_width = FFT_CONFIG_WIDTH;
};

// Run-time configuration and status types, both parameterized by config1.
typedef hls::ip_fft::config_t<config1> config_t;
typedef hls::ip_fft::status_t<config1> status_t;

// Fixed-point input sample: FFT_INPUT_WIDTH bits in total, 1 integer bit.
typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;
// Fixed-point output sample. The integer width evaluates to
// FFT_OUTPUT_WIDTH - FFT_INPUT_WIDTH + 1, i.e. 1 while both widths are equal.
typedef ap_fixed<FFT_OUTPUT_WIDTH,FFT_OUTPUT_WIDTH-FFT_INPUT_WIDTH+1> data_out_t;
// Complex sample types built from the fixed-point scalars above.
typedef std::complex<data_in_t> cmpxDataIn;
typedef std::complex<data_out_t> cmpxDataOut;

// Front-end stage placed before the FFT call.
//   direction : transform direction, forwarded to the configuration object
//   config    : run-time FFT configuration to be filled in
//   in        : FFT_LENGTH input samples
//   out       : FFT_LENGTH samples handed on to the FFT
void dummy_proc_fe(
    bool direction,
    config_t* config, 
    cmpxDataIn in[FFT_LENGTH], 
    cmpxDataIn out[FFT_LENGTH]);

// Back-end stage placed after the FFT call.
//   status_in : status object produced by the FFT
//   ovflo     : receives the overflow flag
//   in        : FFT_LENGTH samples coming out of the FFT
//   out       : FFT_LENGTH samples returned to the caller
void dummy_proc_be(
    status_t* status_in, 
    bool* ovflo,
    cmpxDataOut in[FFT_LENGTH], 
    cmpxDataOut out[FFT_LENGTH]);

// Top-level function of the design (the function that is turned into an IP).
//   direction : transform direction selector
//   in        : FFT_LENGTH complex input samples
//   out       : FFT_LENGTH complex output samples
//   ovflo     : receives the overflow flag for this transform
void fft_top(
    bool direction,
    cmpxDataIn in[FFT_LENGTH],
    cmpxDataOut out[FFT_LENGTH],
    bool* ovflo);

有了头文件，接下来就是testbench调用fft方法来完成fft了。首先读入16bit数，然后转换成浮点数

// Testbench entry point (first part): for every frame, parse the stimulus
// file stimulus_NN.dat into xn_input so that it can be fed to fft_top.
int main()
{
    const int SIM_FRAMES = 1;
    const int SAMPLES = (1 << FFT_NFFT_MAX);

    int error_num = 0;
    bool ovflo_all = false;
    // NOTE(review): BUF_SIZE is not defined in the code shown here; it must
    // come from elsewhere in the testbench.
    char res_filename[BUF_SIZE]={0};
    char dat_filename[BUF_SIZE]={0};
    static cmpxDataIn xn_input[SAMPLES];
    static cmpxDataOut xk_output[SAMPLES];

    for (int frame = 0; frame < SIM_FRAMES; ++frame)
    {
        // Header fields read from the first four records of the stimulus file.
        int NFFT = 0;
        int CP_LEN = 0; // length of the cyclic prefix to be inserted for each frame
        int FWD_INV = 0;
        int sc_sch = 0;
        // 1-based record counter; records 1-4 are header, 5 and up are samples.
        int line_no = 1;
        FILE *stimfile;

        // Open stimulus .dat file for reading
        sprintf(dat_filename, "stimulus_%02d", frame);
        strcat(dat_filename,".dat");
        stimfile = fopen(dat_filename, "r");
        
        int tmp_re, tmp_im;
        float dummy_re, dummy_im;
        // 1 << 16 = 65536 with the current FFT_INPUT_WIDTH.
        const int max = 1 << FFT_INPUT_WIDTH; // might not work for > 32 bits!
        // Largest positive value of a signed FFT_INPUT_WIDTH-bit integer.
        const int max_half_minus_one = (max/2)-1;
        // Scaling factor to get integer into -1 <= x < +1 range 
        const double sc = ldexp(1.0, FFT_INPUT_WIDTH-1); // might not work for > 32 bits!

        if (stimfile == NULL)
        {
            printf("ERROR: Can't open %s\n",dat_filename);
            exit(999);
        }
        else
        {
            printf("INFO: Reading %s\n",dat_filename);
            // Each iteration first consumes one character with fgetc and then
            // parses one record with fscanf. The bound on line_no limits the
            // loop to 4 header records plus SAMPLES data records.
            // NOTE(review): the character taken by fgetc is not pushed back;
            // confirm the stimulus file layout tolerates that.
            while (fgetc(stimfile) != EOF && line_no < SAMPLES+5)
            {
                switch (line_no)
                {
                case 1:
                  // Point size
                  fscanf(stimfile,"%X",&NFFT);
                  printf("NFFT %d\n",NFFT);
                  break;
                case 2:
                  // CP length
                  fscanf(stimfile,"%X",&CP_LEN);
                  printf("CP_LEN %d\n",CP_LEN);
                  break;
                case 3:
                  // fwd-inv
                  fscanf(stimfile,"%X",&FWD_INV);
                  printf("FWD_INV %d\n",FWD_INV);
                  break;
                case 4:
                  // Scaling schedule sc_sch
                  fscanf(stimfile,"%X",&sc_sch);
                  printf("sc_sch %X\n",sc_sch);
                  break;
                default:
                    // hex data (first 2 columns)
                    // The two trailing float columns are parsed into
                    // dummy_re/dummy_im but not used in this part.
                    fscanf(stimfile,"%x %x %f %f",&tmp_re,&tmp_im,&dummy_re,&dummy_im);
                    //printf("%x %x\n",tmp_re,tmp_im);

                    // Reinterpret the raw hex word as a signed value (values
                    // above max_half_minus_one are negative) and scale by sc.
                    // NOTE(review): the literal 65536 equals max only while
                    // FFT_INPUT_WIDTH is 16.
                    double input_data_re, input_data_im;
                    if (tmp_re > max_half_minus_one) {
                      input_data_re = ((tmp_re-65536)/sc);
                    } else {
                      input_data_re = (tmp_re/sc);
                    }
                    //xn_input[line_no-5].re = input_data_re;
                    //xn_re_hw[line_no-5] = dummy_re;

                    if (tmp_im > max_half_minus_one) {
                      input_data_im = ((tmp_im-65536)/sc);
                    } else {
                      input_data_im = (tmp_im/sc);
                    }
                    //xn_input[line_no-5].im = input_data_im;
                    //xn_im_hw[line_no-5] = dummy_im;

                    // Data records start at line 5, hence the -5 offset.
                    xn_input[line_no-5] = cmpxDataIn(input_data_re, input_data_im);

                }
                line_no++;
            }
        }
        fclose(stimfile);

然后调用fft和完成fft变换

fft_top(FWD_INV, xn_input, xk_output, &ovflo);

至此，fft已经算是完成了，接下来就是读入预先评估的result，比对结果了。

  FILE* resfile;
        // Testbench (second part): compare the FFT output of this frame with
        // the expected values stored in stimulus_NN.res.
        sprintf(res_filename, "stimulus_%02d", frame);
        strcat(res_filename,".res");
        if ((resfile = fopen(res_filename, "r")) == 0)
        {
            printf("ERROR: Can't open %s\n", res_filename);
            exit(888);
        }

        // Read and discard the two leading values of the result file.
        int tmp;
        fscanf(resfile, "%X", &tmp);
        fscanf(resfile, "%X", &tmp);
        // One result record per output point; NFFT was read from the stimulus file.
        for (int i = 0; i < (1<<NFFT); i++)
        {
            // Only the two float columns are compared; the hex columns are
            // parsed into tmp_re/tmp_im and then ignored.
            fscanf(resfile,"%x %x %f %f", &tmp_re, &tmp_im, &dummy_re, &dummy_im);
            // Convert the expected value to the output type so that the
            // comparison below is an exact match in that type.
            data_out_t golden = dummy_re;
            //if (golden != xk_output[i].re)
            if (golden != xk_output[i].real())
            {
                error_num++;
                cout << "Frame:" << frame << " index: " << i 
                     << "  Golden: " <<  golden.to_float() << " vs. RE Output: " << setprecision(14) << xk_output[i].real().to_float() << endl;
            }
            golden = dummy_im;
            //if (golden != xk_output[i].im)
            if (golden != xk_output[i].imag())
            {
                error_num++;
                cout << "Frame:" << frame << " index: " << i 
                     << "  Golden: " << golden.to_float() << " vs. IM Output: " << setprecision(14) << xk_output[i].imag().to_float() << endl;
            }
        }
        fclose(resfile);
    }

    // Final verdict: mismatches take priority over the overflow report.
    cout << " ERRORS: " << error_num << endl;
    if (error_num > 0)
        cout << " (FAILED!!!)" << endl;
    else if (ovflo_all)   
        cout << " (OVERFLOW!!!)" << endl;
    else
        cout << " (PASSED!!!)" << endl;

    // Exit status is 1 when any mismatch was counted, 0 otherwise.
    if (error_num > 0)
        return 1;
    else
        return 0;
}

其调用的fft_top是fft_top.c文件里的函数，该函数将被做成ip：

#include "fft_top.h"

// Front-end stage: fills in the run-time FFT configuration and then passes
// the input frame through unchanged.
//   direction : transform direction, stored in the configuration via setDir
//   config    : configuration object; also receives the scaling schedule 0x2AB
//   in        : FFT_LENGTH input samples
//   out       : receives a copy of the FFT_LENGTH input samples
void dummy_proc_fe(
    bool direction,
    config_t* config,
    cmpxDataIn in[FFT_LENGTH],
    cmpxDataIn out[FFT_LENGTH])
{
    // The configuration is written before any sample is forwarded.
    config->setDir(direction);
    config->setSch(0x2AB);

    for (int idx = 0; idx != FFT_LENGTH; ++idx) {
        out[idx] = in[idx];
    }
}

// Back-end stage: passes the FFT result through unchanged and then reports
// the overflow bit taken from the status object.
//   status_in : status object produced by the FFT
//   ovflo     : receives bit 0 of status_in->getOvflo()
//   in        : FFT_LENGTH samples coming out of the FFT
//   out       : receives a copy of those samples
void dummy_proc_be(
    status_t* status_in,
    bool* ovflo,
    cmpxDataOut in[FFT_LENGTH],
    cmpxDataOut out[FFT_LENGTH])
{
    for (int idx = 0; idx != FFT_LENGTH; ++idx) {
        out[idx] = in[idx];
    }

    // The status is read only after the whole frame has been copied.
    *ovflo = status_in->getOvflo() & 0x1;
}


// Top-level function of the design: front-end stage, FFT call, back-end stage.
//   direction : transform direction, forwarded to the front-end stage
//   in        : FFT_LENGTH complex input samples
//   out       : FFT_LENGTH complex output samples
//   ovflo     : receives the overflow flag written by the back-end stage
void fft_top(
    bool direction,
    complex<data_in_t> in[FFT_LENGTH],
    complex<data_out_t> out[FFT_LENGTH],
    bool* ovflo)
{
// Interface directives: handshake on direction, FIFO ports for ovflo and for
// the sample arrays in/out.
#pragma HLS interface ap_hs port=direction
#pragma HLS interface ap_fifo depth=1 port=ovflo
#pragma HLS interface ap_fifo depth=FFT_LENGTH port=in,out
// data_pack is applied to both complex sample arrays.
#pragma HLS data_pack variable=in
#pragma HLS data_pack variable=out
// The three calls below form a dataflow region.
#pragma HLS dataflow
    // Intermediate buffers between the stages, plus the FFT configuration
    // and status objects exchanged with the FFT call.
    complex<data_in_t> xn[FFT_LENGTH];
    complex<data_out_t> xk[FFT_LENGTH];
    config_t fft_config;
    status_t fft_status;
   
    // Stage 1: fill in fft_config and copy in -> xn.
    dummy_proc_fe(direction, &fft_config, in, xn);
    // FFT IP
    hls::fft<config1>(xn, xk, &fft_status, &fft_config);
    // Stage 3: copy xk -> out and extract the overflow flag from fft_status.
    dummy_proc_be(&fft_status, ovflo, xk, out);
}

dummy_proc_fe函数做了两个工作，一个是设置做fft还是逆fft，另外一个是设置scale值即0x2AB（二进制为10 10 10 10 11），也即[2 2 2 2 3]，即蝶形算法的每一级右移的位数，这样确保最后的结果也是16位的。总共右移2+2+2+2+3=11位，所以scale对应的十进制值是2^2×2^2×2^2×2^2×2^3=2^11=2048。

再来看看仿真的数据和结果：

数据见stimulus_00.dat文件，这里截取部分片段：

0A
0
1
2AB
A437 4C07 -0.71707153320312  0.59396362304688
6015 333C  0.75064086914062  0.40026855468750
B251 FFA6 -0.60690307617188 -0.00274658203125
FD76 85F5 -0.01983642578125 -0.95346069335938

stimulus_00.res文件的结果如下（部分）：

00
0
0076 FEF9  0.00360107421875 -0.00802612304688 
00C5 009C  0.00601196289062  0.00476074218750 
00DD FEAF  0.00674438476562 -0.01028442382812 
0103 0014  0.00790405273438  0.00061035156250 
0052 0048  0.00250244140625  0.00219726562500 
011F 00D5  0.00875854492188  0.00650024414062 
FFE9 FFFA -0.00070190429688 -0.00018310546875 
FF2C 013F -0.00646972656250  0.00973510742188

前面两行并没有什么用，实际使用时发现如果不在真正数据前放些数，读入会出错，似乎是vivado_hls的一个bug（注意上面的testbench在读取结果数据之前，会先用两次fscanf各读走一个数）。

matlab的结果如下（部分）：

7.511932373046726 - 16.318267822265767i
12.4071061347653 + 9.84830812777682i
13.9168538790000 - 20.9499609691262i
16.3122678174807 + 1.35817962557640i
5.22772324927469 + 4.60450708754723i
18.0549676502232 + 13.4125569611171i
-1.34364834348118 - 0.280272736963835i
-13.1356602407057 + 20.0124305673796i
0.454103601471324 + 22.6374966808317i
-6.94603384852326 - 1.77684245234773i
-0.526709138800964 - 38.7395841354917i
14.6932655039269 + 6.50847432331556i
11.2043508653130 + 23.3736128226132i
20.8573462890038 - 4.58878154007059i
3.49584233061040 - 7.66236741383999i
-30.2315244786224 - 20.9903229972919i

这里我开始也是困惑了，和matlab结果相差比较大。这是由于scale的原因。

7.51193237304673/2048 ≈ 0.0037
16.318267822265767/2048 ≈ 0.008

其它项依次类推。

值得注意的是，采用scaling策略可能会带来一些问题。比如先做fft，再将两个fft得到的一维数组进行共轭相乘，然后再做ifft，这时scaling就可能存在问题，所以这种情况下采用float型可能更合适。修改的方法也很简单，只需要将fft_top.h中的data_in_t和data_out_t重新定义，定义的方法如下：

// Floating-point variant: both sample types become float instead of ap_fixed.
typedef float data_in_t;
typedef float data_out_t;

这样就会调用浮点数IP核进行运算了。

但是还有一个地方需要更改，是因为浮点数要求phase factor必须是24或者25bit的。

// FFT parameter set for the floating-point variant of the design.
struct config1 : hls::ip_fft::params_t {
    // Output ordering option: natural order.
    static const unsigned ordering_opt = hls::ip_fft::natural_order;
    // With floating-point data the hls_fft.h legality check accepts only 24 or 25 here.
    static const unsigned phase_factor_width = 24;
    // Configuration word width, taken from FFT_CONFIG_WIDTH.
    static const unsigned config_width = FFT_CONFIG_WIDTH;
};

IP的参数设置必须满足以下的要求：

    // IP parameters legality checking

    // Check CONFIG_T::config_width
    config_ch->checkBitWidth(FFT_DATA_FORMAT);

    // Check CONFIG_T::status_width
    status->checkBitWidth();

    // Check ip parameters
    if (CONFIG_T::channels < 1 || CONFIG_T::channels > 12)
    {
        std::cerr << ip_fft::fftErrChkHead << "Channels = " << (int)CONFIG_T::channels
                  << " is illegal. It should be from 1 to 12."
                  << std::endl;
        exit(1);
    }

    if (CONFIG_T::max_nfft < 3 || CONFIG_T::max_nfft > 16)
    {
        std::cerr << ip_fft::fftErrChkHead << "NFFT_MAX = " << (int)CONFIG_T::max_nfft 
                  << " is illegal. It should be from 3 to 16."
                  << std::endl;
        exit(1);
    }

    unsigned length = FFT_LENGTH;
    if (!CONFIG_T::has_nfft)
    {
        if (FFT_LENGTH != (1 << CONFIG_T::max_nfft))
        {
            std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
                      << " is illegal. Log2(FFT_LENGTH) should equal to NFFT_MAX when run-time configurable length is disabled."
                      << std::endl;
            exit(1);
        }
    }
    else if (length & (length - 1))
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
                  << " is illegal. It should be the integer power of 2."
                  << std::endl;
        exit(1);
    }
    else if (NFFT < 3 || NFFT > 16)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
                  << " is illegal. Log2(FFT_LENGTH) should be from 3 to 16."
                  << std::endl;
        exit(1);
    }
    else if (NFFT > CONFIG_T::max_nfft)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
                  << " is illegal. Log2(FFT_LENGTH) should be less than or equal to NFFT_MAX."
                  << std::endl;
        exit(1);
    } 
#if 0
    else if (NFFT != config_ch->getNfft())
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
                  << " is illegal. Log2(FFT_LENGTH) should equal to NFFT field of configure channel."
                  << std::endl;
        exit(1);
    }
#endif

    if ((FFT_INPUT_WIDTH < 8) || (FFT_INPUT_WIDTH > 40))
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_INPUT_WIDTH = " << (int)FFT_INPUT_WIDTH
                  << " is illegal. It should be 8,16,24,32,40."
                  << std::endl;
        exit(1);
    }

    if (CONFIG_T::scaling_opt == ip_fft::unscaled && FFT_DATA_FORMAT != ip_fft::floating_point)
    {
        unsigned golden = FFT_INPUT_WIDTH + CONFIG_T::max_nfft + 1;
        golden = ((golden + 7) >> 3) << 3;
        if (FFT_OUTPUT_WIDTH != golden)
        {
            std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH
                      << " is illegal with unscaled arithmetic. It should be input_width+nfft_max+1."
                      << std::endl;
            exit(1);
        }
    }
    else if (FFT_OUTPUT_WIDTH != FFT_INPUT_WIDTH)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH
                  << " is illegal. It should be the same as input_width."
                  << std::endl;
        exit(1);
    }

    if (CONFIG_T::channels > 1 && CONFIG_T::arch_opt == ip_fft::pipelined_streaming_io)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels << " and FFT_ARCH = pipelined_streaming_io"
                  << " is illegal. pipelined_streaming_io architecture is not supported when channels is bigger than 1."
                  << std::endl;
        exit(1);
    }

    if (CONFIG_T::channels > 1 && FFT_DATA_FORMAT == ip_fft::floating_point)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels
                  << " is illegal with floating point data format. Floating point data format only supports 1 channel."
                  << std::endl;
        exit(1);
    }

    if (FFT_DATA_FORMAT == ip_fft::floating_point)
    {
        if (CONFIG_T::phase_factor_width != 24 && CONFIG_T::phase_factor_width != 25)
        {
            std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width
                      << " is illegal with floating point data format. It should be 24 or 25."
                      << std::endl;
            exit(1);
        }
    } 
    else if (CONFIG_T::phase_factor_width < 8 || CONFIG_T::phase_factor_width > 34)
    {
        std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width
                  << " is illegal. It should be from 8 to 34."
                  << std::endl;
        exit(1);
    }