比较不同硬件 pragma(如 PIPELINE)及其参数对综合结果的影响

// Element-wise product of two vectors: out[j] = in1[j] * in2[j] for j in [0, dim).
// NOTE(review): this is a snippet; the loop's closing brace and the
// declarations of in1/in2/out/dim/c_size_min/c_size_max are not shown here.
for(int j = 0; j < dim; j++) {
    // Ask the HLS tool to pipeline the loop iterations.
    #pragma HLS PIPELINE
    // Supplies min/max iteration counts to the tool; presumably needed because
    // 'dim' is not a compile-time constant here -- TODO confirm.
    #pragma HLS LOOP_TRIPCOUNT min=c_size_min max=c_size_max
        out[j] = in1[j] * in2[j];
// dim = 40960 in this experiment
使用#pragma SDS data zero_copy(in1[0:dim], in2[0:dim], out[0:dim])后的优化

在complexf-1中 

// Matrix-multiply kernel body (snippet): copies in1/in2 into local 2-D arrays,
// then computes local_out = local_in1 * local_in2 with a three-level loop nest.
// NOTE(review): the enclosing function header, the closing braces of
// loop_2/loop_1, and any write-back of local_out are not shown in this snippet.

// Compile-time constant used for LOOP_TRIPCOUNT and as the bound of loop_3.
    const int c_size = DATA_SIZE;// 64 in this experiment

    // Local memory to store the input and output matrices.
    // NOTE(review): read_in below indexes these with i, j < dim, so this
    // assumes dim <= MAX_SIZE -- no check is visible here; TODO confirm.
    int local_in1[MAX_SIZE][MAX_SIZE];// 64*64 in this experiment
    int local_in2[MAX_SIZE][MAX_SIZE];
    int local_out[MAX_SIZE][MAX_SIZE];

    // Physical implementations of memories have only a limited number of
    // read/write ports; that can be overcome with the ARRAY_PARTITION pragma.
    // NOTE(review): both ARRAY_PARTITION pragmas below are commented out in
    // this variant, so the partitioning described in this comment block is
    // NOT applied to the code as shown.
//    #pragma HLS ARRAY_PARTITION variable=local_in1 complete dim=2
//    #pragma HLS ARRAY_PARTITION variable=local_in2 complete dim=1

    // When loop_3 is unrolled automatically, the column "k" in local_in1[i][k]
    // varies, whereas in local_in2[k][j] the row "k" varies. So, for effective
    // pipelined processing, local_in1 would be partitioned in dimension 2 and
    // local_in2 in dimension 1.

    // Burst read of the input matrices from DDR memory into local_in1/local_in2.
    // A single flattened loop walks iter over dim*dim elements while (i, j)
    // track the row/column; j is advanced in the for-increment and wrapped
    // back to 0 (with i incremented) at the top of the body once it reaches dim.
    read_in: for(int iter = 0, i=0, j=0; iter< dim*dim; iter++,j++){
    #pragma HLS PIPELINE
    #pragma HLS LOOP_TRIPCOUNT min=c_size*c_size max=c_size*c_size
        if( j== dim){ j = 0; i++; }
        // Only the result of .real() on each in1 element is stored.
        local_in1[i][j] = in1[iter].real();
        // in2 elements are stored directly (element type of in2 not visible here).
        local_in2[i][j] = in2[iter];
    }

    // Reads the input data from local memory, performs the computation,
    // and writes the result to local memory.
    loop_1: for (int i = 0 ; i < dim ; i++){
    #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
        loop_2: for(int j = 0 ; j < dim ; j++){
        #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
        // Pipelining a loop results in automatic unrolling of inner loops by the HLS compiler.
        #pragma HLS PIPELINE
            // Accumulator for the dot product of row i of local_in1 and column j of local_in2.
            int res = 0;
            loop_3: for(int k = 0; k < c_size; k++){
                // NOTE(review): disabled experiment; the meaning of the numbers on
                // the next line is not stated in the source -- TODO confirm.
//#pragma HLS PIPELINE 12/15/3808/2401
                // To enable automatic unrolling of the loop, the number of
                // iterations needs to be a compile-time constant, so 'c_size' is
                // used here instead of 'dim', which is not a compile-time constant.
                // NOTE(review): read_in only fills entries with indices < dim, so
                // if dim < c_size this loop reads entries that were never written;
                // presumably dim == DATA_SIZE is expected -- TODO confirm.
                res += local_in1[i][k]*local_in2[k][j];
            }
            local_out[i][j] = res;

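当把矩阵尺寸(DATA_SIZE/MAX_SIZE,原为 64)改成 640 时报错:BRAM 不够。此时 local_in1、local_in2、local_out 三个 640×640 的 int 本地数组共需约 4.7 MB 片上存储。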

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值