Sdx编译错误

一、最初的代码

xf_headers.h文件:

#ifndef _XF_HEADERS_H_
#define _XF_HEADERS_H_
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "ap_int.h"
#include "hls_stream.h"
#undef __ARM_NEON__
#undef __ARM_NEON
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#define __ARM_NEON__
#define __ARM_NEON
#include "sds_lib.h"
// Timing helpers built on sds_clock_counter().
// TIME_STAMP_INIT declares the two counters in the current scope and takes the
// first sample; TIME_STAMP prints the ticks elapsed since the previous sample
// and then re-arms the start counter.
// The counters are unsigned long long so the 64-bit value returned by
// sds_clock_counter() is not truncated, and the format specifier matches.
#define TIME_STAMP_INIT  unsigned long long clock_start, clock_end;  clock_start = sds_clock_counter();
#define TIME_STAMP  { clock_end = sds_clock_counter(); printf("elapsed time %llu \n", clock_end-clock_start); clock_start = sds_clock_counter();  }
#include "common/xf_sw_utils.h"
#endif//_XF_HEADERS_H_

xf_accel_config.h

#ifndef _XF_THRESHOLD_CONFIG_H_
#define _XF_THRESHOLD_CONFIG_H_

#include "hls_stream.h"
#include "ap_int.h"

#include "common/xf_params.h"
#include "common/xf_common.h"
#include "common/xf_utility.h"

#include "imgproc/xf_threshold.hpp"
#include "imgproc/xf_convert_bitdepth.hpp"
#include "xf_config_params.h"

/* Short aliases for ap_uint<8> and ap_uint<64>. */
typedef ap_uint<8>      ap_uint8_t;
typedef ap_uint<64>      ap_uint64_t;


/*  set the height and width  */
/* Compile-time image dimensions: used as the xf::Mat template bounds and as
 * the array extents in the declarations below. */
#define HEIGHT 192
#define WIDTH  448

/* NPIX (pixels-per-clock template argument) is defined only when RO or NO
 * evaluates to nonzero; if neither does, the declarations below will not
 * compile.
 * NOTE(review): RO/NO are presumably set in xf_config_params.h -- confirm. */
#if RO
#define NPIX				XF_NPPC8
#endif                      
#if NO                      
#define NPIX				XF_NPPC1
#endif

/* Declaration of threshold_accel: takes a 16UC1 source Mat, two 8UC1 Mats
 * (_bit8binary and _threshdst) and two 16-bit threshold values. */
void threshold_accel(xf::Mat<XF_16UC1, HEIGHT, WIDTH, NPIX> &_src,xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> &_bit8binary,xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> &_threshdst,ap_uint<16> thresh_upper,ap_uint<16> thresh_lower);

/* Declaration of convertvalve: takes a HEIGHT*WIDTH array of 1-bit pixels,
 * 64 ten-bit valve values, and a HEIGHT*64 array of 1-bit results. */
void convertvalve(ap_uint<1> stonesimg[HEIGHT*WIDTH],ap_uint<10> valve_column_[64],ap_uint<1> valvearray[HEIGHT*64]);

#endif  // end of _XF_THRESHOLD_CONFIG_H_

xf_threshold_accel.cpp文件:其中函数threshold_accel是我的一个hardware function。我想着以流的形式传输应该会很快(所以使用了access_pattern:SEQUENTIAL和copy);中间变量_bit8binary又写又读,所以只能是zero_copy共享区?!

#include "xf_accel_config.h"

/*
 * threshold_accel: converts the 16-bit source image to an 8-bit image and
 * then thresholds the 8-bit image.
 *
 *   _src          16-bit single-channel input image.
 *   _bit8binary   receives the xf::convertTo result (mode XF_CONVERT_16U_TO_8U,
 *                 last argument 8) and is then read as the xf::Threshold input.
 *                 NOTE(review): the 8 is presumably a right-shift amount --
 *                 confirm against xf_convert_bitdepth.hpp.
 *   _threshdst    8-bit output written by xf::Threshold.
 *   thresh_upper,
 *   thresh_lower  forwarded unchanged as the last two xf::Threshold arguments;
 *                 the third argument is the constant 0.
 *
 * Data-motion pragmas below:
 *   - _src.data and _threshdst.data are declared SEQUENTIAL, copied in full
 *     (0 .. size), and marked NON_CACHEABLE|PHYSICAL_CONTIGUOUS.
 *   - _bit8binary.data is declared zero_copy because it is both written and
 *     read inside this function.
 */
#pragma SDS data access_pattern("_src.data":SEQUENTIAL, "_threshdst.data":SEQUENTIAL)
#pragma SDS data copy("_src.data"[0:"_src.size"], "_threshdst.data"[0:"_threshdst.size"])
#pragma SDS data mem_attribute("_src.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data mem_attribute("_threshdst.data":NON_CACHEABLE|PHYSICAL_CONTIGUOUS)
#pragma SDS data zero_copy("_bit8binary.data"[0:"_bit8binary.size"])
void threshold_accel(xf::Mat<XF_16UC1, HEIGHT, WIDTH, NPIX> &_src,xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> &_bit8binary,xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> &_threshdst,ap_uint<16> thresh_upper,ap_uint<16> thresh_lower)
{
// The two calls below are placed in one HLS DATAFLOW region.
#pragma HLS DATAFLOW
	// Stage 1: 16-bit -> 8-bit conversion into the intermediate image.
	xf::convertTo<XF_16UC1, XF_8UC1, HEIGHT, WIDTH, NPIX>(_src,_bit8binary,XF_CONVERT_16U_TO_8U,8);
	// Stage 2: range threshold of the intermediate image into the output.
	xf::Threshold<XF_THRESHOLD_TYPE_RANGE,XF_8UC1,HEIGHT, WIDTH,NPIX>(_bit8binary, _threshdst,0,thresh_upper,thresh_lower);
}

接下来是xf_valve_accel.cpp文件:其中convertvalve函数也是我的hardware function。这里我想随机访问这几个参数,而且都小于32MB,所以我用了号称最快的AXIDMA_SIMPLE传输。不知道这样优化对吗?

#include "xf_accel_config.h"

/*
 * convertvalve: for each of the 64 valves and each image row, counts the set
 * pixels among the first valve_column_[ind] entries of that row and writes a
 * 1-bit decision into valvearray.
 *
 *   stonesimg      1-bit pixels, HEIGHT*WIDTH entries. Rows are read with a
 *                  stride of HEIGHT, which matches how the caller in
 *                  xf_posite_tb.cpp packs the buffer (resultimg[r*HEIGHT+c]).
 *                  NOTE(review): a row-major HEIGHT x WIDTH image would
 *                  normally use a stride of WIDTH; if that is intended, the
 *                  caller's packing loop and this index must change together.
 *   valve_column_  64 values; each must satisfy 1 < value < 30 (asserted).
 *   valvearray     output, HEIGHT rows x 64 valves, row-major (stride 64).
 *                  Entry [i*64+ind] is 1 when more than one of the inspected
 *                  pixels in row i is set, otherwise 0.
 *
 * Fixes relative to the previous revision:
 *   - The output index used a stride of HEIGHT (192) instead of 64, which
 *     wrote past the HEIGHT*64 array for every row i >= 64.
 *   - "stonesum+stonesimgdata<<5" parsed as "(stonesum+stonesimgdata)<<5" and
 *     was truncated to 5 bits, so the sum was always 0; the comparison against
 *     (1<<5) could never be true for a 5-bit value either. The pixels are now
 *     simply counted; "count > 1" is the unscaled form of the intended
 *     "32*count > 32" test.
 *     NOTE(review): confirm that "more than one pixel" is the intended rule.
 */
#pragma SDS data copy(stonesimg[0:HEIGHT*WIDTH],valve_column_[0:64],valvearray[0:HEIGHT*64])
#pragma SDS data access_pattern(stonesimg:RANDOM, valve_column_:RANDOM,valvearray:RANDOM)
#pragma SDS data data_mover(stonesimg:AXIDMA_SIMPLE, valve_column_:AXIDMA_SIMPLE,valvearray:AXIDMA_SIMPLE)
void convertvalve(ap_uint<1> stonesimg[HEIGHT*WIDTH],ap_uint<10> valve_column_[64],ap_uint<1> valvearray[HEIGHT*64])
{
	valveLoop:for(int ind=0;ind<64;ind++)
	{
		ap_uint<10> valveright=valve_column_[ind];
		rowLoop:for(int i = 0; i < HEIGHT; i++)
		{
			#pragma HLS PIPELINE II=1
			// 5 bits are enough: at most 29 pixels are summed (valveright < 30).
			ap_uint<5> stonecount=0;

			// The asserts bound the trip count of the unrolled loop below.
			assert(valveright>1);
			assert(valveright<30);
			colLoop:for(int j = 0; j < valveright; j++)
			{
				#pragma HLS unroll
				ap_uint<1> stonesimgdata=stonesimg[i*HEIGHT+j];
				stonecount=stonecount+stonesimgdata;
			}
			// Row stride of the output is 64 (one entry per valve).
			if(stonecount>1)
			{
				valvearray[i*64+ind]=1;
			}
			else
			{
				valvearray[i*64+ind]=0;
			}
		}
	}

}

然后是xf_posite_tb.cpp即CPU端:

#include "xf_headers.h"
#include "xf_accel_config.h"

int main()
{
	read xml file and turn them to array...in PS
	int valves[64];
	ap_uint<10> *valve_column_=(ap_uint<10> *)sds_alloc(64*sizeof(ap_uint<10>));
	cv::FileStorage fs0("/home/jumper/JPMV_IMPS_XRT/SysPara/mathxrt7.xml", cv::FileStorage::READ);
	if( !(fs0.isOpened()) )
	{
		std::cout<<"Error:invalid xml file!"<<std::endl;
		return -1;
	}
	char valve_name[100];
	for(int index=0;index!=64;index++)
	{
		sprintf(valve_name,"valve%d",index);
		fs0[valve_name]>>valves[index];
		valve_column_[index]=(ap_uint<10>)valves[index];
	}
	fs0.release();

	///read images...in PS
	cv::Mat imgLow_cv = cv::imread("./data/7_low.tiff", -1);
	if (!imgLow_cv.data)
	{
		printf("Cannot open image.\n");
		return 0;
	}

	ap_uint<10> in_width = imgLow_cv.cols;
	ap_uint<8> in_height = imgLow_cv.rows;

	static xf::Mat<XF_16UC1, HEIGHT, WIDTH, NPIX> imgInput(imgLow_cv.rows,imgLow_cv.cols);
	imgInput.copyTo(imgLow_cv.data);

	///convert 16bit to 8 bit image and threshold...in PL
	ap_uint<16> thresh_upper, thresh_lower;
	thresh_upper = 65535;
	thresh_lower = 40000;

	static xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> bit8imgbw(imgLow_cv.rows,imgLow_cv.cols);
	static xf::Mat<XF_8UC1, HEIGHT, WIDTH, NPIX> imgOutput(imgLow_cv.rows,imgLow_cv.cols);

	threshold_accel(imgInput, bit8imgbw,imgOutput,thresh_upper,thresh_lower);

	///get the threshold image from PL and find contour...in PS
	cv::Mat binaryimg(imgLow_cv.rows,imgLow_cv.cols,CV_8UC1);
	binaryimg.data=imgOutput.copyFrom();

	std::vector<std::vector<cv::Point> > contours;
	findContours( binaryimg, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE );
	if(contours.size()==0)
	{
		return 1;
	}
	cv::Mat stonesall(imgLow_cv.rows,imgLow_cv.cols,CV_8UC1,cv::Scalar(0));
	for (int i = 0; i< int(contours.size()); i++)
	{
		double area=cv::contourArea(contours[i]);
		if(area<20)
		{
			continue;
		}
		cv::drawContours(stonesall,contours,i,cv::Scalar(1),-1);//255
	}

	//get all the position stones in an image ,calculate valves data...in PL
	ap_uint<1> *resultimg=(ap_uint<1> *)sds_alloc(HEIGHT*WIDTH*sizeof(ap_uint<1>));
	for(int r=0;r!=HEIGHT;r++)
	{
		for(int c=0;c!=WIDTH;c++)
		{
			resultimg[r*HEIGHT+c]=(ap_uint<1>)(stonesall.ptr<uchar>(r)[c]);
		}
	}

	ap_uint<1> *resultvalve=(ap_uint<1> *)sds_alloc(HEIGHT*64*sizeof(ap_uint<1>));
	convertvalve(resultimg,valve_column_,resultvalve);


	sds_free(valve_column_);
	sds_free(resultimg);
	sds_free(resultvalve);

	return 0;
}

编译显示的警告是:为什么要显示这个警告,明明threshold_accel是我的硬件函数啊?为何不能用优化指令?

Generating data motion network
INFO: [DMAnalysis 83-4494] Analyzing hardware accelerators...
WARNING: [DMAnalysis 83-4502] Function xf::convertTo<1,0,192,448,1> @ /home/jumper/FPGA_projects/sdsoc_study/threshnew/libs/xfopencv/include/imgproc/xf_convert_bitdepth.hpp is not a HW accelerator but has SDS pragma applied
WARNING: [DMAnalysis 83-4502] Function xf::Threshold<1,0,192,448,1> @ /home/jumper/FPGA_projects/sdsoc_study/threshnew/libs/xfopencv/include/imgproc/xf_threshold.hpp is not a HW accelerator but has SDS pragma applied
INFO: [DMAnalysis 83-4497] Analyzing callers to hardware accelerators...
INFO: [DMAnalysis 83-4444] Scheduling data transfer graph for partition 0
INFO: [DMAnalysis 83-4446] Creating data motion network hardware for partition 0
INFO: [DMAnalysis 83-4448] Creating software stub functions for partition 0
INFO: [DMAnalysis 83-4450] Generating data motion network report for partition 0
INFO: [DMAnalysis 83-4454] Rewriting caller code

编译显示的错误是:

Creating block diagram (BD)
Creating top.bd.tcl
ERROR: [CF2XD 83-22322] BRAM data width (1-bits) for port 'stonesimg_V_PORTA' must be a multiple of 8.  
ERROR: [CF2XD 83-22322] You can use a Vivado HLS interface pragma to map to an AXI4 stream interface.
ERROR: [CF2XD 83-2239] failed to create xd_adapter for accelerator comp convertvalve_1
ERROR: [CF2XD 83-2009] An error has occurred during generation of the system block diagram.  For more information, please look for additional ERROR messages in the console and in log files.
Error generating intermediate design file top.xml
Error creating IPI .bd design file, exiting

加速函数convertvalve中我希望随机访问那些参数,所以不能将RANDOM改成SEQUENTIAL,但我又不想更改ap_uint<1>为8的倍数,因为我想节省内存,本来数据类型就是1bit就够了。 randomly access意味着使用BRAM,竟然和ap_uint<1>冲突啊?

今天官网上有大神回复说:do not use accurate data type as the parameters of accelerator functions!!(这里的accurate data type指的是ap_uint<1>这类任意位宽的数据类型。)但是可以在加速器内部使用!!!

我知道如果我将RANDOM改成SEQUENTIAL,也就是以流的形式访问数组,那么可以编译通过。我记得流意味着使用AXI-master传输,我改了试了一下,结果编译通过了。因为有2个加速函数,所以会有2份报告:

这是第一个函数的HLS报告,显示只要88321个时钟(约0.8ms)。这是第二个函数的HLS报告,min和max相差很大,所以应该有问题,暂时只看第一个函数的报告。

看到第一个函数HLS报告只需要88321个时钟,但data motion报告中,第一个函数的transfer time需要很多个时钟?!另外PHYSICAL_CONTIGUOUS与contiguous与paged的区别??无论是HLS综合报告还是data motion报告显示的时钟与最后拷贝到开发板的运行时钟之间的关系?最后在开发板上的时钟=HLS时钟+data motion时钟?

二、版本二

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

元气少女缘结神

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值