HLS #3 Sobel算例实例应用_hls::stream 24bit to 32bit-CSDN博客

本文链接：https://blog.csdn.net/sements/article/details/102788842

对了，我最近开通了微信公众号，计划是两边会同步更新，并逐步的会将博客上的文章同步至公众号中。
感兴趣的朋友可以扫描下方的二维码或者搜索“里先森sements”来关注，欢迎来玩~！

本篇文章介绍了如何使用HLS制作一个接受24bit RGB图像数据的AXIStream输入流，进行RGB转Gray后，进行Sobel运算，并还原为24bitRGBAXIStream数据流进行输出。为了节约占用资源，这里后端使用了一个VideoScaler进行图像放大，以便HDMI进行输出。

0 - 准备工作

下载xfopencv 2019：https://github.com/Xilinx/xfopencv
将其中的Include文件放到代码目录下

1 - HLS工程

1.1 - 设计思路

我们期望让ZYNQ从SD卡读取一个BMP图片，将其转换为RGB数组存储到DDR上，通过VDMA传输到我们利用HLS生成的LeeVideoOperation IP核中进行处理，最后输出到HDMI进行显示。

这里有几个问题，第一，读取的BMP图片为RGB格式，传输占用24bit通道，而受限于内部资源以及HLS编译堆栈限制，我们这里在HLS里面能够处理的为8bit 640*480的灰度图像。同时，为了送到HDMI进行显示，我们至少需要720P即1280*720像素的24bit图像。因此这里我们既需要进行RGB转gray完成24bit到8bit供sobel计算，又需要8bit灰度转24bit 假RGB图像给HDMI进行显示。

为此，我们的IP内部流程图应当如下所示：

1.2 - 函数添加

这里不介绍如何新建一个HLS工程，参考前文即可。

注意：在使用xfopencv时，避免包含 hls_video 头文件，否则会提示ap_axiu类型重复定义

top函数文件

#include "accelmain.h"
//#include "hls_video.h"			//包含此头文件会导致提示 ap_axiu 重复定义

/*
 * 	Convert a packed-BGR AXI4-Stream into an 8-bit grayscale AXI4-Stream.
 *
 * 	One ap_axiu<24,1,1,1> beat is read from srcStream per pixel. Blue is taken
 * 	from data bits [7:0], green from [15:8] and red from [23:16]; the gray
 * 	value is the integer average (B + G + R) / 3. The result is written to
 * 	dstStream as one ap_axiu<8,1,1,1> beat whose sideband fields (keep, strb,
 * 	user, last, id, dest) are assigned from the input beat.
 *
 * 	Exactly inputWidth * inputHeight beats are consumed and produced.
 */
void CvtStreamBGR2Gray(hls::stream< ap_axiu<24,1,1,1>>& srcStream,hls::stream< ap_axiu<8,1,1,1>>& dstStream,int inputWidth,int inputHeight)
{
	loop_trans:
	for(int pixelIdx = 0 ; pixelIdx < inputWidth * inputHeight ; pixelIdx++)
	{
		ap_axiu<24,1,1,1> bgrBeat = srcStream.read();

		// Split the packed word into its three 8-bit channels.
		const int packed = bgrBeat.data.to_uint();
		const int blue   = (unsigned char)(packed & 0xFF);
		const int green  = (unsigned char)((packed >> 8) & 0xFF);
		const int red    = (unsigned char)((packed >> 16) & 0xFF);

		// gray = (R + G + B) / 3, integer division
		ap_axiu<8,1,1,1> grayBeat;
		grayBeat.data = (ap_uint<8>)((blue + green + red) / 3);

		// Forward the sideband signals of the incoming beat.
		grayBeat.keep = bgrBeat.keep;
		grayBeat.strb = bgrBeat.strb;
		grayBeat.user = bgrBeat.user;
		grayBeat.last = bgrBeat.last;
		grayBeat.id   = bgrBeat.id;
		grayBeat.dest = bgrBeat.dest;

		dstStream.write(grayBeat);
	}
}

/*
 * 	Convert an 8-bit grayscale AXI4-Stream into a packed-BGR AXI4-Stream.
 *
 * 	One ap_axiu<8,1,1,1> beat is read from srcStream per pixel and its gray
 * 	value is replicated into all three channels of an ap_axiu<24,1,1,1> beat
 * 	(bits [7:0], [15:8] and [23:16]), i.e. a "fake RGB" gray pixel.
 *
 * 	user, last, id and dest are forwarded from the input beat. keep and strb
 * 	are NOT forwarded: the 8-bit beat carries a single keep/strb bit while the
 * 	24-bit beat carries three, so copying would zero-extend to 0b001 and flag
 * 	two of the three output bytes as invalid. All bytes of the output pixel
 * 	are valid, so both fields are driven to all ones instead.
 *
 * 	Exactly inputWidth * inputHeight beats are consumed and produced.
 */
void CvtStreamGray2BGR(hls::stream< ap_axiu<8,1,1,1>>& srcStream,hls::stream< ap_axiu<24,1,1,1>>& dstStream,int inputWidth,int inputHeight)
{
	ap_axiu<24,1,1,1> _axiOut;
	ap_axiu<8,1,1,1> _axiIn;

	loop_trans:
	for(int i = 0 ; i < inputWidth * inputHeight ; i++)
	{
		_axiIn = srcStream.read();

		// Read the gray value as an unsigned quantity so the shifts below
		// never see a sign-extended byte.
		const unsigned int _pixData = _axiIn.data.to_uint() & 0xFF;

		ap_uint<24> _pixBGR;
		_pixBGR = (ap_uint<24>)((_pixData << 16) | (_pixData << 8) | (_pixData));

		_axiOut.data = _pixBGR;
		_axiOut.keep = -1;	// all three bytes carry data
		_axiOut.strb = -1;	// all three bytes are data bytes
		_axiOut.user = _axiIn.user;
		_axiOut.last = _axiIn.last;
		_axiOut.id = _axiIn.id;
		_axiOut.dest = _axiIn.dest;

		dstStream.write(_axiOut);
	}
}

/*
 * 	Thin wrapper around xf::Sobel: runs the filter on _src and stores the two
 * 	results it produces in _dstgx and _dstgy. All three images are single
 * 	channel 8-bit xf::Mat objects bounded by MAX_HEIGHT x MAX_WIDTH.
 */
void sobel_accel(xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> &_src,xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> &_dstgx,xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> &_dstgy)
{
	xf::Sobel<
		XF_BORDER_CONSTANT,
		3,
		XF_8UC1,
		XF_8UC1,
		MAX_HEIGHT,
		MAX_WIDTH,
		XF_NPPC1
	>(_src, _dstgx, _dstgy);
}

/*
 * 	Top-level function of the IP core.
 *
 * 	Pipeline: 24-bit BGR stream -> 8-bit gray stream -> xf::Mat -> Sobel ->
 * 	xf::Mat -> 8-bit gray stream -> 24-bit BGR stream.
 *
 * 	srcStream    : incoming packed-BGR pixels, inputWidth * inputHeight beats.
 * 	dstStream    : outgoing packed-BGR pixels (gray value replicated on all
 * 	               three channels), inputWidth * inputHeight beats.
 * 	inputWidth,
 * 	inputHeight  : frame size in pixels; must not exceed MAX_WIDTH x MAX_HEIGHT
 * 	               because the xf::Mat buffers are sized by those constants.
 * 	xory         : selects which Sobel result is streamed out:
 * 	               true  -> X-direction gradient (_matSobelx)
 * 	               false -> Y-direction gradient (_matSobely)
 * 	               Previously this argument was ignored and the X gradient
 * 	               was always emitted.
 *
 * 	NOTE(review): the if/else below assumes this function is synthesized as
 * 	a non-dataflow region; confirm before adding a DATAFLOW directive.
 */
void IpSobelAccelTop(hls::stream< ap_axiu<24,1,1,1>>& srcStream,hls::stream< ap_axiu<24,1,1,1>>& dstStream,int inputWidth,int inputHeight,bool xory)
{
	hls::stream< ap_axiu<8,1,1,1>> grayStream("grayStream");
	hls::stream< ap_axiu<8,1,1,1>> sobelStream("sobelStream");
	xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> _matSrc(inputHeight,inputWidth);
	xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> _matSobelx(inputHeight,inputWidth);
	xf::Mat<XF_8UC1, MAX_HEIGHT, MAX_WIDTH, XF_NPPC1> _matSobely(inputHeight,inputWidth);

	// 24-bit BGR -> 8-bit gray -> xf::Mat
	CvtStreamBGR2Gray(srcStream,grayStream,inputWidth,inputHeight);
	xf::AXIvideo2xfMat(grayStream,_matSrc);

	sobel_accel(_matSrc, _matSobelx, _matSobely);

	// Stream out the gradient image requested by the caller.
	if(xory)
	{
		xf::xfMat2AXIvideo(_matSobelx,sobelStream);
	}
	else
	{
		xf::xfMat2AXIvideo(_matSobely,sobelStream);
	}

	// 8-bit gray -> 24-bit BGR
	CvtStreamGray2BGR(sobelStream,dstStream,inputWidth,inputHeight);

}

top函数头文件

#include "hls_stream.h"
#include "ap_int.h"
#include "common/xf_common.h"
#include "common/xf_utility.h"
#include "common/xf_infra.h"
#include "imgproc/xf_canny.hpp"
#include "imgproc/xf_sobel.hpp"
#include "imgproc/xf_edge_tracing.hpp"

#define MAX_WIDTH	640
#define MAX_HEIGHT	480

// Top-level function of the Sobel IP: reads a 24-bit AXI4-Stream frame of
// inputWidth x inputHeight pixels from srcStream and writes a 24-bit
// AXI4-Stream frame of the same size to dstStream.
// NOTE(review): xory looks like an X-or-Y gradient selector; confirm its
// exact meaning against the definition in the top function source file.
void IpSobelAccelTop(hls::stream< ap_axiu<24,1,1,1>>& srcStream,hls::stream< ap_axiu<24,1,1,1>>& dstStream,int inputWidth,int inputHeight,bool xory);

testbench文件

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#include "testbench.h"
#include "accelmain.h"

int main(int argc,char** argv)
{
	if(argc != 2)
	{
		fprintf(stderr,"Invalid Number of Arguments!\nUsage:\n");
		fprintf(stderr,"<Executable Name> <input image path> \n");
		return -1;
	}

	/*	read in the color image	*/
	cv::Mat cvmRawImg;
	cvmRawImg = cv::imread(argv[1]);
	cv::imwrite("cvmRawImg.jpg",cvmRawImg);

	/*	Design Under Test	*/
	hls::stream< ap_axiu<24,1,1,1>> srcStream;
	hls::stream< ap_axiu<24,1,1,1>> dstStream;
	cv::Mat cvmIpImg;
	cvmIpImg.create(cvmRawImg.rows,cvmRawImg.cols,CV_8UC3);

	//Get the output of accel function
	cvMat2AXIvideoxf<XF_NPPC1>(cvmRawImg,srcStream);
	IpSobelAccelTop(srcStream,dstStream,cvmRawImg.cols,cvmRawImg.rows,true);
	AXIvideo2cvMatxf<XF_NPPC1>(dstStream,cvmIpImg);

	//write out for check
	cv::imwrite("cvmIpImg.jpg",cvmIpImg);

	return 0;
}

testbench头文件

#ifndef TESTBENCH_H_
#define TESTBENCH_H_

#include "ap_int.h"
#include "hls_stream.h"
#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv2/imgproc/imgproc.hpp"

#include "common/xf_axi.h"
#include "common/xf_sw_utils.h"
#include "common/xf_common.h"
#include "common/xf_utility.h"
#include "common/xf_infra.h"
#include "imgproc/xf_sobel.hpp"

#endif

CFLAG。在使用xfopencv 2019版本时，simulation与synthesis使用统一的CFLAGS，即

-I你代码下xfopencv的Include文件夹路径 -D__SDSVHLS__ --std=c++0x

里先森按：我这里使用的是VIVADO 2019，故部分参数设置页面可能与之前的版本不太一样，但是不影响参数设置。例如下图中多了一个 CSIMFLAGS，但是在本例中不需要设置

在添加Directive优化指令时，记得为IpSobelAccelTop内部的grayStream和sobelStream这两个流添加HLS STREAM指令，并在FIFO深度一栏中手动填写数值，不要使用默认值，否则会提示

ERROR: [XFORM 203-733] An internal stream xxxx.xxxx.V.user.V' with default size is used in a non-dataflow region, which may result in deadlock. Please consider to resize the stream using the directive 'set_directive_stream' or the 'HLS stream' pragma.

仿真结果

综合结果

2 - VIVADO工程

VTC配置

Clocking Wizard配置

VDMA配置

这里使用了米联客的样例“SD卡读取图片显示”，在其上做改动。VDMA通路上串入了两个IP，先是我们的 LeeVideoOperation ，然后是 VideoScaler 缩放ip，其可以将指定分辨率的输入缩放到指定分辨率。

这里，VDMA从PS的DDR读取的是一幅640*480的图片，以24bit传入 LeeVideoOperation 处理，输出24bit AXIS数据流到 VideoScaler，VideoScaler将640*480分辨率缩放到1280*720分辨率，再到 AXI4-Stream to Video Out，最后接HDMI输出。

Clocking Wizard 与 Video Timing Controller（VTC）都是按照720P分辨率来进行配置的，VTC里有默认的几种常用分辨率选项可以直接选择，选择完后可以看到实际需要的横向与纵向像素个数，据此再计算Clocking Wizard所需要产生的时钟频率。这里我们在720P的分辨率下，每帧实际有1650*750个像素，按照60FPS的帧率来计算，就需要1650*750*60 = 74.25MHz的时钟频率，clk2的时钟频率是clk1的时钟频率x5。

3 - SDK程序

SDK程序比较简单，由两个代码文件组成，但是构建SDK工程时注意勾选添加 Fat 文件系统支持库。

BMP.c文件完成BMP图片的读取并加载为像素写入到指定的内存数组。

#include "bmp.h"
#include "ff.h"


/* Read an unsigned 16-bit little-endian value starting at p. */
static uint16_t BMP_ReadLE16(const uint8_t *p)
{
	return (uint16_t)((uint16_t)p[0] | ((uint16_t)p[1] << 8));
}

/* Read an unsigned 32-bit little-endian value starting at p.
 * Every byte is widened to uint32_t before shifting so that a byte >= 0x80
 * is never shifted into the sign bit of a signed int. */
static uint32_t BMP_ReadLE32(const uint8_t *p)
{
	return ((uint32_t)p[0]) |
	       ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}

/****************************************************************************
* Function Name  : BMP_ReadHeader
* Description    : Decode the raw bytes at the start of a BMP file into a
*                * BMP_HeaderTypeDef. The bytes cannot simply be copied into
*                * the structure because the in-memory layout of the struct
*                * differs from the on-disk layout, so every field is
*                * assembled individually from its little-endian bytes.
*                * bfType is the exception: its two bytes are combined first
*                * byte high, so a file starting with "BM" yields 0x424D.
* Input          : header: raw file bytes; at least 54 bytes must be readable
*                *         (14-byte file header + 40-byte info header)
*                * bmp:    structure that receives the decoded fields
* Output         : None
* Return         : None (does nothing if either pointer is NULL)
****************************************************************************/

void BMP_ReadHeader(uint8_t *header, BMP_HeaderTypeDef *bmp)
{
	if (header == NULL || bmp == NULL)
	{
		return;
	}

	/* ---- file header, bytes 0..13 ---- */
	bmp->fileHeader.bfType      = (uint16_t)(((uint16_t)header[0] << 8) | header[1]);
	bmp->fileHeader.bfSize      = BMP_ReadLE32(header + 2);
	bmp->fileHeader.bfReserved1 = BMP_ReadLE16(header + 6);
	bmp->fileHeader.bfReserved2 = BMP_ReadLE16(header + 8);
	bmp->fileHeader.bfOffBits   = BMP_ReadLE32(header + 10);

	/* ---- info header, bytes 14..53 ---- */
	bmp->infoHeader.bitSize         = BMP_ReadLE32(header + 14);
	bmp->infoHeader.biWidth         = BMP_ReadLE32(header + 18);
	bmp->infoHeader.biHeight        = (int32_t)BMP_ReadLE32(header + 22);
	bmp->infoHeader.biPlanes        = BMP_ReadLE16(header + 26);
	bmp->infoHeader.biBitCount      = BMP_ReadLE16(header + 28);
	bmp->infoHeader.biCompression   = BMP_ReadLE32(header + 30);
	bmp->infoHeader.biSizeImage     = BMP_ReadLE32(header + 34);
	bmp->infoHeader.biXPelsPerMeter = BMP_ReadLE32(header + 38);
	bmp->infoHeader.biYPelsPerMeter = BMP_ReadLE32(header + 42);
	bmp->infoHeader.biClrUsed       = BMP_ReadLE32(header + 46);
	bmp->infoHeader.biClrImportant  = BMP_ReadLE32(header + 50);
}


/****************************************************************************
* Function Name  : BMP_Picture
* Description    : Load the pixel data of a BMP file from the FAT file system
*                * into a caller-supplied buffer. The file header is parsed
*                * only to find where the pixel data starts (bfOffBits); the
*                * pixel bytes are copied as stored in the file.
* Input          : dir: path and name of the BMP file
*                * buf: destination buffer for the pixel data
*                * len: number of bytes to read into buf
* Output         : buf is filled with up to len bytes of pixel data
* Return         : None. On any failure (open, short or failed header read,
*                * missing "BM" signature, seek error) buf is left untouched.
****************************************************************************/


void BMP_Picture(uint8_t *dir , uint8_t  * buf ,uint32_t len)
{
		/* 14-byte file header + 40-byte info header */
		enum { BMP_HEADER_BYTES = 54 };

		FRESULT res;
		FIL fsrc;
		UINT  br = 0;

		uint8_t buffer[BMP_HEADER_BYTES];

		BMP_HeaderTypeDef bmpHeader;

		if(dir == NULL || buf == NULL)
		{
			return;
		}

		/* Open the file; nothing to close if this fails */
		res = f_open(&fsrc, (const TCHAR*)dir, FA_READ);
		if(res != FR_OK)
		{
			return;
		}

		/* Read and decode the header; a short read means the file is not a
		 * usable BMP */
		res = f_read(&fsrc, buffer, sizeof(buffer), &br);
		if(res == FR_OK && br == sizeof(buffer))
		{
			BMP_ReadHeader(buffer, &bmpHeader);

			/* BMP_ReadHeader yields 0x424D for a file starting with "BM" */
			if(bmpHeader.fileHeader.bfType == 0x424D)
			{
				/* Skip the headers: pixel data starts at bfOffBits */
				res = f_lseek(&fsrc, bmpHeader.fileHeader.bfOffBits);
				if(res == FR_OK)
				{
					(void)f_read(&fsrc, buf, len, &br);
				}
			}
		}

		/* Single exit path so the file is closed on every error as well */
		f_close(&fsrc);
}

bmp.h文件

#ifndef _bmp_H
#define _bmp_H
#include <stdio.h>

/* Decoded BMP file header. Filled field by field by BMP_ReadHeader rather
 * than copied straight from the file bytes. */
typedef struct 
{
	uint16_t bfType;        // File type; a BMP file starts with the characters "BM"
	uint32_t bfSize;		// Size of the file (bytes, per the BMP file format)
	uint16_t bfReserved1;	// Reserved
	uint16_t bfReserved2;	// Reserved
	uint32_t bfOffBits;  	// Byte offset from the start of the file to the pixel data
} BMP_FileHeaderTypeDef;

/* Decoded BMP info header (the block that follows the file header). */
typedef struct 
{
	uint32_t bitSize;		 // Size in bytes of the info header structure
	uint32_t biWidth;		 // Image width in pixels
	int32_t  biHeight;		 // Image height in pixels; positive = stored bottom-up, negative = stored top-down
	uint16_t biPlanes;		 // Number of color planes, always 1
	uint16_t biBitCount;	 // Bits per pixel: 1, 4, 8, 16, 24 or 32
	uint32_t biCompression;  // Compression type
	uint32_t biSizeImage;	 // Size of the image data
	uint32_t biXPelsPerMeter;// Horizontal resolution
	uint32_t biYPelsPerMeter;// Vertical resolution
	uint32_t biClrUsed;		 // Number of color indices used
	uint32_t biClrImportant; // Number of important color indices
		
}BMP_InfoHeaderTypeDef;

/* Complete decoded BMP header: file header followed by info header. */
typedef struct
{
	BMP_FileHeaderTypeDef fileHeader;	// File-level header
	BMP_InfoHeaderTypeDef infoHeader;	// Image description header
		
}BMP_HeaderTypeDef;


/* Decode the raw header bytes in `header` into the structure `bmp`. */
void BMP_ReadHeader(uint8_t *header, BMP_HeaderTypeDef *bmp);
/* Open the BMP file named by `dir` and read `len` bytes of its pixel data
 * (starting at the offset given in the file header) into `buf`. */
void BMP_Picture(uint8_t *dir , uint8_t * buf ,uint32_t len);



#endif

main.c文件完成了SD卡初始化、我们HLS生成的IP初始化、VideoScalerIP核初始化以及 VDMA 初始化。并通过BMP.c文件中的函数将SD卡上FAT文件系统中的图片读入到内部数组，调用VDMA传输显示。这里循环调用两张图片，间隔几秒就进行切换。

/*
 *
Company: cz123 Electronic Technology Co., Ltd.
         Liyang Milian Electronic Technology Co., Ltd.
Brand: MSXBO
BBS: www.osrc.cn
WEB: www.msxbo.com
Author: tjy
Copyright:  by msxbo 2019-2029
 *
 *
 * */
#include <stdio.h>
#include "xparameters.h"
#include "xsdps.h"
#include "xil_printf.h"
#include "ff.h"
#include "xvideo_scaler.h"
#include "xipsobelacceltop.h"

/* Frame geometry, in pixels */
#define H_STRIDE            640
#define H_ACTIVE            640
#define V_ACTIVE            480
#define VIDEO_LENGTH  (H_STRIDE*V_ACTIVE)


#define VDMA_BASEADDR   XPAR_AXI_VDMA_0_BASEADDR
#define DDR_BASEADDR    0x00000000
#define VIDEO_BASEADDR0 0x01000000
/* Bytes of 24-bit pixel data in one frame. Parenthesized so the macro
 * expands safely inside larger expressions, and derived from the geometry
 * macros so the resolution is defined in one place only. */
#define BUF_SIZE        (H_ACTIVE*V_ACTIVE*3)

static FATFS SD_Dev; // File System instance
char *SD_Path = "0:/";  //  string pointer to the logical drive number
XVideo_scaler gVideoScaler;			// driver instance of the video scaler IP
XIpsobelacceltop gIpSobelAccelTop;	// driver instance of the HLS Sobel IP

// Buffers for BMP pixel data, one full 24-bit frame (BUF_SIZE bytes) each,
// aligned to 32 bytes. Only RD_Buf1 and RD_Buf2 are used by active code in
// main(); RD_Buf3 and RD_Buf4 appear only in commented-out calls there.
u8 RD_Buf1[BUF_SIZE] __attribute__ ((aligned(32)));
u8 RD_Buf2[BUF_SIZE] __attribute__ ((aligned(32)));
u8 RD_Buf3[BUF_SIZE] __attribute__ ((aligned(32)));
u8 RD_Buf4[BUF_SIZE] __attribute__ ((aligned(32)));


// Declared by hand here; no header declaring it is included in this file.
void Xil_DCacheFlush(void);

/*
 * Copy a packed 24-bit image into the frame buffer at VIDEO_BASEADDR0.
 *
 * addr   : source pixels, 3 bytes each in B, G, R order, size_x * size_y of them
 * size_x : image width in pixels
 * size_y : image height in pixels
 *
 * The first source row is written to the LAST frame-buffer row and so on, so
 * the image is flipped vertically while copying. Each pixel is stored as one
 * 32-bit word 0x00RRGGBB. The data cache is flushed once at the end.
 */
void show_img( const unsigned char * addr, u32 size_x, u32 size_y)
{
	const unsigned char *src = addr;
	u32 row;

	/* Walk destination rows from the bottom (size_y - 1) up to row 0 */
	for(row = size_y; row-- > 0; )
	{
		u32 dst = VIDEO_BASEADDR0 + (row * size_x) * 4;
		u32 col;

		for(col = 0; col < size_x; col++, src += 3, dst += 4)
		{
			const u32 blue  = src[0];
			const u32 green = src[1];
			const u32 red   = src[2];

			Xil_Out32(dst, ((red<<16)|(green<<8)|(blue<<0)));
		}
	}

	Xil_DCacheFlush();
}


void VDMA_init()
{
	int i;
	for(i=0;i<VIDEO_LENGTH;i++)
	{
		Xil_Out32(VIDEO_BASEADDR0+i*4,0);
	}
	Xil_DCacheFlush();
	Xil_Out32((VDMA_BASEADDR + 0x000), 0x3);
	Xil_Out32((VDMA_BASEADDR + 0x05c), VIDEO_BASEADDR0);
	Xil_Out32((VDMA_BASEADDR + 0x060), VIDEO_BASEADDR0);
	Xil_Out32((VDMA_BASEADDR + 0x064), VIDEO_BASEADDR0);
	Xil_Out32((VDMA_BASEADDR + 0x058), (H_STRIDE*4));
	Xil_Out32((VDMA_BASEADDR + 0x054), (H_ACTIVE*4));
	Xil_Out32((VDMA_BASEADDR + 0x050), V_ACTIVE);

}

int SD_init()
{
	FRESULT result;
	//-----------------------mount dev-----------------------------------------------
	result = f_mount(&SD_Dev,SD_Path, 0);
	if (result != 0) {
		return XST_FAILURE;
	}
	return XST_SUCCESS;
}


/*
 * Initialize, configure and start the video scaler IP.
 *
 * The scaler is set up to take H_ACTIVE x V_ACTIVE input and produce
 * 1280 x 720 output, then started with auto-restart enabled.
 *
 * Returns 0 on success, -1 if the driver initialization fails.
 */
int VideoScalerInit()
{
	const int initResult = XVideo_scaler_Initialize(&gVideoScaler,XPAR_VIDEO_SCALER_0_DEVICE_ID);

	if(initResult == XST_SUCCESS)
	{
		xil_printf("\nInit VideoScaler Success\n");

		/* input and output resolution */
		XVideo_scaler_Set_in_height(&gVideoScaler,V_ACTIVE);
		XVideo_scaler_Set_in_width(&gVideoScaler,H_ACTIVE);
		XVideo_scaler_Set_out_height(&gVideoScaler,720);
		XVideo_scaler_Set_out_width(&gVideoScaler,1280);

		/* run continuously */
		XVideo_scaler_EnableAutoRestart(&gVideoScaler);
		XVideo_scaler_Start(&gVideoScaler);

		return 0;
	}

	xil_printf("\nInit VideoScaler Error\n");
	return -1;
}

/*
 * Initialize, configure and start the HLS Sobel IP.
 *
 * The IP is configured for H_ACTIVE x V_ACTIVE input with xory = 0 and is
 * started with auto-restart enabled.
 *
 * Returns 0 on success, -1 if the driver initialization fails.
 */
int IpSobelAccelTopInit()
{
	int _status;

	_status = XIpsobelacceltop_Initialize(&gIpSobelAccelTop,XPAR_IPSOBELACCELTOP_0_DEVICE_ID);
	if(_status != XST_SUCCESS)
	{
		xil_printf("\nInit IpSobelAccelTop Error\n");
		return -1;
	}
	xil_printf("\nInit IpSobelAccelTop Success\n");

	//config
	XIpsobelacceltop_Set_inputHeight(&gIpSobelAccelTop,V_ACTIVE);
	XIpsobelacceltop_Set_inputWidth(&gIpSobelAccelTop,H_ACTIVE);
	XIpsobelacceltop_Set_xory(&gIpSobelAccelTop,0);

	//start
	XIpsobelacceltop_EnableAutoRestart(&gIpSobelAccelTop);
	XIpsobelacceltop_Start(&gIpSobelAccelTop);

	// The success path used to fall off the end of this int function,
	// leaving the value seen by a caller undefined.
	return 0;
}

int main()
{
	IpSobelAccelTopInit();
	VideoScalerInit();
	VDMA_init();
	SD_init();

	//xil_printf("\nStart Read BMP\n");
	BMP_Picture((u8 *)"1.bmp" , RD_Buf1 ,BUF_SIZE);
	BMP_Picture((u8 *)"2.bmp" , RD_Buf2 ,BUF_SIZE);

	//xil_printf("\nStart HDMI Show\n");

	while(1)
	{
		show_img(RD_Buf1,640,480);
		xil_printf("idle:%d,ready:%d,done:%d",XVideo_scaler_IsIdle(&gVideoScaler),XVideo_scaler_IsReady(&gVideoScaler),XVideo_scaler_IsDone(&gVideoScaler));
		sleep(2);
		show_img(RD_Buf2,640,480);
		sleep(2);
		//show_img(RD_Buf3,1280,720);
		//sleep(5);
		//show_img(RD_Buf4,1280,720);
		//sleep(5);
	}

    return 0;
}