HLS第十三课（bayer video）

最新推荐文章于 2024-05-23 19:15:44 发布

Huskar_Liu

最新推荐文章于 2024-05-23 19:15:44 发布

阅读量433

点赞数

分类专栏： hls

本文链接：https://blog.csdn.net/weixin_42418557/article/details/118960615

版权

hls 专栏收录该内容

42 篇文章 107 订阅

订阅专栏

本例利用之前介绍的基础函数进行功能级实现。

/**
 * Top-level HLS function: reads a frame from memory through `src`, runs it
 * through the dataflow chain below (array2stream -> stream_nppc_down ->
 * stream2xfMat -> bayer_cfa -> resize -> xfMat_nppc_down -> xfMat_hflip ->
 * xfMat2AXIvideo) and emits the result on the AXI4-Stream `dst`.
 *
 * @param src           frame memory, accessed through an m_axi master port
 * @param dst           output AXI4-Stream
 * @param index_in      4-bit frame index supplied on a plain input port
 * @param index_out     receives (index_in - delay_frame), modulo 16
 * @param param_en      selects which half of each packed register is used:
 *                      0 = low half, otherwise high half
 * @param flip_mode     two packed 8-bit flip settings (bit0 = vertical flip,
 *                      bit1 = horizontal flip within the selected byte)
 * @param crop_x, crop_y, crop_width, crop_height
 *                      two packed 16-bit crop settings each
 * @param bayer_mode    two packed 8-bit Bayer pattern selectors
 * @param dst_width, dst_height
 *                      two packed 16-bit output sizes each
 * @param sensor_width, sensor_height
 *                      passed unchanged to array2stream
 * @param delay_frame   subtracted from index_in to pick the frame index
 */
void bayer_video_accl(T_UINT(AXIMM_BPP, AXIMM_NPPC)* src, hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)>& dst, \
    ap_uint<4> index_in, ap_uint<4>& index_out, ap_uint<1> param_en, \
    u16 flip_mode, u32 crop_x, u32 crop_y, u32 crop_width, u32 crop_height, u16 bayer_mode, u32 dst_width, u32 dst_height, u16 sensor_width, u16 sensor_height, ap_uint<4> delay_frame)
{
// Data ports: src is an AXI memory-mapped master, dst is an AXI4-Stream.
#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst

// Block control (return) and all scalar parameters share one AXI4-Lite bundle.
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS

// index_in is a plain input port (it is not in the AXI4-Lite bundle), marked stable.
#pragma HLS INTERFACE ap_stable port=index_in  

// The configuration registers are additionally marked ap_stable.
// Note that param_en is NOT in this list.
#pragma HLS INTERFACE ap_stable port=flip_mode 
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y 
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height 
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame

    // 4-bit unsigned subtraction: the result wraps modulo 16.
    const ap_uint<4> index = index_in - delay_frame;
	printf("index:%d\n", (int)index);
	index_out = index;

    // Unpack the active parameter set: every packed register holds two
    // values, the low half is used when param_en == 0, the high half otherwise.
    const u8 param_flip_mode = (param_en == 0) ? (flip_mode & 0xff) : ((flip_mode >> 8) & 0xff);
    const u16 param_crop_x = (param_en == 0) ? (crop_x & 0xffff) : ((crop_x >> 16) & 0xffff);
    const u16 param_crop_y = (param_en == 0) ? (crop_y & 0xffff) : ((crop_y >> 16) & 0xffff);
    const u16 param_crop_width = (param_en == 0) ? (crop_width & 0xffff) : ((crop_width >> 16) & 0xffff);
    const u16 param_crop_height = (param_en == 0) ? (crop_height & 0xffff) : ((crop_height >> 16) & 0xffff);
    const u8 param_bayer_mode = (param_en == 0) ? (bayer_mode & 0xff) : ((bayer_mode >> 8) & 0xff);
    const u16 param_dst_width = (param_en == 0) ? (dst_width & 0xffff) : ((dst_width >> 16) & 0xffff);
    const u16 param_dst_height = (param_en == 0) ? (dst_height & 0xffff) : ((dst_height >> 16) & 0xffff);

	
    // bit0 of the selected flip byte drives array2stream, bit1 drives xfMat_hflip.
    ap_uint<1> param_vflip_mode = param_flip_mode & 0x1;
    ap_uint<1> param_hflip_mode = (param_flip_mode >> 1) & 0x1;
     
    // Intermediate FIFOs between the first three stages.
    hls::stream<T_UINT(AXIMM_BPP, AXIMM_NPPC)> inter0("inter0");  
    hls::stream<T_UINT(BAYER_BPP, BAYER_NPPC)> inter1("inter1");  
#pragma HLS STREAM variable=inter0 depth=1024 
#pragma HLS STREAM variable=inter1 depth=16

    // img0/img1 use the crop size, img2..img4 use the destination size.
    xf::Mat<BAYER_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img0(param_crop_height, param_crop_width);
    xf::Mat<VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img1(param_crop_height, param_crop_width);
    xf::Mat<VIDEO_TYPE, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC> img2(param_dst_height, param_dst_width);
    xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img3(param_dst_height, param_dst_width);
    xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img4(param_dst_height, param_dst_width);
    // The Mat payloads are implemented as FIFOs so the stages can overlap.
#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16

    // Everything below runs as one task-level pipeline.
#pragma HLS dataflow	 
    // Memory -> stream. NOTE(review): presumably reads the crop window of frame `index` and applies the vertical flip - confirm in array2stream.
	array2stream<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC>(src, inter0, index, param_crop_x, param_crop_y, param_crop_width, param_crop_height, sensor_width, sensor_height, param_vflip_mode); 
    // Re-pack from AXIMM_NPPC to BAYER_NPPC pixels per word, then wrap into an xf::Mat.
    stream_nppc_down<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC, BAYER_NPPC>(inter0, inter1, param_crop_width, param_crop_height);
    stream2xfMat<BAYER_TYPE, BAYER_BPP, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC>(inter1, img0);
    
    // 5x5 CFA interpolation: BAYER_TYPE in, VIDEO_TYPE out.
    xf::lz::bayer_cfa<XF_BORDER_CONSTANT, XF_FILTER_5X5, BAYER_TYPE, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC, 0>(img0, img1, param_bayer_mode);
    
    // Bilinear resize from the crop size to the destination size.
    xf::resize <XF_INTERPOLATION_BILINEAR, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, 5> (img1, img2); 
    // Re-pack to VPOST_NPPC, optional horizontal flip, then emit as AXI4-Stream video.
    xfMat_nppc_down<VIDEO_TYPE, VIDEO_BPP, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, VPOST_NPPC>(img2, img3);
    xfMat_hflip<VPOST_TYPE, VPOST_BPP, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC>(img3, img4, param_hflip_mode);
    xf::xfMat2AXIvideo(img4, dst);
}

输入源对象是AXIMM，输出目的对象是AXIS的stream。此外，本函数还需要一系列的配置参数。
对于src和dst，被添加了interface约束。
其中，src被实现为m_axi接口，dst则被实现为axis接口。

#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst

对于return, crop_x ,crop_y ,crop_width ,crop_height 等等，被添加了interface约束。
它们被实现为s_axilite接口，并绑定到同一个AXILITE总线中，所以，它们被实现为位于CONTROL_BUS这个总线上的具有不同读写地址的REG。

#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS

对于index_in，被添加了interface约束。
它被实现为input接口，使用了ap_stable类型。ap_stable告诉HLS：该输入在模块正常运行期间保持稳定（只会在模块空闲或复位期间改变），因此HLS不需要在内部为它增加寄存器，也不会为它生成握手信号。

#pragma HLS INTERFACE ap_stable port=index_in

同样的，crop_x ,crop_y ,crop_width ,crop_height，等等，一旦MCU写入了REG，除非下一次写入更新，否则值不变，也就是说在模块运行期间它们保持稳定。所以，它们也使用了ap_stable类型。

#pragma HLS INTERFACE ap_stable port=flip_mode 
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y 
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height 
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame

对于index_out，默认被添加了interface约束，使用ap_vld类型。
它被实现为output接口，伴随着vld指示信号。

函数内定义了两个局部的stream对象（inter0和inter1），它们被添加了stream约束。

#pragma HLS STREAM variable=inter0 depth=1024 
#pragma HLS STREAM variable=inter1 depth=16

函数内定义了五个局部的mat对象（img0～img4），它们的data成员被添加了stream约束。

#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16

函数整体处理的调用过程，被添加了dataflow约束。

#pragma HLS DATAFLOW

注意，这里使用的函数bayer_cfa，它用到了C++的命名空间的编程技巧。
为了防止和其他的函数或者变量重名，如果在C语言中，需要手动扩展函数名，例如加前缀或者后缀来解决这个问题。但是在C++中，可以更简便的解决这个问题，就是使用namespace。
编译过程中，编译器实际上也是通过加前缀或者后缀的方式来解决重名问题的，只不过，这个过程不再是手动完成的，从而减轻工作量，并降低出错概率。
后续将介绍bayer_cfa这个函数的实现。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
然后来看看testbench，

#include "bayer_video_accl.h"
#include "bayer_video_tb.h"

#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv2/imgproc/imgproc.hpp"

#include "common/xf_axi.h"

using namespace cv;

需要使用opencv库，并使用namespace。

/**
 * C-simulation test bench for bayer_video_accl.
 *
 * Loads the image named on the command line as 8-bit grayscale, runs it
 * through the accelerator with the full frame as crop window, converts the
 * AXI4-Stream result back into a cv::Mat and writes both images to disk.
 *
 * @return 0 when the accelerator was run and the images were written;
 *         non-zero on any setup failure (bad usage, unreadable image, wrong
 *         image size), so that C simulation does not report such a run as
 *         a pass.
 */
int main(int argc, char *argv[])
{
	if (argc != 2)
	{
		printf("usage: %s bayer.png\n", argv[0]);
		return -1;
	}

	// Flag 0 loads the file as a single-channel 8-bit image.
	cv::Mat in_img;
	in_img = cv::imread(argv[1], 0);
	if (in_img.data == NULL)
	{
		fprintf(stderr,"Cannot open image at %s\n", argv[1]);
		return -1; // was 0: a missing input must not look like a pass
	}

	int width = in_img.size().width;
	int height = in_img.size().height;
	ap_uint<4> index_in = 2;
	ap_uint<4> index_out = 0;
	ap_uint<4> delay_frame = 2; // index_in - delay_frame selects frame 0
	u8 flip_mode = 3; // bit0 = vertical flip, bit1 = horizontal flip: both enabled
	u8 bayer_mode = 1;
	int crop_width = width;
	int crop_height = height;
	// Centre the crop window; "/4*2" keeps the offsets even.
	int crop_x = (width - crop_width)/4*2;
	int crop_y = (height - crop_height)/4*2;
	int dst_width = 2112;
	int dst_height = 1216;

	printf("width=%d, height=%d, (%d %d %d %d)\n", width, height, crop_x, crop_y, crop_width, crop_height);

	// The accelerator is built for one fixed input size.
	if ((width != BAYER_WIDTH) || (height != BAYER_HEIGHT))
	{
		printf("image size is wrong, real:%dx%d, needed:%dx%d \n", width, height, BAYER_WIDTH, BAYER_HEIGHT);
		return -1; // was 0: the accelerator never ran, so this is a failure
	}

	cv::Mat out_img;
	out_img.create(dst_height, dst_width, CV_8UC3);

	// The m_axi port is modelled by pointing directly at the image buffer.
	T_UINT(AXIMM_BPP, AXIMM_NPPC)* src = (T_UINT(AXIMM_BPP, AXIMM_NPPC)*)(in_img.data);
	hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)> dst;

	// param_en = 0: the low half of every packed register is used.
	bayer_video_accl(src, dst, index_in, index_out, 0, flip_mode, crop_x, crop_y, crop_width, crop_height, bayer_mode, dst_width, dst_height, width, height, delay_frame);

	AXIvideo2cvMatxf<VPOST_NPPC>(dst, out_img);

	cv::imwrite("hls.bmp", out_img);
	cv::imwrite("image.bmp", in_img);

	printf("test ok!\n");
	return 0;
}

主体框架和基本框架一样，
这里，由于src是AXIMM，所以，直接取出输入的cvMat对象（in_img）的data的指针，强制转换类型后赋值给src。
这里，使用了AXIvideo2cvMatxf的具象函数，将AXIS的流转换成cvMat对象，方便后面输出为文件。

+++++++++++++++++++++++++++++++++++++++++++++++++++
补充，bayer_cfa的实现。
cfa是一个5X5的窗口，我们可以借鉴xf_sobel的5X5的窗口的实现方式，来修改定制出适合我们的CFA算法的5X5的窗口的函数。

首先看看xf_sobel是怎么实现的。

#include "common/xf_common.h"
#include "common/xf_utility.h"
#include "hls_stream.h"

typedef unsigned int  uint32_t;

需要使用到xfopencv的一些库函数。所以包含相关的H文件。

/**
 * Public Sobel entry point (excerpt; "..." marks code omitted from the listing).
 *
 * Thin wrapper: derives the loop bounds from the source Mat and forwards to
 * the filter implementation selected by FILTER_TYPE. Only the 5x5 branch is
 * shown here.
 *
 * @param _src_mat   input image
 * @param _dst_matx  output receiving the X gradient
 * @param _dst_maty  output receiving the Y gradient
 */
template<int BORDER_TYPE,int FILTER_TYPE, int SRC_T,int DST_T, int ROWS, int COLS,int NPC=1,bool USE_URAM=false>
void Sobel(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty)
{
	
#pragma HLS INLINE OFF

	...
	// Width is counted in words: columns divided by the pixels packed per word.
	uint16_t width = _src_mat.cols >> XF_BITSHIFT(NPC);
	uint16_t height = _src_mat.rows;

 	if(FILTER_TYPE == XF_FILTER_5X5)
	{
		// Channel count, pixel depths and word widths are all derived from SRC_T/DST_T/NPC.
		xFSobelFilter5x5<SRC_T, DST_T, ROWS,COLS,XF_CHANNELS(SRC_T,NPC),XF_DEPTH(SRC_T,NPC),XF_DEPTH(DST_T,NPC),NPC,XF_WORDWIDTH(SRC_T,NPC),XF_WORDWIDTH(DST_T,NPC),(COLS >> XF_BITSHIFT(NPC)),USE_URAM>
		(_src_mat, _dst_matx, _dst_maty, height, width);
	}
}

这个函数只是一个封装函数，实际调用的是xFSobelFilter5x5的具象函数。

来看看这个xFSobelFilter5x5模板函数。

/**
 * Row-level driver of the 5x5 Sobel filter (excerpt; "..." marks code
 * omitted from the listing, including the local declarations).
 *
 * Keeps five image rows in `buf`, used as a circular buffer: `row_ind` is the
 * slot that receives the next incoming row and tp1/tp2/mid/bottom1/bottom2
 * name the five slots in top-to-bottom order for the row being processed.
 *
 * @param img_height  number of image rows
 * @param img_width   number of words per row
 */
template<int SRC_T, int DST_T, int ROWS, int COLS, int PLANES,int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC,bool USE_URAM>
void xFSobelFilter5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,
		uint16_t img_height, uint16_t img_width)
{
	...	
	row_ind = 2;

	// Prime the buffer: slots 0 and 1 are zero-filled (the two rows above the
	// image), slot 2 receives the first image row.
	Clear_Row_Loop:
	for(col = 0; col < img_width; col++)
	{
		buf[0][col] = 0;
		buf[1][col] = 0;
		buf[row_ind][col] = _src_mat.read(read_index++);
	}

	row_ind++;

	// Slot 3 receives the second image row.
	Read_Row2_Loop:
	for(col = 0; col < img_width; col++)
	{
		buf[row_ind][col] = _src_mat.read(read_index++);
	}
	row_ind++;

	// One iteration per output row; `row` runs two ahead of the output row
	// because two input rows have already been read.
	Row_Loop:
	for(row = 2; row < img_height+2; row++)
	{
		// modify the buffer indices to re use
		// bottom2 always equals row_ind, the slot being filled in this iteration.
		if(row_ind == 4)
		{
			tp1 = 0; tp2 = 1; mid = 2; bottom1 = 3; bottom2 = 4;
		}
		else if(row_ind == 0)
		{
			tp1 = 1; tp2 = 2; mid = 3; bottom1 = 4; bottom2 = 0;
		}
		else if(row_ind == 1)
		{
			tp1 = 2; tp2 = 3; mid = 4; bottom1 = 0; bottom2 = 1;
		}
		else if(row_ind == 2)
		{
			tp1 = 3; tp2 = 4; mid = 0; bottom1 = 1; bottom2 = 2;
		}
		else if(row_ind == 3)
		{
			tp1 = 4; tp2 = 0; mid = 1; bottom1 = 2; bottom2 = 3;
		}

		// Clear the first four window entries of every row at the start of each image row.
		src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0;
		src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0;
		src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0;
		src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0;
		src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0;

		inter_valx = inter_valy = 0;
		...
		
		// Process every column of this row.
		ProcessSobel5x5<SRC_T, DST_T, ROWS, COLS, PLANES,DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>( _src_mat, _dst_matx,  _dst_maty, buf, src_buf1,	src_buf2, src_buf3, src_buf4, src_buf5,	GradientValuesX, GradientValuesY,
				inter_valx, inter_valy, img_width, img_height, row_ind, shift_x, shift_y, tp1, tp2, mid, bottom1, bottom2, row, read_index, write_index);

		...
			
		// Write one more word per row after the column loop (the code that
		// prepares inter_valx/inter_valy for it is elided above).
		_dst_matx.write(write_index,inter_valx);
		_dst_maty.write(write_index++,inter_valy);

		...
		// Advance the circular-buffer write slot, wrapping 4 -> 0.
		row_ind++;
		if(row_ind == 5)
		{
			row_ind = 0;
		}
	} // Row_Loop
}
// xFSobelFilter5x5

在每一行的处理时，会调用ProcessSobel5x5的具象函数。分别计算出X方向的梯度和Y方向的梯度。
来看看ProcessSobel5x5的模板。

/**
 * Column loop of the 5x5 Sobel filter for one image row (excerpt; "..."
 * marks code omitted from the listing, including the local declarations and
 * the code that fills the window from buf0..buf4).
 *
 * @param buf                       five-row line buffer
 * @param src_buf1 .. src_buf5      the five rows of the sliding window
 * @param GradientValuesX/Y         per-pixel results for the current word
 * @param inter_valx, inter_valy    output words being assembled
 * @param row_ind                   line-buffer slot that receives the incoming row
 * @param tp1 .. bottom2            line-buffer slots in top-to-bottom order
 * @param row                       index of the incoming image row
 * @param read_index, write_index   running Mat read/write positions (updated)
 */
template<int SRC_T, int DST_T, int ROWS, int COLS,int PLANES, int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void ProcessSobel5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,

		XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC)+4],
		XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC)+4],
		XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)], XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
		XF_SNAME(WORDWIDTH_DST) &inter_valx, XF_SNAME(WORDWIDTH_DST) &inter_valy, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t &shift_x, uint16_t &shift_y,
		ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<13> row, int &read_index, int &write_index)
{
	...
	
	Col_Loop:
	for(ap_uint<13> col = 0; col < img_width; col++)
	{
		// While input rows remain, store the incoming word in the line buffer;
		// past the last image row, feed zeros into the bottom slot instead.
		if(row < img_height)
			buf[row_ind][col] = _src_mat.read(read_index++);
		else
			buf[bottom2][col] = 0;

		// Fetch the five vertically stacked words of this column, top to bottom.
		buf0 = buf[tp1][col];
		buf1 = buf[tp2][col];
		buf2 = buf[mid][col];
		buf3 = buf[bottom1][col];
		buf4 = buf[bottom2][col];
		
		...
		// Compute both gradients for every pixel of the current word.
		xFSobel5x5<NPC,PLANES, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY,
				src_buf1, src_buf2, src_buf3, src_buf4, src_buf5);
		...
		// Slide the window: the last four entries of each row become its first four.
		for(ap_uint<4> i = 0; i < 4; i++)
		{
			src_buf1[i] = src_buf1[buf_size-(4 - i)];
			src_buf2[i] = src_buf2[buf_size-(4 - i)];
			src_buf3[i] = src_buf3[buf_size-(4 - i)];
			src_buf4[i] = src_buf4[buf_size-(4 - i)];
			src_buf5[i] = src_buf5[buf_size-(4 - i)];
		}
		
		...
		
		// Emit the assembled X and Y output words.
		_dst_matx.write(write_index,inter_valx);
		_dst_maty.write(write_index++,inter_valy);
		
		...		
	} // Col_Loop
}

这个函数，用一个for循环体，逐点处理，直至一行全部处理完。
在这个函数里，每处理一列，首先从行缓存buf中读出该列对应的五行数据（buf0～buf4），用来填充window（src_buf1～src_buf5，填充代码此处省略），然后调用xFSobel5x5的具象函数。
来看看xFSobel5x5的模板。

/**
 * Evaluates the 5x5 Sobel X and Y gradients for every pixel of one word.
 *
 * For pixel n the window is the five consecutive entries starting at index n
 * of each of the five row buffers.
 *
 * @param GradientvaluesX  receives one X gradient per pixel
 * @param GradientvaluesY  receives one Y gradient per pixel
 * @param src_buf1..5      the five window rows, top to bottom
 */
template<int NPC,int PLANES, int DEPTH_SRC, int DEPTH_DST>
void xFSobel5x5(
		XF_PTNAME(DEPTH_DST) *GradientvaluesX,
		XF_PTNAME(DEPTH_DST) *GradientvaluesY,
		XF_PTNAME(DEPTH_SRC) *src_buf1,
		XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3,
		XF_PTNAME(DEPTH_SRC) *src_buf4,
		XF_PTNAME(DEPTH_SRC) *src_buf5)
{
	Compute_Grad_Loop:
	for(ap_uint<5> pix = 0; pix < XF_NPIXPERCYCLE(NPC); pix++ )
	{
		// Left edge of this pixel's 5x5 window in each row buffer.
		XF_PTNAME(DEPTH_SRC) *win_row1 = &src_buf1[pix];
		XF_PTNAME(DEPTH_SRC) *win_row2 = &src_buf2[pix];
		XF_PTNAME(DEPTH_SRC) *win_row3 = &src_buf3[pix];
		XF_PTNAME(DEPTH_SRC) *win_row4 = &src_buf4[pix];
		XF_PTNAME(DEPTH_SRC) *win_row5 = &src_buf5[pix];

		GradientvaluesX[pix] = xFGradientX5x5<PLANES,DEPTH_SRC, DEPTH_DST>(win_row1, win_row2, win_row3, win_row4, win_row5);
		GradientvaluesY[pix] = xFGradientY5x5<PLANES,DEPTH_SRC, DEPTH_DST>(win_row1, win_row2, win_row3, win_row4, win_row5);
	}
}

这个函数，在一个for循环体中，对一笔数据逐点处理，分别计算出X方向的梯度和Y方向的梯度。

再来看看xFGradientX5x5和xFGradientY5x5

/******************************************************************
 *  Sobel Filter X-Gradient used is 5x5
 *
 *       ---- ----- --- ---- ---
 *      | -1 |  -2 | 0 |  2 | 1 |
 *       ---- ----- --- ---- ---
 *      | -4 |  -8 | 0 |  8 | 4 |
 *       ---- ----- --- ---- ---
 *      | -6 | -12 | 0 | 12 | 6 |
 *       ---- ----- --- ---- ---
 *      | -4 |  -8 | 0 |  8 | 4 |
 *       ---- ----- --- ---- ---
 *      | -1 |  -2 | 0 |  2 | 1 |
 *       ---- ----- --- ---- ---
 *
 *  src_buf1..src_buf5 point at the left edge of the window rows (top to
 *  bottom); index 0..4 is the column inside the window. Each of the PLANES
 *  channels is processed independently on its own 8-bit slice of the source
 *  pixel, and the per-channel results are packed into the return value.
 ****************************************************************/

template<int PLANES,int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientX5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5)
{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) g_x = 0,out_val=0;
	// STEP is the bit width of one channel in the output: 16 for the signed
	// 16-bit-per-channel depths, 8 otherwise. p is the output bit offset.
	int STEP,p=0;
	if( (DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP) )
	{
		STEP=16;
	}
	else
	{
		STEP=8;
	}

	// k is the bit offset of channel i inside the source pixel (8 bits per channel).
	for(int i=0,k=0;i< PLANES;i++,k+=8)
	{
		// Mxx terms carry the negative (left-hand) coefficients, Axx terms the
		// positive (right-hand) ones; M01 is the +/-1 corner difference, which
		// is already signed and is added on the positive side.
		short int M00 = (short int)(((short int)src_buf1[1].range(k+7,k) + (short int)src_buf5[1].range(k+7,k)) << 1);
		short int M01 = (short int)((short int)src_buf1[4].range(k+7,k) + (short int)src_buf5[4].range(k+7,k))-((short int)src_buf1[0].range(k+7,k) + (short int)src_buf5[0].range(k+7,k));
		short int A00 = (short int)(((short int)src_buf1[3].range(k+7,k) + (short int)src_buf5[3].range(k+7,k)) << 1);
		short int M02 = (short int)(((short int)src_buf2[0].range(k+7,k) + (short int)src_buf4[0].range(k+7,k)) << 2);
		short int M03 = (short int)((short int)src_buf2[1].range(k+7,k) + (short int)src_buf4[1].range(k+7,k)) << 3;
		short int A01 = (short int)((short int)src_buf2[3].range(k+7,k) + (short int)src_buf4[3].range(k+7,k)) << 3;
		short int A02 = (short int)((short int)src_buf2[4].range(k+7,k) + (short int)src_buf4[4].range(k+7,k)) << 2;
		short int M04 = (short int)src_buf3[0].range(k+7,k) * 6;
		short int M05 = (short int)src_buf3[1].range(k+7,k) * 12;
		short int A03 = (short int)src_buf3[3].range(k+7,k) * 12;
		short int A04 = (short int)src_buf3[4].range(k+7,k) * 6;
		// Small adder tree: FA sums the positive side, FS the negative side.
		short int S00 = M00 + M02;
		short int S01 = M03 + M04 + M05;
		short int A0 = A00 + A01;
		short int A1 = A02 + A03;
		short int A2 = A04 + M01;
		short int FA = A0 + A1 + A2;
		short int FS = S00 + S01;
		short int out_x = FA - FS;

		g_x = (XF_PTNAME(DEPTH_DST))out_x;

		// Unsigned 8-bit-per-channel outputs are saturated to [0, 255].
		if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
		{
			if(out_x < 0)
				g_x = 0;
			else if (out_x > 255)
				g_x = 255;
		}
		
		// Pack this channel's result and advance to the next output slot.
		out_val.range(p+(STEP-1),p)=g_x;
		p+=STEP;
	}
	return out_val;
}

/****************************************************************
 * Sobel Filter Y-Gradient used is 5x5
 *
 *       ---- ----- ----- ----- ----
 *      | -1 |  -4 |  -6 |  -4 | -1 |
 *       ---- ----- ----- ----- ----
 *      | -2 |  -8 | -12 |  -8 | -2 |
 *       ---- ----- ----- ----- ----
 *      |  0 |   0 |   0 |   0 |  0 |
 *       ---- ----- ----- ----- ----
 *      |  2 |   8 |  12 |   8 |  2 |
 *       ---- ----- ----- ----- ----
 *      |  1 |   4 |   6 |   4 |  1 |
 *       ---- ----- ----- ----- ----
 *
 *  src_buf1..src_buf5 point at the left edge of the window rows (top to
 *  bottom); index 0..4 is the column inside the window. Each of the PLANES
 *  channels is processed independently on its own 8-bit slice of the source
 *  pixel, and the per-channel results are packed into the return value.
 ******************************************************************/
template<int PLANES,int DEPTH_SRC, int  DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientY5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5)
		{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) g_y = 0,out_val=0;
	// STEP is the bit width of one channel in the output: 16 for the signed
	// 16-bit-per-channel depths, 8 otherwise. p is the output bit offset.
	int STEP,p=0;
	if( (DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP) )
	{
		STEP=16;
	}
	else
	{
		STEP=8;
	}

	// k is the bit offset of channel i inside the source pixel (8 bits per channel).
	for(int i=0,k=0;i<PLANES;i++,k+=8)
	{
		// Mxx terms carry the negative (upper-row) coefficients, Axx terms the
		// positive (lower-row) ones; M00 is the +/-1 corner difference, which
		// is already signed and is added on the positive side.
		short int M00 = ((short int)src_buf5[0].range(k+7,k) + (short int)src_buf5[4].range(k+7,k)) - ((short int)src_buf1[0].range(k+7,k) + (short int)src_buf1[4].range(k+7,k));
		short int M01 = (short int)(((short int)src_buf1[1].range(k+7,k) + (short int)src_buf1[3].range(k+7,k)) << 2);
		short int A00 = (short int)(((short int)src_buf5[1].range(k+7,k) + (short int)src_buf5[3].range(k+7,k)) << 2);
		short int M02 = (short int)(((short int)src_buf2[0].range(k+7,k) + (short int)src_buf2[4].range(k+7,k)) << 1);
		short int A01 = (short int)(((short int)src_buf4[0].range(k+7,k) + (short int)src_buf4[4].range(k+7,k)) << 1);
		short int M03 = (short int)(((short int)src_buf2[1].range(k+7,k) + (short int)src_buf2[3].range(k+7,k)) << 3);
		short int A02 = (short int)(((short int)src_buf4[1].range(k+7,k) + (short int)src_buf4[3].range(k+7,k)) << 3);
		short int M04 = (short int)(src_buf1[2].range(k+7,k) * 6);
		short int M05 = (short int)(src_buf2[2].range(k+7,k) * 12);
		short int A03 = (short int)(src_buf4[2].range(k+7,k) * 12);
		short int A04 = (short int)(src_buf5[2].range(k+7,k) * 6);
		// Small adder tree: FA sums the positive side, FS the negative side.
		short int S00 = M01 + M02 + M03;
		short int S01 = M04 + M05;
		short int A0 = A00 + A01;
		short int A1 = A02 + A03;
		short int A2 = A04 + M00;
		short int FA = A0 + A1 + A2;
		short int FS = S00 + S01;
		short int out_y = FA - FS;

		g_y = (XF_PTNAME(DEPTH_DST))out_y;

		// Unsigned 8-bit-per-channel outputs are saturated to [0, 255].
		if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
		{
			if(out_y < 0)
				g_y = 0;
			else if (out_y > 255)
				g_y = 255;
		}
		
		// Pack this channel's result and advance to the next output slot.
		out_val.range(p+(STEP-1),p)=g_y;
		p+=STEP;
	}
	return out_val;
}

这两个函数，按照sobel的各个位置的系数进行乘加运算，求出X方向的梯度值和Y方向的梯度值。

至此，整个sobel的过程已经分析完毕。
涉及到窗口window填充，window shift moving相关的操作，都在前几层的调用中来完成，最终调用的xFGradientX5x5和xFGradientY5x5，才是实现核算子运算的函数。
所以，对于window shift moving类型的函数实现，我们完全可以基于sobel的实现来修改移植，
window shift moving的操作交给框架函数完成，我们只需要修改核算子运算函数的实现。

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

简单起见，我们可以把移植的函数直接放到xf这个namespace中（注意，前面顶层函数中调用的是xf::lz::bayer_cfa，也就是在xf内部又嵌套了一层lz命名空间）。如下代码：

// Skeleton only: the ported functions go inside the xf namespace
// ("..." stands for their definitions).
namespace xf{
	...
} // end of namespace xf

框架函数并不需要修改代码，所以我们只是简单的把函数名修改一下即可。
例如：
最上层的Sobel函数名，修改为bayer_cfa，
其中调用了xFSobelFilter5x5，修改为bayer_cfa_Filter5x5，
在xFSobelFilter5x5中，调用了ProcessSobel5x5，这里要修改为process_bayer_cfa_5X5，
在ProcessSobel5x5中，调用了xFSobel5x5，这里要修改为bayer_cfa_5X5，
在xFSobel5x5中，调用了xFGradientX5x5和xFGradientY5x5，这是两个核算子运算函数，我们并不需要两个函数，所以只需要调用一个核算子运算函数即可。
这里，以xFGradientY5x5为基础，进行核算子运算函数的修改。取名为bayer_cfa_core_5X5。

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bayer_cfa_core_5X5代码：

/**
 * Demosaics one pixel from its 5x5 Bayer neighbourhood.
 *
 * src_buf1..src_buf5 point at the left edge of the five window rows (top to
 * bottom); the pixel being reconstructed is the window centre,
 * src_buf3[2]. The four interpolation kernels are evaluated and the Bayer
 * phase of the pixel (row/column parity combined with `mode`) decides which
 * of them supply R, G and B.
 *
 * @param row   image row of the centre pixel (only its parity is used)
 * @param col   word index of the centre pixel within the row
 * @param npc   index of the pixel inside the word
 * @param mode  Bayer pattern of the top-left 2x2 cell:
 *              0 = BG, 1 = GB, 2 = GR, any other value = RG
 * @return      R in bits 23..16, G in bits 15..8, B in bits 7..0, each
 *              clamped to [0, 255]
 *
 * NOTE(review): the column is reconstructed as col * 8 + npc, i.e. 8 pixels
 * per word are hard-coded. Because only the parity is used, this is correct
 * for any even number of pixels per word but not for 1 pixel per word -
 * confirm against the NPC the caller is built with.
 * NOTE(review): the result is written to bits 23..0 of the return value, so
 * DEPTH_DST is assumed to be at least 24 bits wide - confirm.
 */
template<int PLANES,int DEPTH_SRC, int  DEPTH_DST>
XF_PTNAME(DEPTH_DST) bayer_cfa_core_5X5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5, u16 row, u16 col, u16 npc, u8 mode)
		{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) out_val = 0;

	// Copy the window into a plain 5x5 array for the kernel helpers.
	// The copies are kept as explicit assignments (no loop) on purpose.
	u8 block[5][5];
	block[0][0] = src_buf1[0];
	block[0][1] = src_buf1[1];
	block[0][2] = src_buf1[2];
	block[0][3] = src_buf1[3];
	block[0][4] = src_buf1[4];

	block[1][0] = src_buf2[0];
	block[1][1] = src_buf2[1];
	block[1][2] = src_buf2[2];
	block[1][3] = src_buf2[3];
	block[1][4] = src_buf2[4];

	block[2][0] = src_buf3[0];
	block[2][1] = src_buf3[1];
	block[2][2] = src_buf3[2];
	block[2][3] = src_buf3[3];
	block[2][4] = src_buf3[4];

	block[3][0] = src_buf4[0];
	block[3][1] = src_buf4[1];
	block[3][2] = src_buf4[2];
	block[3][3] = src_buf4[3];
	block[3][4] = src_buf4[4];

	block[4][0] = src_buf5[0];
	block[4][1] = src_buf5[1];
	block[4][2] = src_buf5[2];
	block[4][3] = src_buf5[3];
	block[4][4] = src_buf5[4];

	// All candidate values are computed unconditionally; the phase logic
	// below only selects among them.
	const short raw = block[2][2];
	const short RB_G = kernel_RB_G(block);      // G at an R or B site
	const short G_RB_h = kernel_G_RB_h(block);  // mean of left/right neighbours
	const short G_RB_v = kernel_G_RB_v(block);  // mean of upper/lower neighbours
	const short RB_BR = kernel_RB_BR(block);    // opposite colour at an R or B site

	const u16 i = row;
	const u16 j = col * 8 + npc;

	// Every pattern is the RG pattern shifted by one row and/or one column:
	// BG and GB start on the blue row, BG and GR start one column later.
	const bool shift_row = (mode == 0) || (mode == 1);
	const bool shift_col = (mode == 0) || (mode == 2);

	// Position inside the 2x2 cell expressed in RG coordinates:
	//   (red row,  even col) = R      (red row,  odd col) = G
	//   (blue row, even col) = G      (blue row, odd col) = B
	const bool blue_row = ((i & 0x1) != 0) != shift_row;
	const bool odd_col = ((j & 0x1) != 0) != shift_col;

	short r = 0;
	short g = 0;
	short b = 0;

	if (blue_row == odd_col) {
		// R site (red row) or B site (blue row): the raw sample is that
		// colour, the other chroma comes from the diagonal neighbours.
		g = RB_G;
		r = blue_row ? RB_BR : raw;
		b = blue_row ? raw : RB_BR;
	} else {
		// G site: in a red row the horizontal neighbours are R and the
		// vertical ones B; in a blue row it is the other way round.
		g = raw;
		r = blue_row ? G_RB_v : G_RB_h;
		b = blue_row ? G_RB_h : G_RB_v;
	}

	r = (r > 255) ? 255 : ((r < 0) ? 0 : r);
	g = (g > 255) ? 255 : ((g < 0) ? 0 : g);
	b = (b > 255) ? 255 : ((b < 0) ? 0 : b);

	out_val.range(7, 0) = b;
	out_val.range(15, 8) = g;
	out_val.range(23, 16) = r;

	return out_val;
}

其中，用到的几个计算函数，如下所示：

/*
 * Green estimate at a red or blue site (window centre block[2][2]).
 * Indices are [y][x]. Example layout with a red centre:
 *
 *	0 1 2 3 4
 *	R G R G R 0
 *	G B G B G 1
 *	R G R G R 2
 *	G B G B G 3
 *	R G R G R 4
 *
 *	hd = |G21 - G23| + |R22 * 2 - R20 - R24|
 *	vd = |G12 - G32| + |R22 * 2 - R02 - R42|
 *	gh = (G21 + G23) / 2 + (R22 * 2 - R20 - R24) / 4
 *	gv = (G12 + G32) / 2 + (R22 * 2 - R02 - R42) / 4
 *	ge = (gh + gv) / 2
 *
 * Returns gh when hd is smaller, gv when vd is smaller, ge on a tie.
 */
short kernel_RB_G(u8 block[5][5]) 
{
	// Same-colour samples two steps away and green samples one step away.
	const int centre = block[2][2];
	const int west2 = block[2][0];
	const int west1 = block[2][1];
	const int east1 = block[2][3];
	const int east2 = block[2][4];
	const int north2 = block[0][2];
	const int north1 = block[1][2];
	const int south1 = block[3][2];
	const int south2 = block[4][2];

	const short diff_h = ABS_DEC(west1, east1) + ABS_DEC(centre * 2, west2 + east2);
	const short diff_v = ABS_DEC(north1, south1) + ABS_DEC(centre * 2, north2 + south2);

	const short green_h = (west1 + east1) / 2 + (centre * 2 - west2 - east2) / 4;
	const short green_v = (north1 + south1) / 2 + (centre * 2 - north2 - south2) / 4;

	if (diff_h == diff_v) {
		const short green_avg = (green_h + green_v) / 2;
		return green_avg;
	}
	return (diff_h < diff_v) ? green_h : green_v;
}

/*
 * Mean of the left and right neighbours of the window centre.
 * Indices are [y][x]. Example layout with a green centre:
 *
 *	0 1 2 3 4
 *	G B G B G 0
 *	R G R G R 1
 *	G B G B G 2
 *	R G R G R 3
 *	G B G B G 4
 */
short kernel_G_RB_h(u8 block[5][5])
{
	const int west = block[2][1];
	const int east = block[2][3];
	return static_cast<short>((west + east) / 2);
}

/*
 * Mean of the upper and lower neighbours of the window centre.
 * Indices are [y][x]. Example layout with a green centre:
 *
 *	0 1 2 3 4
 *	G B G B G 0
 *	R G R G R 1
 *	G B G B G 2
 *	R G R G R 3
 *	G B G B G 4
 */
short kernel_G_RB_v(u8 block[5][5])
{
	const int north = block[1][2];
	const int south = block[3][2];
	return static_cast<short>((north + south) / 2);
}

/*
 * Estimate built from the four diagonal neighbours of the window centre.
 * Indices are [y][x]. Example layout with a red centre (diagonals are blue):
 *
 *	0 1 2 3 4
 *	R G R G R 0
 *	G B G B G 1
 *	R G R G R 2
 *	G B G B G 3
 *	R G R G R 4
 *
 * Averages the diagonal pair with the smaller absolute difference; when both
 * differences are equal, returns the mean of the two pair averages.
 */
short kernel_RB_BR(u8 block[5][5])
{
	// Main diagonal: top-left / bottom-right. Anti-diagonal: top-right / bottom-left.
	const int top_left = block[1][1];
	const int bottom_right = block[3][3];
	const int top_right = block[1][3];
	const int bottom_left = block[3][1];

	const short diff_main = ABS_DEC(top_left, bottom_right);
	const short diff_anti = ABS_DEC(top_right, bottom_left);

	const short mean_main = (top_left + bottom_right) / 2;
	const short mean_anti = (top_right + bottom_left) / 2;

	if (diff_main == diff_anti) {
		const short mean_both = (mean_main + mean_anti) / 2;
		return mean_both;
	}
	return (diff_main < diff_anti) ? mean_main : mean_anti;
}

Huskar_Liu

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
HLS第十三课（bayer video）

本例利用之前介绍的基础函数进行功能级实现。void bayer_video_accl(T_UINT(AXIMM_BPP, AXIMM_NPPC)* src, hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)>& dst, \ ap_uint<4> index_in, ap_uint<4>& index_out, ap_uint<1> param_en, \ u16 flip_mode, u32
复制链接

扫一扫

专栏目录