HLS第十三课(bayer video)

本例利用之前介绍的基础函数进行功能级实现。

// Top-level HLS function of this lesson.
//
// Reads a frame through the memory-mapped pointer `src`, pushes it through a
// chain of dataflow stages (array2stream -> stream_nppc_down -> stream2xfMat ->
// bayer_cfa -> resize -> xfMat_nppc_down -> xfMat_hflip -> xfMat2AXIvideo) and
// emits the result on the AXI4-Stream port `dst`.
//
// Parameters:
//   src            m_axi pointer that array2stream reads from.
//   dst            axis output stream written by xf::xfMat2AXIvideo.
//   index_in       4-bit index input (ap_stable, not part of CONTROL_BUS).
//   index_out      receives (index_in - delay_frame) truncated to 4 bits.
//   param_en       selects which half of every packed register is used:
//                  0 -> low half, 1 -> high half.
//   flip_mode      two packed 8-bit fields; in the selected field bit 0 becomes
//                  param_vflip_mode and bit 1 becomes param_hflip_mode.
//   crop_x, crop_y, crop_width, crop_height, dst_width, dst_height
//                  two packed 16-bit values each (low/high half-word).
//   bayer_mode     two packed 8-bit values; the selected one is passed to bayer_cfa.
//   sensor_width, sensor_height
//                  passed unchanged to array2stream.
//   delay_frame    subtracted from index_in (see index_out).
void bayer_video_accl(T_UINT(AXIMM_BPP, AXIMM_NPPC)* src, hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)>& dst, \
    ap_uint<4> index_in, ap_uint<4>& index_out, ap_uint<1> param_en, \
    u16 flip_mode, u32 crop_x, u32 crop_y, u32 crop_width, u32 crop_height, u16 bayer_mode, u32 dst_width, u32 dst_height, u16 sensor_width, u16 sensor_height, ap_uint<4> delay_frame)
{
// Data ports: src is an AXI master, dst is a registered AXI4-Stream.
#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst

// Block-level control and all scalar parameters share one AXI4-Lite bundle.
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS

// index_in is a plain input wire marked ap_stable.
#pragma HLS INTERFACE ap_stable port=index_in  

// The configuration registers are additionally marked ap_stable.
// Note that param_en itself is not in this list.
#pragma HLS INTERFACE ap_stable port=flip_mode 
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y 
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height 
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame

    // 4-bit subtraction: the result wraps modulo 16.
    const ap_uint<4> index = index_in - delay_frame;
	printf("index:%d\n", (int)index);
	index_out = index;

    // Unpack the active parameter set: each register carries two copies
    // (low half / high half) and param_en picks one of them.
    const u8 param_flip_mode = (param_en == 0) ? (flip_mode & 0xff) : ((flip_mode >> 8) & 0xff);
    const u16 param_crop_x = (param_en == 0) ? (crop_x & 0xffff) : ((crop_x >> 16) & 0xffff);
    const u16 param_crop_y = (param_en == 0) ? (crop_y & 0xffff) : ((crop_y >> 16) & 0xffff);
    const u16 param_crop_width = (param_en == 0) ? (crop_width & 0xffff) : ((crop_width >> 16) & 0xffff);
    const u16 param_crop_height = (param_en == 0) ? (crop_height & 0xffff) : ((crop_height >> 16) & 0xffff);
    const u8 param_bayer_mode = (param_en == 0) ? (bayer_mode & 0xff) : ((bayer_mode >> 8) & 0xff);
    const u16 param_dst_width = (param_en == 0) ? (dst_width & 0xffff) : ((dst_width >> 16) & 0xffff);
    const u16 param_dst_height = (param_en == 0) ? (dst_height & 0xffff) : ((dst_height >> 16) & 0xffff);

	
    // bit 0 -> flag handed to array2stream, bit 1 -> flag handed to xfMat_hflip.
    ap_uint<1> param_vflip_mode = param_flip_mode & 0x1;
    ap_uint<1> param_hflip_mode = (param_flip_mode >> 1) & 0x1;
     
    // Intermediate FIFOs between the first three stages.
    hls::stream<T_UINT(AXIMM_BPP, AXIMM_NPPC)> inter0("inter0");  
    hls::stream<T_UINT(BAYER_BPP, BAYER_NPPC)> inter1("inter1");  
#pragma HLS STREAM variable=inter0 depth=1024 
#pragma HLS STREAM variable=inter1 depth=16

    // img0/img1 use the selected crop size, img2..img4 use the selected
    // destination size; their data members are implemented as streams.
    xf::Mat<BAYER_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img0(param_crop_height, param_crop_width);
    xf::Mat<VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img1(param_crop_height, param_crop_width);
    xf::Mat<VIDEO_TYPE, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC> img2(param_dst_height, param_dst_width);
    xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img3(param_dst_height, param_dst_width);
    xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img4(param_dst_height, param_dst_width);
#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16

// Everything below is one producer/consumer chain executed as a dataflow region.
#pragma HLS dataflow	 
    // src -> inter0 (takes index, crop window, sensor size and the vflip flag).
	array2stream<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC>(src, inter0, index, param_crop_x, param_crop_y, param_crop_width, param_crop_height, sensor_width, sensor_height, param_vflip_mode); 
    // inter0 (AXIMM_NPPC) -> inter1 (BAYER_NPPC).
    stream_nppc_down<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC, BAYER_NPPC>(inter0, inter1, param_crop_width, param_crop_height);
    // inter1 -> img0.
    stream2xfMat<BAYER_TYPE, BAYER_BPP, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC>(inter1, img0);
    
    // img0 (BAYER_TYPE) -> img1 (VIDEO_TYPE) using the 5x5 CFA filter and the selected bayer mode.
    xf::lz::bayer_cfa<XF_BORDER_CONSTANT, XF_FILTER_5X5, BAYER_TYPE, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC, 0>(img0, img1, param_bayer_mode);
    
    // img1 (crop size) -> img2 (destination size), bilinear interpolation.
    xf::resize <XF_INTERPOLATION_BILINEAR, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, 5> (img1, img2); 
    // img2 (VIDEO_NPPC) -> img3 (VPOST_NPPC).
    xfMat_nppc_down<VIDEO_TYPE, VIDEO_BPP, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, VPOST_NPPC>(img2, img3);
    // img3 -> img4, controlled by the hflip flag.
    xfMat_hflip<VPOST_TYPE, VPOST_BPP, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC>(img3, img4, param_hflip_mode);
    // img4 -> dst AXI4-Stream.
    xf::xfMat2AXIvideo(img4, dst);
}

输入源对象是AXIMM,输出目的对象是AXIS的stream。此外,本函数还需要一系列的配置参数。
对于src和dst,被添加了interface约束。
其中,src被实现为m_axi接口,dst则被实现为axis接口。

#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst

对于return, crop_x ,crop_y ,crop_width ,crop_height 等等,被添加了interface约束。
它们被实现为s_axilite接口,并绑定到同一个AXILITE总线中,所以,它们被实现为位于CONTROL_BUS这个总线上的具有不同读写地址的REG。

#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS 
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS

对于index_in,被添加了interface约束。
它被实现为input接口,且不受到ap_rst的复位影响,所以使用了ap_stable类型。

#pragma HLS INTERFACE ap_stable port=index_in  

同样的,crop_x ,crop_y ,crop_width ,crop_height, 等等,我们也不希望它们受到ap_rst的复位影响,一旦MCU写入了REG,除非下一次写入更新,否则值不变。所以,它们也使用了ap_stable类型。

#pragma HLS INTERFACE ap_stable port=flip_mode 
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y 
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height 
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame

对于index_out,默认被添加了interface约束,使用ap_vld类型。
它被实现为output接口,伴随着vld指示信号。

函数内定义了两个局部对象,stream的对象。被添加了stream约束。

#pragma HLS STREAM variable=inter0 depth=1024 
#pragma HLS STREAM variable=inter1 depth=16

函数内定义了五个局部对象,mat的对象,被添加了stream约束。

#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16

函数整体处理的调用过程,被添加了dataflow约束。

#pragma HLS DATAFLOW

注意,这里使用的函数bayer_cfa,它用到了C++的命名空间的编程技巧。
为了防止和其他的函数或者变量重名,如果在C语言中,需要手动扩展函数名,例如加前缀或者后缀来解决这个问题。但是在C++中,可以更简便的解决这个问题,就是使用namespace。
编译过程中,编译器实际上也是通过加前缀或者后缀的方式来解决重名问题的,只不过,这个过程不再是手动完成的,从而减轻工作量,并降低出错概率。
后续将介绍bayer_cfa这个函数的实现。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
然后来看看testbench,

#include "bayer_video_accl.h"
#include "bayer_video_tb.h"

#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv2/imgproc/imgproc.hpp"

#include "common/xf_axi.h"

using namespace cv;

需要使用opencv库,并使用namespace。

int main(int argc, char *argv[])
{	
	if (argc != 2)
	{
		printf("usage: %s bayer.png\n", argv[0]);
		return -1;
	}	

	cv::Mat in_img;
	in_img = cv::imread(argv[1], 0);
	if (in_img.data == NULL)
	{
		fprintf(stderr,"Cannot open image at %s\n", argv[1]);
		return 0;
	}

	int width = in_img.size().width;
	int height = in_img.size().height;
    ap_uint<4> index_in = 2;
    ap_uint<4> index_out = 0;
    ap_uint<4> delay_frame = 2;
    u8 flip_mode = 3;//(0, 0)
	u8 bayer_mode = 1;
	int crop_width = width;
	int crop_height = height;
    int crop_x = (width - crop_width)/4*2;
	int crop_y = (height - crop_height)/4*2;
    int dst_width = 2112;
    int dst_height = 1216;

    printf("width=%d, height=%d, (%d %d %d %d)\n", width, height, crop_x, crop_y, crop_width, crop_height);
	
	if ((width != BAYER_WIDTH) || (height != BAYER_HEIGHT))
	{
		printf("image size is wrong, real:%dx%d, needed:%dx%d \n", width, height, BAYER_WIDTH, BAYER_HEIGHT);
		return 0;
	}

	cv::Mat out_img;
	out_img.create(dst_height, dst_width, CV_8UC3);
	
	  
    T_UINT(AXIMM_BPP, AXIMM_NPPC)* src = (T_UINT(AXIMM_BPP, AXIMM_NPPC)*)(in_img.data);
	hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)> dst;

 
    bayer_video_accl(src, dst, index_in, index_out, 0, flip_mode, crop_x, crop_y, crop_width, crop_height, bayer_mode, dst_width, dst_height, width, height, delay_frame); 
    
    AXIvideo2cvMatxf<VPOST_NPPC>(dst, out_img);
    
    cv::imwrite("hls.bmp", out_img);
	cv::imwrite("image.bmp", in_img);
	
	printf("test ok!\n");
	return 0;
}

主体框架和基本框架一样,
这里,由于src是AXIMM,所以,直接取出输入的cvMat对象(in_img)的data指针,赋值给src。
这里,使用了AXIvideo2cvMatxf的具象函数,将AXIS的流转换成cvMat对象,方便后面输出为文件。

+++++++++++++++++++++++++++++++++++++++++++++++++++
补充,bayer_cfa的实现。
cfa是一个5X5的窗口,我们可以借鉴xf_sobel的5X5的窗口的实现方式,来修改定制出适合我们的CFA算法的5X5的窗口的函数。

首先看看xf_sobel是怎么实现的。

#include "common/xf_common.h"
#include "common/xf_utility.h"
#include "hls_stream.h"

typedef unsigned int  uint32_t;

需要使用到xfopencv的一些库函数。所以包含相关的H文件。

// Public Sobel entry point (excerpt: elided code is marked with "...").
//
// Derives the loop bounds from the source Mat and dispatches on FILTER_TYPE.
// Only the XF_FILTER_5X5 branch is shown here; it forwards to xFSobelFilter5x5.
//
//   _src_mat   input image.
//   _dst_matx  output passed to the filter as the X-gradient destination.
//   _dst_maty  output passed to the filter as the Y-gradient destination.
template<int BORDER_TYPE,int FILTER_TYPE, int SRC_T,int DST_T, int ROWS, int COLS,int NPC=1,bool USE_URAM=false>
void Sobel(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty)
{
	
#pragma HLS INLINE OFF

	...
	// Width is counted in words: the column count shifted right by XF_BITSHIFT(NPC).
	uint16_t width = _src_mat.cols >> XF_BITSHIFT(NPC);
	uint16_t height = _src_mat.rows;

 	if(FILTER_TYPE == XF_FILTER_5X5)
	{
		// The last integer template argument (TC) is the compile-time word count per row.
		xFSobelFilter5x5<SRC_T, DST_T, ROWS,COLS,XF_CHANNELS(SRC_T,NPC),XF_DEPTH(SRC_T,NPC),XF_DEPTH(DST_T,NPC),NPC,XF_WORDWIDTH(SRC_T,NPC),XF_WORDWIDTH(DST_T,NPC),(COLS >> XF_BITSHIFT(NPC)),USE_URAM>
		(_src_mat, _dst_matx, _dst_maty, height, width);
	}
}

这个函数只是一个封装函数,实际调用的是xFSobelFilter5x5的具象函数。

来看看这个xFSobelFilter5x5模板函数。

// Row-level driver of the 5x5 Sobel filter (excerpt: elided code is marked with "...").
//
// Keeps five row buffers in `buf`, pre-loads them, then calls ProcessSobel5x5
// once per output row while rotating the roles of the five buffers.
//
//   img_height  number of rows to process.
//   img_width   number of words per row (loop bound of every column loop).
template<int SRC_T, int DST_T, int ROWS, int COLS, int PLANES,int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC,bool USE_URAM>
void xFSobelFilter5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,
		uint16_t img_height, uint16_t img_width)
{
	...	
	row_ind = 2;

	// Zero buffers 0 and 1 and load the first source row into buffer 2
	// (presumably the zero rows act as the border above the first image row).
	Clear_Row_Loop:
	for(col = 0; col < img_width; col++)
	{
		buf[0][col] = 0;
		buf[1][col] = 0;
		buf[row_ind][col] = _src_mat.read(read_index++);
	}

	row_ind++;

	// Load the second source row into buffer 3.
	Read_Row2_Loop:
	for(col = 0; col < img_width; col++)
	{
		buf[row_ind][col] = _src_mat.read(read_index++);
	}
	row_ind++;

	// The loop counter starts at 2 and runs for img_height iterations.
	Row_Loop:
	for(row = 2; row < img_height+2; row++)
	{
		// Map the five physical buffers onto the logical window rows
		// (tp1, tp2, mid, bottom1, bottom2). row_ind is the buffer that is
		// (re)filled for this iteration and always ends up as bottom2.
		if(row_ind == 4)
		{
			tp1 = 0; tp2 = 1; mid = 2; bottom1 = 3; bottom2 = 4;
		}
		else if(row_ind == 0)
		{
			tp1 = 1; tp2 = 2; mid = 3; bottom1 = 4; bottom2 = 0;
		}
		else if(row_ind == 1)
		{
			tp1 = 2; tp2 = 3; mid = 4; bottom1 = 0; bottom2 = 1;
		}
		else if(row_ind == 2)
		{
			tp1 = 3; tp2 = 4; mid = 0; bottom1 = 1; bottom2 = 2;
		}
		else if(row_ind == 3)
		{
			tp1 = 4; tp2 = 0; mid = 1; bottom1 = 2; bottom2 = 3;
		}

		// Reset the first four entries of every window row buffer at the start of a row.
		src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0;
		src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0;
		src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0;
		src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0;
		src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0;

		inter_valx = inter_valy = 0;
		...
		
		// Process one full row: reads the next source row, computes and writes the gradients.
		ProcessSobel5x5<SRC_T, DST_T, ROWS, COLS, PLANES,DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>( _src_mat, _dst_matx,  _dst_maty, buf, src_buf1,	src_buf2, src_buf3, src_buf4, src_buf5,	GradientValuesX, GradientValuesY,
				inter_valx, inter_valy, img_width, img_height, row_ind, shift_x, shift_y, tp1, tp2, mid, bottom1, bottom2, row, read_index, write_index);

		...
			
		// Write the pending word of this row to both outputs.
		_dst_matx.write(write_index,inter_valx);
		_dst_maty.write(write_index++,inter_valy);

		...
		// Advance the fill position cyclically through the five buffers.
		row_ind++;
		if(row_ind == 5)
		{
			row_ind = 0;
		}
	} // Row_Loop
}
// xFSobelFilter5x5

在每一行的处理时,会调用ProcessSobel5x5的具象函数。分别计算出X方向的梯度和Y方向的梯度。
来看看ProcessSobel5x5的模板。

// Processes one image row of the 5x5 Sobel filter (excerpt: elided code is marked with "...").
//
// For every word of the row it refills the line buffer, fetches the five
// vertically aligned words, calls xFSobel5x5 to compute the gradients, shifts
// the window buffers and writes one word to each destination Mat.
//
//   buf                  five row buffers shared with the caller.
//   src_buf1..src_buf5   the five window rows (XF_NPIXPERCYCLE(NPC)+4 entries each).
//   tp1..bottom2         indices into `buf` for the five logical window rows.
//   row_ind              index of the buffer that receives the newly read row.
//   read_index/write_index  running positions in the source/destination Mats (updated in place).
template<int SRC_T, int DST_T, int ROWS, int COLS,int PLANES, int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void ProcessSobel5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,

		XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC)+4],
		XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC)+4],
		XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)], XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
		XF_SNAME(WORDWIDTH_DST) &inter_valx, XF_SNAME(WORDWIDTH_DST) &inter_valy, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t &shift_x, uint16_t &shift_y,
		ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<13> row, int &read_index, int &write_index)
{
	...
	
	Col_Loop:
	for(ap_uint<13> col = 0; col < img_width; col++)
	{
		// While source rows remain, read the next word into the fill buffer;
		// afterwards feed zeros into the bottom-most window row instead.
		if(row < img_height)
			buf[row_ind][col] = _src_mat.read(read_index++);
		else
			buf[bottom2][col] = 0;

		// Fetch the five vertically aligned words of this column.
		buf0 = buf[tp1][col];
		buf1 = buf[tp2][col];
		buf2 = buf[mid][col];
		buf3 = buf[bottom1][col];
		buf4 = buf[bottom2][col];
		
		...
		// Compute the X and Y gradients for every pixel of the current word.
		xFSobel5x5<NPC,PLANES, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY,
				src_buf1, src_buf2, src_buf3, src_buf4, src_buf5);
		...
		// Slide the window: the last four entries of each row buffer become
		// its first four entries for the next word.
		for(ap_uint<4> i = 0; i < 4; i++)
		{
			src_buf1[i] = src_buf1[buf_size-(4 - i)];
			src_buf2[i] = src_buf2[buf_size-(4 - i)];
			src_buf3[i] = src_buf3[buf_size-(4 - i)];
			src_buf4[i] = src_buf4[buf_size-(4 - i)];
			src_buf5[i] = src_buf5[buf_size-(4 - i)];
		}
		
		...
		
		// Emit one word to each destination at the same index.
		_dst_matx.write(write_index,inter_valx);
		_dst_maty.write(write_index++,inter_valy);
		
		...		
	} // Col_Loop
}

这个函数,用一个for循环体,逐点处理,直至一行全部处理完。
这个函数里,首先从window中获取了对应的数据,然后调用了xFSobel5x5的具象函数。
来看看xFSobel5x5的模板。

// Computes the 5x5 Sobel X and Y gradients for each pixel position of the
// current word.
//
// For pixel position n, the five row pointers are advanced by n so that the
// gradient helpers see elements [0..4] of every row as their 5x5 window.
// Results are stored in GradientvaluesX[n] and GradientvaluesY[n].
template<int NPC,int PLANES, int DEPTH_SRC, int DEPTH_DST>
void xFSobel5x5(
		XF_PTNAME(DEPTH_DST) *GradientvaluesX,
		XF_PTNAME(DEPTH_DST) *GradientvaluesY,
		XF_PTNAME(DEPTH_SRC) *src_buf1,
		XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3,
		XF_PTNAME(DEPTH_SRC) *src_buf4,
		XF_PTNAME(DEPTH_SRC) *src_buf5)
{
	Compute_Grad_Loop:
	for(ap_uint<5> pix = 0; pix < XF_NPIXPERCYCLE(NPC); pix++ )
	{
		// Window rows for this pixel, top to bottom.
		XF_PTNAME(DEPTH_SRC) *win_row0 = &src_buf1[pix];
		XF_PTNAME(DEPTH_SRC) *win_row1 = &src_buf2[pix];
		XF_PTNAME(DEPTH_SRC) *win_row2 = &src_buf3[pix];
		XF_PTNAME(DEPTH_SRC) *win_row3 = &src_buf4[pix];
		XF_PTNAME(DEPTH_SRC) *win_row4 = &src_buf5[pix];

		GradientvaluesX[pix] = xFGradientX5x5<PLANES,DEPTH_SRC, DEPTH_DST>(win_row0, win_row1, win_row2, win_row3, win_row4);
		GradientvaluesY[pix] = xFGradientY5x5<PLANES,DEPTH_SRC, DEPTH_DST>(win_row0, win_row1, win_row2, win_row3, win_row4);
	}
}

这个函数,在一个for循环体中,对一笔数据逐点处理,分别计算出X方向的梯度和Y方向的梯度。

再来看看xFGradientX5x5和xFGradientY5x5

/******************************************************************
 *  5x5 Sobel X-gradient kernel (columns 0..4, rows src_buf1..src_buf5)
 *
 *       ---- ----- --- ---- ---
 *      | -1 |  -2 | 0 |  2 | 1 |
 *       ---- ----- --- ---- ---
 *      | -4 |  -8 | 0 |  8 | 4 |
 *       ---- ----- --- ---- ---
 *      | -6 | -12 | 0 | 12 | 6 |
 *       ---- ----- --- ---- ---
 *      | -4 |  -8 | 0 |  8 | 4 |
 *       ---- ----- --- ---- ---
 *      | -1 |  -2 | 0 |  2 | 1 |
 *       ---- ----- --- ---- ---
 *
 *  Each plane occupies 8 bits of every source element. The result of
 *  each plane is packed into a 16-bit lane for XF_48SP / XF_16SP
 *  destinations and into an 8-bit lane otherwise; for XF_8UP / XF_24UP
 *  the value is saturated to [0, 255] first.
 ****************************************************************/

template<int PLANES,int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientX5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5)
{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) lane_val = 0;
	XF_PTNAME(DEPTH_DST) packed = 0;

	// Width of one output lane.
	const int lane_bits = ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) ? 16 : 8;
	int dst_lsb = 0;

	for(int plane = 0, lsb = 0; plane < PLANES; plane++, lsb += 8)
	{
		const int msb = lsb + 7;

		// Outer rows (1 and 5): weights -1, -2, 0, 2, 1.
		short int sub_outer_c1 = (short int)(((short int)src_buf1[1].range(msb,lsb) + (short int)src_buf5[1].range(msb,lsb)) << 1);
		short int outer_c4_minus_c0 = (short int)((short int)src_buf1[4].range(msb,lsb) + (short int)src_buf5[4].range(msb,lsb))-((short int)src_buf1[0].range(msb,lsb) + (short int)src_buf5[0].range(msb,lsb));
		short int add_outer_c3 = (short int)(((short int)src_buf1[3].range(msb,lsb) + (short int)src_buf5[3].range(msb,lsb)) << 1);

		// Inner rows (2 and 4): weights -4, -8, 0, 8, 4.
		short int sub_inner_c0 = (short int)(((short int)src_buf2[0].range(msb,lsb) + (short int)src_buf4[0].range(msb,lsb)) << 2);
		short int sub_inner_c1 = (short int)((short int)src_buf2[1].range(msb,lsb) + (short int)src_buf4[1].range(msb,lsb)) << 3;
		short int add_inner_c3 = (short int)((short int)src_buf2[3].range(msb,lsb) + (short int)src_buf4[3].range(msb,lsb)) << 3;
		short int add_inner_c4 = (short int)((short int)src_buf2[4].range(msb,lsb) + (short int)src_buf4[4].range(msb,lsb)) << 2;

		// Middle row (3): weights -6, -12, 0, 12, 6.
		short int sub_mid_c0 = (short int)src_buf3[0].range(msb,lsb) * 6;
		short int sub_mid_c1 = (short int)src_buf3[1].range(msb,lsb) * 12;
		short int add_mid_c3 = (short int)src_buf3[3].range(msb,lsb) * 12;
		short int add_mid_c4 = (short int)src_buf3[4].range(msb,lsb) * 6;

		// Accumulate the subtracted and added terms separately, then take the difference.
		short int sub_part0 = sub_outer_c1 + sub_inner_c0;
		short int sub_part1 = sub_inner_c1 + sub_mid_c0 + sub_mid_c1;
		short int add_part0 = add_outer_c3 + add_inner_c3;
		short int add_part1 = add_inner_c4 + add_mid_c3;
		short int add_part2 = add_mid_c4 + outer_c4_minus_c0;
		short int add_total = add_part0 + add_part1 + add_part2;
		short int sub_total = sub_part0 + sub_part1;
		short int grad = add_total - sub_total;

		lane_val = (XF_PTNAME(DEPTH_DST))grad;

		// Unsigned 8-bit destinations saturate instead of wrapping.
		if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
		{
			if(grad < 0)
			{
				lane_val = 0;
			}
			else if(grad > 255)
			{
				lane_val = 255;
			}
		}

		packed.range(dst_lsb+(lane_bits-1),dst_lsb)=lane_val;
		dst_lsb+=lane_bits;
	}
	return packed;
}

/****************************************************************
 *  5x5 Sobel Y-gradient kernel (columns 0..4, rows src_buf1..src_buf5)
 *
 *       ---- ---- ----- ---- ----
 *      | -1 | -4 |  -6 | -4 | -1 |
 *       ---- ---- ----- ---- ----
 *      | -2 | -8 | -12 | -8 | -2 |
 *       ---- ---- ----- ---- ----
 *      |  0 |  0 |   0 |  0 |  0 |
 *       ---- ---- ----- ---- ----
 *      |  2 |  8 |  12 |  8 |  2 |
 *       ---- ---- ----- ---- ----
 *      |  1 |  4 |   6 |  4 |  1 |
 *       ---- ---- ----- ---- ----
 *
 *  Each plane occupies 8 bits of every source element. The result of
 *  each plane is packed into a 16-bit lane for XF_48SP / XF_16SP
 *  destinations and into an 8-bit lane otherwise; for XF_8UP / XF_24UP
 *  the value is saturated to [0, 255] first.
 ******************************************************************/
template<int PLANES,int DEPTH_SRC, int  DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientY5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5)
{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) lane_val = 0;
	XF_PTNAME(DEPTH_DST) packed = 0;

	// Width of one output lane.
	const int lane_bits = ((DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP)) ? 16 : 8;
	int dst_lsb = 0;

	for(int plane = 0, lsb = 0; plane < PLANES; plane++, lsb += 8)
	{
		const int msb = lsb + 7;

		// Corner columns (0 and 4) of the outer rows: weight +1 bottom, -1 top.
		short int corners_r5_minus_r1 = ((short int)src_buf5[0].range(msb,lsb) + (short int)src_buf5[4].range(msb,lsb)) - ((short int)src_buf1[0].range(msb,lsb) + (short int)src_buf1[4].range(msb,lsb));

		// Columns 1 and 3 of the outer rows: weight 4.
		short int sub_r1_c13 = (short int)(((short int)src_buf1[1].range(msb,lsb) + (short int)src_buf1[3].range(msb,lsb)) << 2);
		short int add_r5_c13 = (short int)(((short int)src_buf5[1].range(msb,lsb) + (short int)src_buf5[3].range(msb,lsb)) << 2);

		// Columns 0 and 4 of the inner rows: weight 2.
		short int sub_r2_c04 = (short int)(((short int)src_buf2[0].range(msb,lsb) + (short int)src_buf2[4].range(msb,lsb)) << 1);
		short int add_r4_c04 = (short int)(((short int)src_buf4[0].range(msb,lsb) + (short int)src_buf4[4].range(msb,lsb)) << 1);

		// Columns 1 and 3 of the inner rows: weight 8.
		short int sub_r2_c13 = (short int)(((short int)src_buf2[1].range(msb,lsb) + (short int)src_buf2[3].range(msb,lsb)) << 3);
		short int add_r4_c13 = (short int)(((short int)src_buf4[1].range(msb,lsb) + (short int)src_buf4[3].range(msb,lsb)) << 3);

		// Centre column: weights 6 (outer rows) and 12 (inner rows).
		short int sub_r1_c2 = (short int)(src_buf1[2].range(msb,lsb) * 6);
		short int sub_r2_c2 = (short int)(src_buf2[2].range(msb,lsb) * 12);
		short int add_r4_c2 = (short int)(src_buf4[2].range(msb,lsb) * 12);
		short int add_r5_c2 = (short int)(src_buf5[2].range(msb,lsb) * 6);

		// Accumulate the subtracted and added terms separately, then take the difference.
		short int sub_part0 = sub_r1_c13 + sub_r2_c04 + sub_r2_c13;
		short int sub_part1 = sub_r1_c2 + sub_r2_c2;
		short int add_part0 = add_r5_c13 + add_r4_c04;
		short int add_part1 = add_r4_c13 + add_r4_c2;
		short int add_part2 = add_r5_c2 + corners_r5_minus_r1;
		short int add_total = add_part0 + add_part1 + add_part2;
		short int sub_total = sub_part0 + sub_part1;
		short int grad = add_total - sub_total;

		lane_val = (XF_PTNAME(DEPTH_DST))grad;

		// Unsigned 8-bit destinations saturate instead of wrapping.
		if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
		{
			if(grad < 0)
			{
				lane_val = 0;
			}
			else if(grad > 255)
			{
				lane_val = 255;
			}
		}

		packed.range(dst_lsb+(lane_bits-1),dst_lsb)=lane_val;
		dst_lsb+=lane_bits;
	}
	return packed;
}

这两个函数,按照sobel的各个位置的系数进行乘加运算,求出X方向的梯度值和Y方向的梯度值。

至此,整个sobel的过程已经分析完毕。
涉及到窗口window填充,window shift moving相关的操作,都在前几层的调用中来完成,最终调用的xFGradientX5x5和xFGradientY5x5,才是实现核算子运算的函数。
所以,对于window shift moving类型的函数实现,我们完全可以基于sobel的实现来修改移植,
关于window shift moving的操作,交给框架函数,核算子运算的函数,修改实现。

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

简单起见,我们可以采用把移植的函数放到xf的namespace中的方式来移植。如下代码:

namespace xf{
	...
} // end of namespace xf

框架函数并不需要修改代码,所以我们只是简单的把函数名修改一下即可。
例如:
最上层的Sobel函数名,修改为bayer_cfa,
其中调用了xFSobelFilter5x5,修改为bayer_cfa_Filter5x5,
在xFSobelFilter5x5中,调用了ProcessSobel5x5,这里要修改为process_bayer_cfa_5X5,
在ProcessSobel5x5中,调用了xFSobel5x5,这里要修改为bayer_cfa_5X5,
在xFSobel5x5中,调用了xFGradientX5x5和xFGradientY5x5,这是两个核算子运算函数,我们并不需要两个函数,所以只需要调用一个核算子运算函数即可。
这里,以xFGradientY5x5为基础,进行核算子运算函数的修改。取名为bayer_cfa_core_5X5。

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bayer_cfa_core_5X5代码:

// Demosaics one pixel from its 5x5 Bayer neighbourhood.
//
// Parameters:
//   src_buf1..src_buf5  the five window rows (top to bottom); elements [0..4]
//                       of each row are used and narrowed to u8.
//   row                 image row of the centre pixel (only its parity is used).
//   col, npc            combined as col * 8 + npc to form the pixel column
//                       (only its parity is used).
//   mode                Bayer layout of the image origin:
//                       0 = BG, 1 = GB, 2 = GR, anything else = RG.
//
// Returns the pixel packed as B in bits [7:0], G in [15:8] and R in [23:16],
// each channel clamped to [0, 255].
//
// NOTE(review): the factor 8 hard-codes eight pixels per word. Because
// col * 8 is always even, the column parity is effectively npc & 1 and `col`
// has no influence on the result - confirm that callers built with a
// different pixels-per-cycle setting still pass a parity-correct `npc`.
template<int PLANES,int DEPTH_SRC, int  DEPTH_DST>
XF_PTNAME(DEPTH_DST) bayer_cfa_core_5X5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
		XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4,	XF_PTNAME(DEPTH_SRC) *src_buf5, u16 row, u16 col, u16 npc, u8 mode)
{
#pragma HLS INLINE off
	XF_PTNAME(DEPTH_DST) out_val = 0;

	// Gather the 5x5 window into a plain 8-bit array for the kernel helpers.
	u8 block[5][5];
	Copy_Window_Loop:
	for (int c = 0; c < 5; c++)
	{
#pragma HLS UNROLL
		block[0][c] = src_buf1[c];
		block[1][c] = src_buf2[c];
		block[2][c] = src_buf3[c];
		block[3][c] = src_buf4[c];
		block[4][c] = src_buf5[c];
	}

	// Candidate values; which one feeds which channel depends on the colour
	// site of the centre pixel.
	const short raw = block[2][2];
	const short RB_G = kernel_RB_G(block);
	const short G_RB_h = kernel_G_RB_h(block);
	const short G_RB_v = kernel_G_RB_v(block);
	const short RB_BR = kernel_RB_BR(block);

	const u16 i = row;
	const u16 j = col * 8 + npc;

	// Normalise the position to the RG layout: BG and GB are RG shifted by one
	// row, BG and GR are RG shifted by one column.
	const bool shift_row = (mode == 0) || (mode == 1);
	const bool shift_col = (mode == 0) || (mode == 2);
	const bool ref_odd_row = (((i & 0x1) != 0) != shift_row);
	const bool ref_odd_col = (((j & 0x1) != 0) != shift_col);

	short r = 0;
	short g = 0;
	short b = 0;

	if (!ref_odd_row && !ref_odd_col)
	{
		// Red site.
		r = raw;
		g = RB_G;
		b = RB_BR;
	}
	else if (!ref_odd_row)
	{
		// Green site on a red row: red is left/right, blue is above/below.
		r = G_RB_h;
		g = raw;
		b = G_RB_v;
	}
	else if (!ref_odd_col)
	{
		// Green site on a blue row: red is above/below, blue is left/right.
		r = G_RB_v;
		g = raw;
		b = G_RB_h;
	}
	else
	{
		// Blue site.
		r = RB_BR;
		g = RB_G;
		b = raw;
	}

	// The gradient-corrected estimates can leave the 8-bit range.
	r = (r > 255) ? 255 : ((r < 0) ? 0 : r);
	g = (g > 255) ? 255 : ((g < 0) ? 0 : g);
	b = (b > 255) ? 255 : ((b < 0) ? 0 : b);

	out_val.range(7, 0) = b;
	out_val.range(15, 8) = g;
	out_val.range(23, 16) = r;

	return out_val;
}

其中,用到的几个计算函数,如下所示:

// Estimates the green value at a red or blue centre pixel (block[2][2]).
//
// Neighbourhood, indexed block[y][x] (row first, column second):
//
//	0 1 2 3 4
//	R G R G R 0
//	G B G B G 1
//	R G R G R 2
//	G B G B G 3
//	R G R G R 4
//
//	hd = |G21 - G23| + |R22 * 2 - R20 - R24|
//	vd = |G12 - G32| + |R22 * 2 - R02 - R42|
//	gh = (G21 + G23) / 2 + (R22 * 2 - R20 - R24) / 4
//	gv = (G12 + G32) / 2 + (R22 * 2 - R02 - R42) / 4
//	ge = (gh + gv) / 2
//
// Returns the estimate along the direction with the smaller difference,
// or the mean of both estimates when the differences are equal.
short kernel_RB_G(u8 block[5][5]) 
{
	const u8 centre = block[2][2];

	// Horizontal neighbours at distance 1 and 2.
	const u8 west1 = block[2][1];
	const u8 east1 = block[2][3];
	const u8 west2 = block[2][0];
	const u8 east2 = block[2][4];

	// Vertical neighbours at distance 1 and 2.
	const u8 north1 = block[1][2];
	const u8 south1 = block[3][2];
	const u8 north2 = block[0][2];
	const u8 south2 = block[4][2];

	const short diff_h = ABS_DEC(west1, east1) + ABS_DEC(centre * 2, west2 + east2);
	const short diff_v = ABS_DEC(north1, south1) + ABS_DEC(centre * 2, north2 + south2);

	const short est_h = (west1 + east1) / 2 + (centre * 2 - west2 - east2) / 4;
	const short est_v = (north1 + south1) / 2 + (centre * 2 - north2 - south2) / 4;
	const short est_mean = (est_h + est_v) / 2;

	if (diff_h == diff_v) {
		return est_mean;
	}
	return (diff_h < diff_v) ? est_h : est_v;
}

// At a green centre pixel (block[2][2]), returns the average of the two
// horizontal neighbours block[2][1] and block[2][3].
//
//	0 1 2 3 4
//	G B G B G 0
//	R G R G R 1
//	G B G B G 2
//	R G R G R 3
//	G B G B G 4
short kernel_G_RB_h(u8 block[5][5])
{
	const u8 west = block[2][1];
	const u8 east = block[2][3];
	return (west + east) / 2;
}

// At a green centre pixel (block[2][2]), returns the average of the two
// vertical neighbours block[1][2] and block[3][2].
//
//	0 1 2 3 4
//	G B G B G 0
//	R G R G R 1
//	G B G B G 2
//	R G R G R 3
//	G B G B G 4
short kernel_G_RB_v(u8 block[5][5])
{
	const u8 north = block[1][2];
	const u8 south = block[3][2];
	return (north + south) / 2;
}

// At a red or blue centre pixel (block[2][2]), estimates the opposite colour
// from the four diagonal neighbours.
//
//	0 1 2 3 4
//	R G R G R 0
//	G B G B G 1
//	R G R G R 2
//	G B G B G 3
//	R G R G R 4
//
// The pair with the smaller absolute difference is averaged; when both
// differences are equal the mean of the two pair averages is returned.
short kernel_RB_BR(u8 block[5][5])
{
	// Diagonal running top-left -> bottom-right.
	const u8 nw = block[1][1];
	const u8 se = block[3][3];
	// Diagonal running top-right -> bottom-left.
	const u8 ne = block[1][3];
	const u8 sw = block[3][1];

	const short diff_main = ABS_DEC(nw, se);
	const short diff_anti = ABS_DEC(ne, sw);

	const short est_main = (nw + se) / 2;
	const short est_anti = (ne + sw) / 2;
	const short est_mean = (est_main + est_anti) / 2;

	if (diff_main == diff_anti) {
		return est_mean;
	}
	return (diff_main < diff_anti) ? est_main : est_anti;
}
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值