本例利用之前介绍的基础函数进行功能级实现。
/**
 * Top-level HLS kernel: reads a raw Bayer frame through an AXI master port,
 * demosaics and resizes it, and emits the result on an AXI4-Stream port.
 *
 * Processing chain (one dataflow region, stages connected by streams):
 *   array2stream -> stream_nppc_down -> stream2xfMat -> xf::lz::bayer_cfa
 *   -> xf::resize -> xfMat_nppc_down -> xfMat_hflip -> xf::xfMat2AXIvideo
 *
 * Most configuration registers carry two parameter sets packed side by side;
 * param_en selects the active one (0 = low half, otherwise high half).
 *
 * @param src            Source buffer, accessed through the m_axi port.
 * @param dst            AXI4-Stream output.
 * @param index_in       4-bit input index (a plain port, not on CONTROL_BUS).
 * @param index_out      Receives index_in - delay_frame (4-bit, wraps modulo 16).
 * @param param_en       Parameter-set selector, see above.
 * @param flip_mode      Two 8-bit fields; in the selected field bit 0 is the
 *                       vertical-flip flag and bit 1 the horizontal-flip flag.
 * @param crop_x         Two 16-bit fields (crop origin, x).
 * @param crop_y         Two 16-bit fields (crop origin, y).
 * @param crop_width     Two 16-bit fields; also the width of img0/img1.
 * @param crop_height    Two 16-bit fields; also the height of img0/img1.
 * @param bayer_mode     Two 8-bit fields; the selected one is passed to bayer_cfa.
 * @param dst_width      Two 16-bit fields; width of img2..img4.
 * @param dst_height     Two 16-bit fields; height of img2..img4.
 * @param sensor_width   Passed to array2stream unchanged.
 * @param sensor_height  Passed to array2stream unchanged.
 * @param delay_frame    Subtracted from index_in to form the working index.
 */
void bayer_video_accl(T_UINT(AXIMM_BPP, AXIMM_NPPC)* src, hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)>& dst, \
ap_uint<4> index_in, ap_uint<4>& index_out, ap_uint<1> param_en, \
u16 flip_mode, u32 crop_x, u32 crop_y, u32 crop_width, u32 crop_height, u16 bayer_mode, u32 dst_width, u32 dst_height, u16 sensor_width, u16 sensor_height, ap_uint<4> delay_frame)
{
// Data ports: memory-mapped AXI master for the source, AXI4-Stream for the output.
#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst
// Block-level control (port=return) and all configuration arguments share one
// AXI4-Lite slave named CONTROL_BUS, i.e. they become registers on that bus.
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS
// index_in and the configuration registers are additionally declared ap_stable
// (their values are expected to stay constant while the kernel is running).
// NOTE(review): param_en is on CONTROL_BUS but is not marked ap_stable — confirm this is intended.
#pragma HLS INTERFACE ap_stable port=index_in
#pragma HLS INTERFACE ap_stable port=flip_mode
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame
// Working index: 4-bit unsigned subtraction, so the result wraps modulo 16.
// It is echoed on index_out and handed to array2stream
// (presumably to select the frame buffer inside src — confirm in array2stream).
const ap_uint<4> index = index_in - delay_frame;
printf("index:%d\n", (int)index);
index_out = index;
// Unpack the active parameter set: param_en == 0 takes the low half of each
// register, any other value takes the high half.
const u8 param_flip_mode = (param_en == 0) ? (flip_mode & 0xff) : ((flip_mode >> 8) & 0xff);
const u16 param_crop_x = (param_en == 0) ? (crop_x & 0xffff) : ((crop_x >> 16) & 0xffff);
const u16 param_crop_y = (param_en == 0) ? (crop_y & 0xffff) : ((crop_y >> 16) & 0xffff);
const u16 param_crop_width = (param_en == 0) ? (crop_width & 0xffff) : ((crop_width >> 16) & 0xffff);
const u16 param_crop_height = (param_en == 0) ? (crop_height & 0xffff) : ((crop_height >> 16) & 0xffff);
const u8 param_bayer_mode = (param_en == 0) ? (bayer_mode & 0xff) : ((bayer_mode >> 8) & 0xff);
const u16 param_dst_width = (param_en == 0) ? (dst_width & 0xffff) : ((dst_width >> 16) & 0xffff);
const u16 param_dst_height = (param_en == 0) ? (dst_height & 0xffff) : ((dst_height >> 16) & 0xffff);
// flip_mode bit 0 -> vertical flip (consumed by array2stream),
// flip_mode bit 1 -> horizontal flip (consumed by xfMat_hflip).
ap_uint<1> param_vflip_mode = param_flip_mode & 0x1;
ap_uint<1> param_hflip_mode = (param_flip_mode >> 1) & 0x1;
// Raw word streams between the memory reader and the first xf::Mat.
hls::stream<T_UINT(AXIMM_BPP, AXIMM_NPPC)> inter0("inter0");
hls::stream<T_UINT(BAYER_BPP, BAYER_NPPC)> inter1("inter1");
#pragma HLS STREAM variable=inter0 depth=1024
#pragma HLS STREAM variable=inter1 depth=16
// Intermediate images: img0/img1 use the crop size, img2..img4 the output size.
// NOTE(review): img1 is declared with BAYER_NPPC but is passed to xf::resize
// instantiated with VIDEO_NPPC, so the two constants must be equal.
xf::Mat<BAYER_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img0(param_crop_height, param_crop_width);
xf::Mat<VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC> img1(param_crop_height, param_crop_width);
xf::Mat<VIDEO_TYPE, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC> img2(param_dst_height, param_dst_width);
xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img3(param_dst_height, param_dst_width);
xf::Mat<VPOST_TYPE, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC> img4(param_dst_height, param_dst_width);
// The Mat payloads are implemented as FIFOs of the given depth instead of full frame buffers.
#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16
// Everything below forms a task-level pipeline.
#pragma HLS dataflow
// Stage 1: read from src into a stream; receives the crop window, the sensor size and the vertical-flip flag.
array2stream<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC>(src, inter0, index, param_crop_x, param_crop_y, param_crop_width, param_crop_height, sensor_width, sensor_height, param_vflip_mode);
// Stage 2: repack from AXIMM_NPPC to BAYER_NPPC pixels per word.
stream_nppc_down<AXIMM_BPP, AXIMM_HEIGHT, AXIMM_WIDTH, AXIMM_NPPC, BAYER_NPPC>(inter0, inter1, param_crop_width, param_crop_height);
// Stage 3: wrap the stream as an xf::Mat.
stream2xfMat<BAYER_TYPE, BAYER_BPP, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC>(inter1, img0);
// Stage 4: 5x5 CFA demosaic using the selected Bayer mode.
xf::lz::bayer_cfa<XF_BORDER_CONSTANT, XF_FILTER_5X5, BAYER_TYPE, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC, 0>(img0, img1, param_bayer_mode);
// Stage 5: bilinear resize from the crop size to the output size.
xf::resize <XF_INTERPOLATION_BILINEAR, VIDEO_TYPE, BAYER_HEIGHT, BAYER_WIDTH, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, 5> (img1, img2);
// Stage 6: repack from VIDEO_NPPC to VPOST_NPPC pixels per word.
xfMat_nppc_down<VIDEO_TYPE, VIDEO_BPP, VIDEO_HEIGHT, VIDEO_WIDTH, VIDEO_NPPC, VPOST_NPPC>(img2, img3);
// Stage 7: horizontal flip, controlled by param_hflip_mode.
xfMat_hflip<VPOST_TYPE, VPOST_BPP, VPOST_HEIGHT, VPOST_WIDTH, VPOST_NPPC>(img3, img4, param_hflip_mode);
// Stage 8: serialise the final image onto the AXI4-Stream output.
xf::xfMat2AXIvideo(img4, dst);
}
输入源对象是AXIMM,输出目的对象是AXIS的stream。此外,本函数还需要一系列的配置参数。
对于src和dst,被添加了interface约束。
其中,src被实现为m_axi接口,dst则被实现为axis接口。
#pragma HLS INTERFACE m_axi depth=970400 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst
对于return, crop_x ,crop_y ,crop_width ,crop_height 等等,被添加了interface约束。
它们被实现为s_axilite接口,并绑定到同一个AXILITE总线中,所以,它们被实现为位于CONTROL_BUS这个总线上的具有不同读写地址的REG。
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=param_en bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=flip_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=bayer_mode bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=dst_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=dst_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=sensor_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=sensor_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=delay_frame bundle=CONTROL_BUS
对于index_in,被添加了interface约束。
它被实现为input接口,且不受到ap_rst的复位影响,所以使用了ap_stable类型。
#pragma HLS INTERFACE ap_stable port=index_in
同样的,crop_x ,crop_y ,crop_width ,crop_height, 等等,我们也不希望它们受到ap_rst的复位影响,一旦MCU写入了REG,除非下一次写入更新,否则值不变。所以,它们也使用了ap_stable类型。
#pragma HLS INTERFACE ap_stable port=flip_mode
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height
#pragma HLS INTERFACE ap_stable port=bayer_mode
#pragma HLS INTERFACE ap_stable port=dst_width
#pragma HLS INTERFACE ap_stable port=dst_height
#pragma HLS INTERFACE ap_stable port=sensor_width
#pragma HLS INTERFACE ap_stable port=sensor_height
#pragma HLS INTERFACE ap_stable port=delay_frame
对于index_out,默认被添加了interface约束,使用ap_vld类型。
它被实现为output接口,伴随着vld指示信号。
函数内定义了两个stream类型的局部对象(inter0和inter1),并为它们添加了stream约束,用于指定FIFO的深度。
#pragma HLS STREAM variable=inter0 depth=1024
#pragma HLS STREAM variable=inter1 depth=16
函数内还定义了五个xf::Mat类型的局部对象(img0~img4),同样为它们的data成员添加了stream约束。
#pragma HLS stream variable=img0.data dim=1 depth=16
#pragma HLS stream variable=img1.data dim=1 depth=1024
#pragma HLS stream variable=img2.data dim=1 depth=1024
#pragma HLS stream variable=img3.data dim=1 depth=16
#pragma HLS stream variable=img4.data dim=1 depth=16
函数整体处理的调用过程,被添加了dataflow约束。
#pragma HLS DATAFLOW
注意,这里使用的函数bayer_cfa,它用到了C++的命名空间的编程技巧。
为了防止和其他的函数或者变量重名,如果在C语言中,需要手动扩展函数名,例如加前缀或者后缀来解决这个问题。但是在C++中,可以更简便的解决这个问题,就是使用namespace。
编译过程中,编译器实际上也是通过加前缀或者后缀的方式来解决重名问题的,只不过,这个过程不再是手动完成的,从而减轻工作量,并降低出错概率。
后续将介绍bayer_cfa这个函数的实现。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
然后来看看testbench,
#include "bayer_video_accl.h"
#include "bayer_video_tb.h"
#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "common/xf_axi.h"
using namespace cv;
需要使用opencv库,并使用namespace。
/**
 * C-simulation testbench for bayer_video_accl.
 *
 * Loads the Bayer image named on the command line, runs the kernel once with
 * parameter set 0, converts the AXI4-Stream output back into a cv::Mat and
 * writes both the result ("hls.bmp") and the input ("image.bmp") to disk.
 *
 * @return 0 when the kernel was run and the images were written; -1 when the
 *         arguments are wrong, the image cannot be loaded, or its size does
 *         not match BAYER_WIDTH x BAYER_HEIGHT. A non-zero value is required
 *         on these paths so that C simulation does not report a pass when
 *         the kernel was never exercised.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        printf("usage: %s bayer.png\n", argv[0]);
        return -1;
    }

    // Flag 0 makes imread return a single-channel 8-bit image.
    cv::Mat in_img;
    in_img = cv::imread(argv[1], 0);
    if (in_img.data == NULL)
    {
        fprintf(stderr,"Cannot open image at %s\n", argv[1]);
        return -1;  // was 0: a missing input must fail the simulation
    }

    int width = in_img.size().width;
    int height = in_img.size().height;

    // With index_in == delay_frame the kernel works on index 0.
    ap_uint<4> index_in = 2;
    ap_uint<4> index_out = 0;
    ap_uint<4> delay_frame = 2;

    // bit 0 = vertical flip, bit 1 = horizontal flip (as decoded in
    // bayer_video_accl), so 3 enables both flips.
    u8 flip_mode = 3;
    u8 bayer_mode = 1;

    // Crop window: full frame here. The offset formula centres a smaller
    // window and rounds the offset down to an even value.
    int crop_width = width;
    int crop_height = height;
    int crop_x = (width - crop_width)/4*2;
    int crop_y = (height - crop_height)/4*2;
    int dst_width = 2112;
    int dst_height = 1216;
    printf("width=%d, height=%d, (%d %d %d %d)\n", width, height, crop_x, crop_y, crop_width, crop_height);

    if ((width != BAYER_WIDTH) || (height != BAYER_HEIGHT))
    {
        printf("image size is wrong, real:%dx%d, needed:%dx%d \n", width, height, BAYER_WIDTH, BAYER_HEIGHT);
        return -1;  // was 0: a size mismatch must fail the simulation
    }

    cv::Mat out_img;
    out_img.create(dst_height, dst_width, CV_8UC3);

    // The kernel's m_axi port is modelled by pointing straight at the pixel
    // data of the input image.
    T_UINT(AXIMM_BPP, AXIMM_NPPC)* src = (T_UINT(AXIMM_BPP, AXIMM_NPPC)*)(in_img.data);
    hls::stream<T_AXIU(VPOST_BPP, VPOST_NPPC)> dst;

    // param_en = 0: the kernel uses the low half of every packed register,
    // which is where the plain values passed here end up.
    bayer_video_accl(src, dst, index_in, index_out, 0, flip_mode, crop_x, crop_y, crop_width, crop_height, bayer_mode, dst_width, dst_height, width, height, delay_frame);

    // Drain the output stream into an image and save both images for inspection.
    AXIvideo2cvMatxf<VPOST_NPPC>(dst, out_img);
    cv::imwrite("hls.bmp", out_img);
    cv::imwrite("image.bmp", in_img);
    printf("test ok!\n");
    return 0;
}
主体框架和基本框架一样,
这里,由于src是AXIMM,所以,直接取出输入的cvMat对象(in_img)的data指针,赋值给src。
这里,使用了AXIvideo2cvMatxf的具象函数,将AXIS的流转换成cvMat对象,方便后面输出为文件。
+++++++++++++++++++++++++++++++++++++++++++++++++++
补充,bayer_cfa的实现。
cfa是一个5X5的窗口,我们可以借鉴xf_sobel的5X5的窗口的实现方式,来修改定制出适合我们的CFA算法的5X5的窗口的函数。
首先看看xf_sobel是怎么实现的。
#include "common/xf_common.h"
#include "common/xf_utility.h"
#include "hls_stream.h"
typedef unsigned int uint32_t;
需要使用到xfopencv的一些库函数。所以包含相关的H文件。
/**
 * Public Sobel entry point (excerpt; the line marked "..." is elided in this
 * listing). It is a thin wrapper: it derives the image size from _src_mat and
 * forwards to the filter-size-specific implementation.
 *
 * @tparam BORDER_TYPE  Border handling selector (not used in the visible part).
 * @tparam FILTER_TYPE  Kernel size selector; only XF_FILTER_5X5 is shown here.
 * @tparam SRC_T        Pixel type of the source image.
 * @tparam DST_T        Pixel type of both gradient images.
 * @tparam ROWS         Maximum image height.
 * @tparam COLS         Maximum image width.
 * @tparam NPC          Pixels-per-cycle selector.
 * @tparam USE_URAM     Forwarded to xFSobelFilter5x5.
 * @param _src_mat   Input image.
 * @param _dst_matx  Output: gradient in X.
 * @param _dst_maty  Output: gradient in Y.
 */
template<int BORDER_TYPE,int FILTER_TYPE, int SRC_T,int DST_T, int ROWS, int COLS,int NPC=1,bool USE_URAM=false>
void Sobel(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty)
{
#pragma HLS INLINE OFF
...
// Width is expressed in words: the column count shifted right by XF_BITSHIFT(NPC)
// (presumably log2 of the pixels packed per word — confirm in xf_params.h).
uint16_t width = _src_mat.cols >> XF_BITSHIFT(NPC);
uint16_t height = _src_mat.rows;
if(FILTER_TYPE == XF_FILTER_5X5)
{
// Derive channel count, pixel depths and word widths from the type/NPC pair
// and hand everything to the 5x5 implementation.
xFSobelFilter5x5<SRC_T, DST_T, ROWS,COLS,XF_CHANNELS(SRC_T,NPC),XF_DEPTH(SRC_T,NPC),XF_DEPTH(DST_T,NPC),NPC,XF_WORDWIDTH(SRC_T,NPC),XF_WORDWIDTH(DST_T,NPC),(COLS >> XF_BITSHIFT(NPC)),USE_URAM>
(_src_mat, _dst_matx, _dst_maty, height, width);
}
}
这个函数只是一个封装函数,实际调用的是xFSobelFilter5x5的具象函数。
来看看这个xFSobelFilter5x5模板函数。
/**
 * Row-level driver of the 5x5 Sobel filter (excerpt; lines marked "..." are
 * elided in this listing, including the declarations of the locals used below).
 *
 * Keeps five image lines in buf[0..4], used as a circular line buffer, and
 * calls ProcessSobel5x5 once per output row.
 *
 * @param _src_mat    Input image, read sequentially through read_index.
 * @param _dst_matx   Output: gradient in X.
 * @param _dst_maty   Output: gradient in Y.
 * @param img_height  Image height in rows.
 * @param img_width   Image width in words (see the caller).
 */
template<int SRC_T, int DST_T, int ROWS, int COLS, int PLANES,int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC,bool USE_URAM>
void xFSobelFilter5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,
uint16_t img_height, uint16_t img_width)
{
...
// Prologue: lines 0 and 1 of the buffer are zeroed (they act as the two rows
// above the image), and the first image row is loaded into line 2.
row_ind = 2;
Clear_Row_Loop:
for(col = 0; col < img_width; col++)
{
buf[0][col] = 0;
buf[1][col] = 0;
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
// The second image row goes into line 3; afterwards row_ind == 4 points at the
// line that will receive the next incoming row.
Read_Row2_Loop:
for(col = 0; col < img_width; col++)
{
buf[row_ind][col] = _src_mat.read(read_index++);
}
row_ind++;
// One iteration per output row. "row" is the index of the row being fetched,
// which runs two rows ahead of the row being produced.
Row_Loop:
for(row = 2; row < img_height+2; row++)
{
// Map the five window rows (top to bottom) onto the circular buffer lines.
// bottom2 always equals row_ind, i.e. the line being (re)filled this iteration.
if(row_ind == 4)
{
tp1 = 0; tp2 = 1; mid = 2; bottom1 = 3; bottom2 = 4;
}
else if(row_ind == 0)
{
tp1 = 1; tp2 = 2; mid = 3; bottom1 = 4; bottom2 = 0;
}
else if(row_ind == 1)
{
tp1 = 2; tp2 = 3; mid = 4; bottom1 = 0; bottom2 = 1;
}
else if(row_ind == 2)
{
tp1 = 3; tp2 = 4; mid = 0; bottom1 = 1; bottom2 = 2;
}
else if(row_ind == 3)
{
tp1 = 4; tp2 = 0; mid = 1; bottom1 = 2; bottom2 = 3;
}
// Clear the first four entries of every window row and the packed outputs
// before starting a new image row.
src_buf1[0] = src_buf1[1] = src_buf1[2] = src_buf1[3] = 0;
src_buf2[0] = src_buf2[1] = src_buf2[2] = src_buf2[3] = 0;
src_buf3[0] = src_buf3[1] = src_buf3[2] = src_buf3[3] = 0;
src_buf4[0] = src_buf4[1] = src_buf4[2] = src_buf4[3] = 0;
src_buf5[0] = src_buf5[1] = src_buf5[2] = src_buf5[3] = 0;
inter_valx = inter_valy = 0;
...
// Process one full row: fetches the next input row and produces output words.
ProcessSobel5x5<SRC_T, DST_T, ROWS, COLS, PLANES,DEPTH_SRC, DEPTH_DST, NPC, WORDWIDTH_SRC, WORDWIDTH_DST, TC>( _src_mat, _dst_matx, _dst_maty, buf, src_buf1, src_buf2, src_buf3, src_buf4, src_buf5, GradientValuesX, GradientValuesY,
inter_valx, inter_valy, img_width, img_height, row_ind, shift_x, shift_y, tp1, tp2, mid, bottom1, bottom2, row, read_index, write_index);
...
// Write the word still pending after the column loop
// (the code that prepares it is elided in this listing).
_dst_matx.write(write_index,inter_valx);
_dst_maty.write(write_index++,inter_valy);
...
// Advance the circular line index, wrapping from 4 back to 0.
row_ind++;
if(row_ind == 5)
{
row_ind = 0;
}
} // Row_Loop
}
// xFSobelFilter5x5
在每一行的处理时,会调用ProcessSobel5x5的具象函数。分别计算出X方向的梯度和Y方向的梯度。
来看看ProcessSobel5x5的模板。
/**
 * Processes one image row of the 5x5 Sobel filter (excerpt; lines marked "..."
 * are elided in this listing, including the declarations of buf0..buf4 and
 * buf_size and the code that unpacks words into the src_buf windows).
 *
 * @param _src_mat         Input image; the next row is read from it while rows remain.
 * @param _dst_matx        Output: gradient in X.
 * @param _dst_maty        Output: gradient in Y.
 * @param buf              Five-line circular line buffer, one word per column.
 * @param src_buf1..5      Per-row pixel windows, top row to bottom row.
 * @param GradientValuesX  Per-pixel X gradients of the current word.
 * @param GradientValuesY  Per-pixel Y gradients of the current word.
 * @param inter_valx       Packed X output word being assembled.
 * @param inter_valy       Packed Y output word being assembled.
 * @param img_width        Row length in words.
 * @param img_height       Image height in rows.
 * @param row_ind          Buffer line that receives the incoming row.
 * @param shift_x, shift_y Used by the elided packing code.
 * @param tp1, tp2, mid, bottom1, bottom2  Buffer lines holding the five window rows.
 * @param row              Index of the row being fetched.
 * @param read_index       Running read position in _src_mat.
 * @param write_index      Running write position in the outputs.
 */
template<int SRC_T, int DST_T, int ROWS, int COLS,int PLANES, int DEPTH_SRC, int DEPTH_DST, int NPC, int WORDWIDTH_SRC, int WORDWIDTH_DST, int TC>
void ProcessSobel5x5(xf::Mat<SRC_T, ROWS, COLS, NPC> & _src_mat,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_matx,xf::Mat<DST_T, ROWS, COLS, NPC> & _dst_maty,
XF_SNAME(WORDWIDTH_SRC) buf[5][(COLS >> XF_BITSHIFT(NPC))], XF_PTNAME(DEPTH_SRC) src_buf1[XF_NPIXPERCYCLE(NPC)+4],
XF_PTNAME(DEPTH_SRC) src_buf2[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf3[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf4[XF_NPIXPERCYCLE(NPC)+4], XF_PTNAME(DEPTH_SRC) src_buf5[XF_NPIXPERCYCLE(NPC)+4],
XF_PTNAME(DEPTH_DST) GradientValuesX[XF_NPIXPERCYCLE(NPC)], XF_PTNAME(DEPTH_DST) GradientValuesY[XF_NPIXPERCYCLE(NPC)],
XF_SNAME(WORDWIDTH_DST) &inter_valx, XF_SNAME(WORDWIDTH_DST) &inter_valy, uint16_t img_width, uint16_t img_height, ap_uint<13> row_ind, uint16_t &shift_x, uint16_t &shift_y,
ap_uint<4> tp1, ap_uint<4> tp2, ap_uint<4> mid, ap_uint<4> bottom1, ap_uint<4> bottom2, ap_uint<13> row, int &read_index, int &write_index)
{
...
// One iteration per word of the row.
Col_Loop:
for(ap_uint<13> col = 0; col < img_width; col++)
{
// While input rows remain, store the incoming word in the line being refilled;
// past the last image row, feed zeros instead (bottom border).
if(row < img_height)
buf[row_ind][col] = _src_mat.read(read_index++);
else
buf[bottom2][col] = 0;
// Fetch the five vertically aligned words that form the current window column.
buf0 = buf[tp1][col];
buf1 = buf[tp2][col];
buf2 = buf[mid][col];
buf3 = buf[bottom1][col];
buf4 = buf[bottom2][col];
...
// Compute X and Y gradients for every pixel of the current word.
xFSobel5x5<NPC,PLANES, DEPTH_SRC, DEPTH_DST>(GradientValuesX, GradientValuesY,
src_buf1, src_buf2, src_buf3, src_buf4, src_buf5);
...
// Slide the windows: the last four entries of each row window move to the
// front so they overlap with the pixels of the next word.
for(ap_uint<4> i = 0; i < 4; i++)
{
src_buf1[i] = src_buf1[buf_size-(4 - i)];
src_buf2[i] = src_buf2[buf_size-(4 - i)];
src_buf3[i] = src_buf3[buf_size-(4 - i)];
src_buf4[i] = src_buf4[buf_size-(4 - i)];
src_buf5[i] = src_buf5[buf_size-(4 - i)];
}
...
// Emit the packed output words for both gradients at the same position.
_dst_matx.write(write_index,inter_valx);
_dst_maty.write(write_index++,inter_valy);
...
} // Col_Loop
}
这个函数,用一个for循环体,逐点处理,直至一行全部处理完。
这个函数里,首先从行缓存中取出当前列对应的数据填充到window,然后调用了xFSobel5x5的具象函数。
来看看xFSobel5x5的模板。
/**
 * Computes the X and Y Sobel gradients for every pixel of the current word.
 *
 * For pixel index px the 5x5 window starts at element px of each row buffer,
 * so each row buffer must hold XF_NPIXPERCYCLE(NPC) + 4 valid entries.
 *
 * @param GradientvaluesX  Output: one X gradient per pixel.
 * @param GradientvaluesY  Output: one Y gradient per pixel.
 * @param src_buf1..5      Row windows, top row to bottom row.
 */
template<int NPC,int PLANES, int DEPTH_SRC, int DEPTH_DST>
void xFSobel5x5(
        XF_PTNAME(DEPTH_DST) *GradientvaluesX,
        XF_PTNAME(DEPTH_DST) *GradientvaluesY,
        XF_PTNAME(DEPTH_SRC) *src_buf1,
        XF_PTNAME(DEPTH_SRC) *src_buf2,
        XF_PTNAME(DEPTH_SRC) *src_buf3,
        XF_PTNAME(DEPTH_SRC) *src_buf4,
        XF_PTNAME(DEPTH_SRC) *src_buf5)
{
Compute_Grad_Loop:
    for (ap_uint<5> px = 0; px < XF_NPIXPERCYCLE(NPC); px++)
    {
        // Horizontal derivative of the window anchored at px.
        const XF_PTNAME(DEPTH_DST) grad_x = xFGradientX5x5<PLANES, DEPTH_SRC, DEPTH_DST>(
                &src_buf1[px], &src_buf2[px], &src_buf3[px], &src_buf4[px], &src_buf5[px]);
        GradientvaluesX[px] = grad_x;

        // Vertical derivative of the same window.
        const XF_PTNAME(DEPTH_DST) grad_y = xFGradientY5x5<PLANES, DEPTH_SRC, DEPTH_DST>(
                &src_buf1[px], &src_buf2[px], &src_buf3[px], &src_buf4[px], &src_buf5[px]);
        GradientvaluesY[px] = grad_y;
    }
}
这个函数,在一个for循环体中,对一笔数据逐点处理,分别计算出X方向的梯度和Y方向的梯度。
再来看看xFGradientX5x5和xFGradientY5x5
/******************************************************************
* Sobel Filter X-Gradient used is 5x5
*
* --- ---- ---- ---- ---
* | -1 | -2 | 0 | 2 | 1 |
* --- ---- ---- ---- ---
* | -4 | -8 | 0 | 8 | 4 |
* --- ---- ---- ---- ---
* | -6 | -12 | 0 | 12 | 6 |
* --- ---- ---- ---- ---
* | -4 | -8 | 0 | 8 | 4 |
* --- ---- ---- ---- ---
* | -1 | -2 | 0 | 2 | 1 |
* --- ---- ---- ---- ---
****************************************************************/
/**
 * Applies the 5x5 X-gradient kernel above to one window.
 *
 * @param src_buf1..5  Pointers to the leftmost pixel of the window in rows
 *                     0 (top) to 4 (bottom); elements [0..4] are read.
 * @return Gradient value, one STEP-bit field per plane. Each plane of the
 *         source is read as an 8-bit field. For XF_8UP / XF_24UP outputs the
 *         value is clamped to [0, 255].
 */
template<int PLANES,int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientX5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4, XF_PTNAME(DEPTH_SRC) *src_buf5)
{
#pragma HLS INLINE off
XF_PTNAME(DEPTH_DST) g_x = 0,out_val=0;
// STEP is the width of one output plane: 16 bits for the signed 16-bit
// output depths, 8 bits otherwise. p is the bit offset of the current plane.
int STEP,p=0;
if( (DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP) )
{
STEP=16;
}
else
{
STEP=8;
}
// k is the bit offset of the current plane inside each source pixel.
for(int i=0,k=0;i< PLANES;i++,k+=8)
{
// Naming: M* terms carry negative kernel weights and are subtracted at the
// end; A* terms carry positive weights and are added. Symmetric rows
// (1 & 5, 2 & 4) are summed first so each weight is applied once.
// Rows 0 and 4, column 1: weight -2.
short int M00 = (short int)(((short int)src_buf1[1].range(k+7,k) + (short int)src_buf5[1].range(k+7,k)) << 1);
// Rows 0 and 4, columns 4 (+1) and 0 (-1): already signed, added via A2.
short int M01 = (short int)((short int)src_buf1[4].range(k+7,k) + (short int)src_buf5[4].range(k+7,k))-((short int)src_buf1[0].range(k+7,k) + (short int)src_buf5[0].range(k+7,k));
// Rows 0 and 4, column 3: weight +2.
short int A00 = (short int)(((short int)src_buf1[3].range(k+7,k) + (short int)src_buf5[3].range(k+7,k)) << 1);
// Rows 1 and 3: weights -4, -8, +8, +4 for columns 0, 1, 3, 4.
short int M02 = (short int)(((short int)src_buf2[0].range(k+7,k) + (short int)src_buf4[0].range(k+7,k)) << 2);
short int M03 = (short int)((short int)src_buf2[1].range(k+7,k) + (short int)src_buf4[1].range(k+7,k)) << 3;
short int A01 = (short int)((short int)src_buf2[3].range(k+7,k) + (short int)src_buf4[3].range(k+7,k)) << 3;
short int A02 = (short int)((short int)src_buf2[4].range(k+7,k) + (short int)src_buf4[4].range(k+7,k)) << 2;
// Row 2 (centre row): weights -6, -12, +12, +6 for columns 0, 1, 3, 4.
short int M04 = (short int)src_buf3[0].range(k+7,k) * 6;
short int M05 = (short int)src_buf3[1].range(k+7,k) * 12;
short int A03 = (short int)src_buf3[3].range(k+7,k) * 12;
short int A04 = (short int)src_buf3[4].range(k+7,k) * 6;
// Partial sums, then gradient = (positive terms) - (negative terms).
short int S00 = M00 + M02;
short int S01 = M03 + M04 + M05;
short int A0 = A00 + A01;
short int A1 = A02 + A03;
short int A2 = A04 + M01;
short int FA = A0 + A1 + A2;
short int FS = S00 + S01;
short int out_x = FA - FS;
g_x = (XF_PTNAME(DEPTH_DST))out_x;
// Unsigned 8-bit outputs cannot represent the full range: clamp to [0, 255].
if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
{
if(out_x < 0)
g_x = 0;
else if (out_x > 255)
g_x = 255;
}
// Store this plane's result in its field of the packed return value.
out_val.range(p+(STEP-1),p)=g_x;
p+=STEP;
}
return out_val;
}
/****************************************************************
* Sobel Filter Y-Gradient used is 5x5
*
* --- ---- ---- ---- ---
* | -1 | -4 | -6 | -4 | -1 |
* --- ---- ---- ---- ---
* | -2 | -8 | -12 | -8 | -2 |
* --- ---- ---- ---- ---
* | 0 | 0 | 0 | 0 | 0 |
* --- ---- ---- ---- --- ---
* | 2 | 8 | 12 | 8 | 2 |
* --- ---- ---- ---- --- ---
* | 1 | 4 | 6 | 4 | 1 |
* --- ---- ---- ---- --- ---
******************************************************************/
/**
 * Applies the 5x5 Y-gradient kernel above to one window.
 *
 * @param src_buf1..5  Pointers to the leftmost pixel of the window in rows
 *                     0 (top) to 4 (bottom); elements [0..4] are read.
 *                     src_buf3 (the zero-weight centre row) is not used.
 * @return Gradient value, one STEP-bit field per plane. Each plane of the
 *         source is read as an 8-bit field. For XF_8UP / XF_24UP outputs the
 *         value is clamped to [0, 255].
 */
template<int PLANES,int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST) xFGradientY5x5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4, XF_PTNAME(DEPTH_SRC) *src_buf5)
{
#pragma HLS INLINE off
XF_PTNAME(DEPTH_DST) g_y = 0,out_val=0;
// STEP is the width of one output plane: 16 bits for the signed 16-bit
// output depths, 8 bits otherwise. p is the bit offset of the current plane.
int STEP,p=0;
if( (DEPTH_DST == XF_48SP) || (DEPTH_DST == XF_16SP) )
{
STEP=16;
}
else
{
STEP=8;
}
// k is the bit offset of the current plane inside each source pixel.
for(int i=0,k=0;i<PLANES;i++,k+=8)
{
// Naming: M* terms carry negative kernel weights (top rows) and are
// subtracted at the end; A* terms carry positive weights (bottom rows).
// Symmetric columns (0 & 4, 1 & 3) are summed first.
// Corners: bottom row (+1) minus top row (-1); already signed, added via A2.
short int M00 = ((short int)src_buf5[0].range(k+7,k) + (short int)src_buf5[4].range(k+7,k)) - ((short int)src_buf1[0].range(k+7,k) + (short int)src_buf1[4].range(k+7,k));
// Rows 0 / 4, columns 1 and 3: weight -4 / +4.
short int M01 = (short int)(((short int)src_buf1[1].range(k+7,k) + (short int)src_buf1[3].range(k+7,k)) << 2);
short int A00 = (short int)(((short int)src_buf5[1].range(k+7,k) + (short int)src_buf5[3].range(k+7,k)) << 2);
// Rows 1 / 3, columns 0 and 4: weight -2 / +2.
short int M02 = (short int)(((short int)src_buf2[0].range(k+7,k) + (short int)src_buf2[4].range(k+7,k)) << 1);
short int A01 = (short int)(((short int)src_buf4[0].range(k+7,k) + (short int)src_buf4[4].range(k+7,k)) << 1);
// Rows 1 / 3, columns 1 and 3: weight -8 / +8.
short int M03 = (short int)(((short int)src_buf2[1].range(k+7,k) + (short int)src_buf2[3].range(k+7,k)) << 3);
short int A02 = (short int)(((short int)src_buf4[1].range(k+7,k) + (short int)src_buf4[3].range(k+7,k)) << 3);
// Centre column: weights -6, -12, +12, +6 for rows 0, 1, 3, 4.
short int M04 = (short int)(src_buf1[2].range(k+7,k) * 6);
short int M05 = (short int)(src_buf2[2].range(k+7,k) * 12);
short int A03 = (short int)(src_buf4[2].range(k+7,k) * 12);
short int A04 = (short int)(src_buf5[2].range(k+7,k) * 6);
// Partial sums, then gradient = (positive terms) - (negative terms).
short int S00 = M01 + M02 + M03;
short int S01 = M04 + M05;
short int A0 = A00 + A01;
short int A1 = A02 + A03;
short int A2 = A04 + M00;
short int FA = A0 + A1 + A2;
short int FS = S00 + S01;
short int out_y = FA - FS;
g_y = (XF_PTNAME(DEPTH_DST))out_y;
// Unsigned 8-bit outputs cannot represent the full range: clamp to [0, 255].
if((DEPTH_DST == XF_8UP) ||(DEPTH_DST == XF_24UP))
{
if(out_y < 0)
g_y = 0;
else if (out_y > 255)
g_y = 255;
}
// Store this plane's result in its field of the packed return value.
out_val.range(p+(STEP-1),p)=g_y;
p+=STEP;
}
return out_val;
}
这两个函数,按照sobel的各个位置的系数进行乘加运算,求出X方向的梯度值和Y方向的梯度值。
至此,整个sobel的过程已经分析完毕。
涉及到窗口window填充,window shift moving相关的操作,都在前几层的调用中来完成,最终调用的xFGradientX5x5和xFGradientY5x5,才是实现核算子运算的函数。
所以,对于window shift moving类型的函数实现,我们完全可以基于sobel的实现来修改移植,
关于window shift moving的操作,交给框架函数,核算子运算的函数,修改实现。
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
简单起见,我们可以把移植后的函数直接放到xf这个namespace中(前面顶层函数里的调用写作xf::lz::bayer_cfa,说明实际工程中在xf之下还嵌套了一层名为lz的子namespace)。如下代码:
namespace xf{
...
} // end of namespace xf
框架函数并不需要修改代码,所以我们只是简单的把函数名修改一下即可。
例如:
最上层的Sobel函数名,修改为bayer_cfa,
其中调用了xFSobelFilter5x5,修改为bayer_cfa_Filter5x5,
在xFSobelFilter5x5中,调用了ProcessSobel5x5,这里要修改为process_bayer_cfa_5X5,
在ProcessSobel5x5中,调用了xFSobel5x5,这里要修改为bayer_cfa_5X5,
在xFSobel5x5中,调用了xFGradientX5x5和xFGradientY5x5,这是两个核算子运算函数,我们并不需要两个函数,所以只需要调用一个核算子运算函数即可。
这里,以xFGradientY5x5为基础,进行核算子运算函数的修改。取名为bayer_cfa_core_5X5。
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bayer_cfa_core_5X5代码:
/**
 * Demosaics one pixel from its 5x5 Bayer neighbourhood.
 *
 * The centre of the window (row 2, column 2) is the pixel being reconstructed.
 * Its position parity together with the Bayer mode decides which colour the
 * raw sample represents; the two missing colours come from the kernel_*
 * interpolators.
 *
 * Compared with the Sobel core this function was derived from, the leftover
 * g_y / STEP / p locals (and the DEPTH_DST branch that set STEP) are gone:
 * they never influenced the result, because the output layout is fixed.
 * The four per-mode branch tables are folded into one lookup table.
 *
 * @tparam PLANES     Unused; kept for signature compatibility with the caller.
 * @tparam DEPTH_SRC  Source pixel depth selector.
 * @tparam DEPTH_DST  Destination pixel depth selector; the type must be able
 *                    to hold 24 bits.
 * @param src_buf1..5 Pointers to the leftmost pixel of the window in rows
 *                    0 (top) to 4 (bottom); elements [0..4] are read.
 * @param row         Row index of the centre pixel (only its parity is used).
 * @param col         Word index within the row.
 * @param npc         Index of the pixel inside the word.
 * @param mode        Bayer layout of the top-left 2x2 cell:
 *                    0 = BG, 1 = GB, 2 = GR, any other value = RG.
 * @return Packed pixel: bits 7..0 = B, 15..8 = G, 23..16 = R, each clamped
 *         to [0, 255].
 */
template<int PLANES,int DEPTH_SRC, int DEPTH_DST>
XF_PTNAME(DEPTH_DST) bayer_cfa_core_5X5(XF_PTNAME(DEPTH_SRC) *src_buf1, XF_PTNAME(DEPTH_SRC) *src_buf2,
        XF_PTNAME(DEPTH_SRC) *src_buf3, XF_PTNAME(DEPTH_SRC) *src_buf4, XF_PTNAME(DEPTH_SRC) *src_buf5, u16 row, u16 col, u16 npc, u8 mode)
{
#pragma HLS INLINE off
    // Kind of colour site the centre pixel sits on.
    enum {
        SITE_R       = 0,  // red sample
        SITE_B       = 1,  // blue sample
        SITE_G_B_ROW = 2,  // green sample with blue left/right, red above/below
        SITE_G_R_ROW = 3   // green sample with red left/right, blue above/below
    };

    // site_table[pattern][row is odd][column is odd]
    static const unsigned char site_table[4][2][2] = {
        { { SITE_B,       SITE_G_B_ROW }, { SITE_G_R_ROW, SITE_R       } },  // 0: BG
        { { SITE_G_B_ROW, SITE_B       }, { SITE_R,       SITE_G_R_ROW } },  // 1: GB
        { { SITE_G_R_ROW, SITE_R       }, { SITE_B,       SITE_G_B_ROW } },  // 2: GR
        { { SITE_R,       SITE_G_R_ROW }, { SITE_G_B_ROW, SITE_B       } }   // 3: RG
    };

    XF_PTNAME(DEPTH_DST) out_val = 0;

    // Gather the window into a plain 8-bit array, addressed as block[y][x].
    u8 block[5][5];
    block[0][0] = src_buf1[0];
    block[0][1] = src_buf1[1];
    block[0][2] = src_buf1[2];
    block[0][3] = src_buf1[3];
    block[0][4] = src_buf1[4];
    block[1][0] = src_buf2[0];
    block[1][1] = src_buf2[1];
    block[1][2] = src_buf2[2];
    block[1][3] = src_buf2[3];
    block[1][4] = src_buf2[4];
    block[2][0] = src_buf3[0];
    block[2][1] = src_buf3[1];
    block[2][2] = src_buf3[2];
    block[2][3] = src_buf3[3];
    block[2][4] = src_buf3[4];
    block[3][0] = src_buf4[0];
    block[3][1] = src_buf4[1];
    block[3][2] = src_buf4[2];
    block[3][3] = src_buf4[3];
    block[3][4] = src_buf4[4];
    block[4][0] = src_buf5[0];
    block[4][1] = src_buf5[1];
    block[4][2] = src_buf5[2];
    block[4][3] = src_buf5[3];
    block[4][4] = src_buf5[4];

    // Candidate values; which one maps to which channel depends on the site.
    const short raw    = block[2][2];            // the sample itself
    const short RB_G   = kernel_RB_G(block);     // green at a red/blue site
    const short G_RB_h = kernel_G_RB_h(block);   // colour of the left/right neighbours at a green site
    const short G_RB_v = kernel_G_RB_v(block);   // colour of the up/down neighbours at a green site
    const short RB_BR  = kernel_RB_BR(block);    // opposite colour (diagonal neighbours) at a red/blue site

    // Absolute pixel column. The factor 8 is the pixels-per-word count this
    // code was written for; only the parity is used below.
    // NOTE(review): with a pixels-per-word value of 1 this parity would be
    // wrong — confirm the core is only instantiated with 8 pixels per word.
    const u16 pixel_x = col * 8 + npc;
    const bool odd_row = (row & 0x1) != 0;
    const bool odd_col = (pixel_x & 0x1) != 0;

    // Any mode other than 0, 1 or 2 is treated as RG.
    int pattern = 3;
    if (mode == 0) {
        pattern = 0;
    } else if (mode == 1) {
        pattern = 1;
    } else if (mode == 2) {
        pattern = 2;
    }

    const int site = site_table[pattern][odd_row ? 1 : 0][odd_col ? 1 : 0];

    short r = 0;
    short g = 0;
    short b = 0;
    switch (site) {
    case SITE_R:
        r = raw;
        g = RB_G;
        b = RB_BR;
        break;
    case SITE_B:
        r = RB_BR;
        g = RB_G;
        b = raw;
        break;
    case SITE_G_B_ROW:
        r = G_RB_v;
        g = raw;
        b = G_RB_h;
        break;
    default:  // SITE_G_R_ROW
        r = G_RB_h;
        g = raw;
        b = G_RB_v;
        break;
    }

    // The interpolators can overshoot the 8-bit range; saturate.
    r = (r > 255) ? 255 : ((r < 0) ? 0 : r);
    g = (g > 255) ? 255 : ((g < 0) ? 0 : g);
    b = (b > 255) ? 255 : ((b < 0) ? 0 : b);

    out_val.range(7, 0) = b;
    out_val.range(15, 8) = g;
    out_val.range(23, 16) = r;
    return out_val;
}
其中,用到的几个计算函数,如下所示:
/**
 * Interpolates the green value at a red or blue site (the window centre).
 *
 * Window layout, addressed as block[y][x] (shown for a red centre; the same
 * formula applies with R and B swapped):
 *
 *       0 1 2 3 4
 *       R G R G R   0
 *       G B G B G   1
 *       R G R G R   2
 *       G B G B G   3
 *       R G R G R   4
 *
 * For each direction a gradient and an estimate are formed:
 *   grad = |G_a - G_b| + |2*C - C_a2 - C_b2|
 *   est  = (G_a + G_b) / 2 + (2*C - C_a2 - C_b2) / 4
 * where G_a/G_b are the adjacent greens, C is the centre and C_a2/C_b2 are
 * the same-colour samples two pixels away. The estimate along the direction
 * with the smaller gradient is returned; on a tie, the mean of both.
 *
 * @param block 5x5 neighbourhood, block[y][x].
 * @return Interpolated green value (not clamped).
 */
short kernel_RB_G(u8 block[5][5])
{
    const u8 centre = block[2][2];

    // Horizontal neighbours: adjacent greens and same-colour samples at distance 2.
    const u8 west1 = block[2][1];
    const u8 east1 = block[2][3];
    const u8 west2 = block[2][0];
    const u8 east2 = block[2][4];

    // Vertical neighbours, same roles.
    const u8 north1 = block[1][2];
    const u8 south1 = block[3][2];
    const u8 north2 = block[0][2];
    const u8 south2 = block[4][2];

    const short grad_h = ABS_DEC(west1, east1) + ABS_DEC(centre * 2, west2 + east2);
    const short grad_v = ABS_DEC(north1, south1) + ABS_DEC(centre * 2, north2 + south2);

    const short green_h = (west1 + east1) / 2 + (centre * 2 - west2 - east2) / 4;
    const short green_v = (north1 + south1) / 2 + (centre * 2 - north2 - south2) / 4;

    if (grad_h < grad_v) {
        return green_h;
    }
    if (grad_v < grad_h) {
        return green_v;
    }
    // Equal gradients: no preferred direction, blend both estimates.
    return (green_h + green_v) / 2;
}
/**
 * At a green site, interpolates the colour carried by the left and right
 * neighbours as their average.
 *
 * Window layout, addressed as block[y][x]:
 *
 *       0 1 2 3 4
 *       G B G B G   0
 *       R G R G R   1
 *       G B G B G   2
 *       R G R G R   3
 *       G B G B G   4
 *
 * @param block 5x5 neighbourhood, block[y][x].
 * @return (left + right) / 2.
 */
short kernel_G_RB_h(u8 block[5][5])
{
    const u8 left = block[2][1];
    const u8 right = block[2][3];
    return (left + right) / 2;
}
/**
 * At a green site, interpolates the colour carried by the upper and lower
 * neighbours as their average.
 *
 * Window layout, addressed as block[y][x]:
 *
 *       0 1 2 3 4
 *       G B G B G   0
 *       R G R G R   1
 *       G B G B G   2
 *       R G R G R   3
 *       G B G B G   4
 *
 * @param block 5x5 neighbourhood, block[y][x].
 * @return (above + below) / 2.
 */
short kernel_G_RB_v(u8 block[5][5])
{
    const u8 above = block[1][2];
    const u8 below = block[3][2];
    return (above + below) / 2;
}
/**
 * At a red or blue site, interpolates the opposite colour (blue at red, red
 * at blue) from the four diagonal neighbours.
 *
 * Window layout, addressed as block[y][x]:
 *
 *       0 1 2 3 4
 *       R G R G R   0
 *       G B G B G   1
 *       R G R G R   2
 *       G B G B G   3
 *       R G R G R   4
 *
 * The two diagonals are compared by the absolute difference of their end
 * points; the average along the diagonal with the smaller difference is
 * returned, or the mean of both averages on a tie.
 *
 * @param block 5x5 neighbourhood, block[y][x].
 * @return Interpolated value of the opposite colour.
 */
short kernel_RB_BR(u8 block[5][5])
{
    // Main diagonal: top-left to bottom-right.
    const u8 top_left = block[1][1];
    const u8 bottom_right = block[3][3];
    // Anti-diagonal: top-right to bottom-left.
    const u8 top_right = block[1][3];
    const u8 bottom_left = block[3][1];

    const short diff_main = ABS_DEC(top_left, bottom_right);
    const short diff_anti = ABS_DEC(top_right, bottom_left);

    const short avg_main = (top_left + bottom_right) / 2;
    const short avg_anti = (top_right + bottom_left) / 2;

    if (diff_main < diff_anti) {
        return avg_main;
    }
    if (diff_anti < diff_main) {
        return avg_anti;
    }
    // Equal differences: blend both diagonals.
    return (avg_main + avg_anti) / 2;
}