本例利用之前介绍的基础函数进行功能级实现。
// Top-level HLS auto-focus IP.
//
// The frame arriving on the AXI4-Stream input `src` is forwarded unchanged to
// `dst`; in parallel a reduced-depth (GRAY_BPP) copy of the stream is fed to
// the focus metric, which accumulates a clarity (sharpness) value over the
// ROI and writes it to `clarity_value` (an AXI-Lite mapped register).
//
// Parameters (all on the AXI-Lite CONTROL_BUS, marked ap_stable — they must
// not change while a frame is in flight):
//   roi_x, roi_y, roi_width, roi_height : ROI rectangle in pixel coordinates
//   width, height                       : frame dimensions in pixels
void auto_focus(hls::stream<T_AXIU(IMAGE_BPP, IMAGE_NPPC)>& src, hls::stream<T_AXIU(IMAGE_BPP, IMAGE_NPPC)>& dst, u32& clarity_value, u16 roi_x, u16 roi_y, u16 roi_width, u16 roi_height, u16 width, u16 height)
{
#pragma HLS INTERFACE axis register both port=src
#pragma HLS INTERFACE axis register both port=dst
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=clarity_value bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=roi_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=roi_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=roi_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=roi_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=height bundle=CONTROL_BUS
#pragma HLS INTERFACE ap_stable port=roi_x
#pragma HLS INTERFACE ap_stable port=roi_y
#pragma HLS INTERFACE ap_stable port=roi_width
#pragma HLS INTERFACE ap_stable port=roi_height
#pragma HLS INTERFACE ap_stable port=width
#pragma HLS INTERFACE ap_stable port=height
#pragma HLS INTERFACE ap_stable port=clarity_value
// Internal FIFOs connecting the dataflow stages.
hls::stream<T_UINT(IMAGE_BPP, IMAGE_NPPC)> inter0("inter0");
hls::stream<T_UINT(IMAGE_BPP, IMAGE_NPPC)> inter1("inter1");
hls::stream<T_UINT(GRAY_BPP, IMAGE_NPPC)> inter2("inter2");
#pragma HLS STREAM variable=inter0 depth=1024
#pragma HLS STREAM variable=inter1 depth=1024
#pragma HLS STREAM variable=inter2 depth=1024
#pragma HLS DATAFLOW
// Stage 1: AXI4-Stream video -> plain pixel stream.
axis2stream<IMAGE_BPP, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_NPPC>(src, inter0, width, height);
// Stage 2: split into a full-depth pass-through stream (inter1) and a
// GRAY_BPP stream for the metric (inter2).
stream_demux<IMAGE_BPP, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_NPPC, GRAY_BPP>(inter0, inter1, inter2, width, height);
// Stage 3: focus metric over the gray stream; result lands in clarity_value.
stream_focus<GRAY_BPP, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_NPPC>(inter2, clarity_value, roi_x, roi_y, roi_width, roi_height, width, height);
// Stage 4: pixel stream -> AXI4-Stream out (the unchanged video path).
stream2axis<IMAGE_BPP, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_NPPC>(inter1, dst, width, height);
}
其中用到了xfopencv提供的一些函数,也用到了自定义的公共函数。
首先,调用axis2stream,将输入的AXIS数据流,转换成UINT stream,
然后,调用stream_demux,将输入的UINT数据流,分离成RGB数据流和GRAY数据流,
然后,调用stream_focus,对灰度图像计算清晰度值clarity,
然后,调用stream2axis,将UINT数据流转换成AXIS数据流。
其中,用到了算法级函数,stream_focus。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
再来看看testbench.
using namespace cv;
// Testbench for the auto_focus DUT.
//
// Loads a frame, computes the clarity value with the software reference
// model (compute_clarity_value), runs the HLS DUT on the same frame, and
// compares the two results. Also dumps the processed images for inspection.
//
// Returns 0 on pass, -1 on usage error, load failure, or result mismatch.
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        printf("usage: %s bayer.png\n", argv[0]);
        return -1;
    }
    // Load the input as a 3-channel image.
    cv::Mat in_img = cv::imread(argv[1], 1);
    if (in_img.data == NULL)
    {
        fprintf(stderr, "Cannot open image at %s\n", argv[1]);
        return -1; // was 0: a load failure must fail the test, not pass it
    }
    int width = in_img.size().width;
    int height = in_img.size().height;
    // Center a 256x256 ROI, clamped so it stays inside small frames
    // (otherwise roi_x/roi_y would go negative and wrap in the DUT's u16 ports).
    int roi_width = (width < 256) ? width : 256;
    int roi_height = (height < 256) ? height : 256;
    int roi_x = (width - roi_width) / 2;
    int roi_y = (height - roi_height) / 2;
    cv::Mat out_img;
    out_img.create(height, width, CV_8UC3);
    cv::Mat ref_img;
    ref_img.create(height, width, CV_8UC3);
    // Software reference model.
    u32 ref_clarity_value = 0;
    compute_clarity_value(in_img, ref_img, roi_x, roi_y, roi_width, roi_height, &ref_clarity_value);
    // Run the DUT on the same frame.
    hls::stream<T_AXIU(IMAGE_BPP, IMAGE_NPPC)> src;
    hls::stream<T_AXIU(IMAGE_BPP, IMAGE_NPPC)> dst;
    u32 clarity_value = 0;
    cvMat2AXIvideo(in_img, src);
    auto_focus(src, dst, clarity_value, roi_x, roi_y, roi_width, roi_height, width, height);
    AXIvideo2cvMat(dst, out_img);
    cv::imwrite("hls.bmp", out_img);
    cv::imwrite("image.bmp", in_img);
    cv::imwrite("ref_img.bmp", ref_img);
    // The original testbench computed the reference value but never checked
    // it; compare DUT vs reference so the co-simulation actually verifies.
    printf("ref_clarity_value = %u, clarity_value = %u\n",
           (unsigned)ref_clarity_value, (unsigned)clarity_value);
    if (clarity_value != ref_clarity_value)
    {
        fprintf(stderr, "clarity value mismatch!\n");
        return -1;
    }
    printf("test ok!\n");
    return 0;
}
主体上和基本框架类似,
这里,调用compute_clarity_value计算清晰度,作为和DUT的执行结果的对比。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
来看看stream_focus的实现。
// Streaming focus (clarity) metric over a grayscale pixel stream.
//
// Maintains a (KN-1)-line ring buffer plus a KN x KN sliding window so that
// four Sobel-style directional gradients can be evaluated for every pixel in
// a single pass over the stream; absolute responses of pixels inside the ROI
// are accumulated and written to clarity_value at the end of the frame.
//
// src          : input pixel stream (only NPPC == 1 supported, see assert)
// clarity_value: output — sum of gradient magnitudes over the ROI
// roi_x, roi_y, roi_width, roi_height : ROI rectangle in pixel coordinates
// width, height: runtime frame dimensions (must not exceed COLS/ROWS)
//
// NOTE(review): unlike the software model compute_clarity_value, this
// implementation does not zero-pad the window at the image borders — for the
// first output column of each row the shifted window column 0 still holds
// stale/uninitialized data. Verify the ROI never touches the image border,
// otherwise results will differ from the reference.
template<int SRC_BPP, int ROWS, int COLS, int NPPC>
void stream_focus(hls::stream<T_UINT(SRC_BPP, NPPC)>& src, u32& clarity_value, u16 roi_x, u16 roi_y, u16 roi_width, u16 roi_height, u16 width, u16 height)
{
assert(((NPPC==1)) && "Only 1 pixel-parallelism are supported");
u16 rows = height;
u16 cols = width;
// ROI bounds: x in [minx, maxx), y in [miny, maxy).
u16 minx = roi_x;
u16 maxx = roi_x + roi_width;
u16 miny = roi_y;
u16 maxy = roi_y + roi_height;
const int KN = 3;
// KN x KN sliding window and a (KN-1)-row line buffer used as a ring buffer.
u8 block[KN][KN];
u8 line_buffer[KN - 1][COLS];
#pragma HLS array_partition variable=line_buffer complete dim=1
// Ring-buffer bookkeeping: lineStore is the row being written this
// iteration; line0/line1 select which buffered rows feed the window.
u16 lineStore = 1;
ap_uint<3> line0 = 1, line1 = 0;
u32 sum = 0;
// Pre-read the first image row into the line buffer so one row of history
// exists before the main loop starts.
PRE_COLS_LOOP:
for (u16 j = 0; j < cols; j++)
{
#pragma HLS loop_tripcount avg = COLS max = COLS
#pragma HLS pipeline II = 1
line_buffer[1][j] = src.read();
}
ROWS_LOOP:
for (u16 i = 0; i < rows; i++)
{
#pragma HLS loop_tripcount avg = ROWS max = ROWS
#pragma HLS loop_flatten off
// Advance the ring buffer for this row: lineStore toggles 0/1, and
// line0/line1 swap so reads come from the rows stored previously.
lineStore++;
if (lineStore > 1) {
lineStore = 0;
}
if (line0 == 0)
{
line0 = 1;
line1 = 0;
}
else
{
line0 = 0;
line1 = 1;
}
COLS_LOOP:
for (u16 j = 0; j < cols + KN/2; j++)
{
#pragma HLS loop_tripcount avg = COLS max = COLS
#pragma HLS pipeline II = 1
T_UINT(SRC_BPP, NPPC) srcpixel;
// Read the next pixel from the stream unless it is unavailable: the
// last KN/2 rows were already consumed one row ahead (via the line
// buffer), and j >= cols are window-flush steps with no new pixel.
if ((i < (rows - KN/2)) && (j < cols))
{
src >> srcpixel;
}
// Shift the window left by one column, freeing the last column.
for (u16 k = 0; k < KN; k++)
{
block[k][0] = block[k][1];
block[k][1] = block[k][2];
}
// Store the new pixel into the line buffer at the current position and
// refill the freed last window column: the top/middle rows come from
// the two buffered lines, the bottom row is the freshly read pixel.
if (j < cols)
{
line_buffer[lineStore][j] = (u8)srcpixel;
block[0][2] = line_buffer[line0][j];
block[1][2] = line_buffer[line1][j];
block[2][2] = (u8)srcpixel;
}
// Once the window has been primed with valid data, run the per-window
// gradient computation.
if (j >= KN/2)
{
// j is a step counter, not the pixel column — recover the actual
// (x, y) coordinates of the pixel at the window center.
u16 x = j - KN/2;
u16 y = i;
// Four directional Sobel-style responses: 0, 90, 45 and 135 degrees,
// each as |positive taps - negative taps|.
u16 x1 = block[0][2] + 2 * block[1][2] + block[2][2];
u16 x2 = block[0][0] + 2 * block[1][0] + block[2][0];
u16 dx = ABS_DEC(x1, x2);
u16 y1 = block[0][0] + 2 * block[0][1] + block[0][2];
u16 y2 = block[2][0] + 2 * block[2][1] + block[2][2];
u16 dy = ABS_DEC(y1, y2);
u16 x45_1 = block[1][2] + 2 * block[2][2] + block[2][1];
u16 x45_2 = block[0][1] + 2 * block[0][0] + block[1][0];
u16 dx45 = ABS_DEC(x45_1, x45_2);
u16 y45_1 = block[0][1] + 2 * block[0][2] + block[1][2];
u16 y45_2 = block[1][0] + 2 * block[2][0] + block[2][2];
u16 dy45 = ABS_DEC(y45_1, y45_2);
u32 dsum = dx + dy + dx45 + dy45;
// Accumulate the response only for pixels inside the ROI.
if ((y >= miny) && (y < maxy) && (x >= minx) && (x < maxx))
{
sum += dsum;
} // end of if ((y >= miny)...
}// end of if (j >= KN/2)...
} // end of for (u16 j = 0...
} // end of for (u16 i = 0...
// Whole frame processed: export the accumulated sum as the clarity value.
clarity_value = sum;
}
输入是一个UINT的stream,经过计算后,将clarity输出到外部变量中。其他的配置参数,用来指示ROI区域,以及图像的尺寸。
由于输入是stream,所以为了形成window,需要使用linebuffer。
首先,在一个for循环体中,逐点处理,预读一行,存入linebuffer。
然后,再一个两层嵌套for循环体中,进行逐点处理。
linebuffer需要手动进行管理,让它实现ringbuffer的功能。
所以,需要多个index进行管理。
lineStore,作为写入指示,
line0 和 line1,作为可用的linebuffer的指示,
在进入row_loop时,首先进行ringbuffer的调整和管理。
在设置好了各个index后,开始column_loop。进入当前行的处理。
由于使用了window,所以需要扩展边界,所以,循环次数,额外加了KN/2,
整个列的循环处理过程,分为两个阶段,
一个是linebuffer填充和window填充阶段,
一个是有效像素处理阶段。
这两个阶段,由两个代码块来分别执行,控制变量是列循环变量j,这里充当操作步控制变量的作用。所以j在这里,不能代表当前像素的列坐标。
例如,
if (j < cols){…}
if (j >= KN/2){…}
j的状态变化,控制着不同的代码块是否执行。
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
补充,compute_clarity_value原理分析
/*
0 1 2
Y Y Y 0
Y Y Y 1
Y Y Y 2
x:
-1 0 1
-2 0 2
-1 0 1
//对X核算子,顺时针旋转180度+90度,即270度
y:
1 2 1
0 0 0
-1 -2 -1
//对X核算子,顺时针旋转45度
x45:
-2 -1 0
-1 0 1
0 1 2
//对Y核算子,顺时针旋转180度+45度,即225度
y45:
0 -1 -2
1 0 -1
2 1 0
*/
/*
Software reference model for the focus metric.

For every pixel of src a 3x3 grayscale window is formed (zero-padded at the
image borders), four Sobel-style directional kernels (0, 90, 45 and 135
degrees) are evaluated on it, and the absolute responses are summed. The
per-pixel sums of all pixels inside the ROI rectangle are accumulated and
written to *value.

NOTE(review): dst is part of the signature but is never written by this
body — presumably filled elsewhere or reserved; verify against callers.
*/
void compute_clarity_value(cv::Mat& src, cv::Mat& dst, int roi_x, int roi_y, int roi_width, int roi_height, u32* value)
{
    const int img_w = src.size().width;
    const int img_h = src.size().height;
    // ROI bounds: x in [x_lo, x_hi), y in [y_lo, y_hi).
    const int x_lo = roi_x;
    const int x_hi = roi_x + roi_width;
    const int y_lo = roi_y;
    const int y_hi = roi_y + roi_height;
    u8 win[3][3];
    u32 acc = 0;
    // Walk the whole image pixel by pixel.
    for (int row = 0; row < img_h; row++)
    {
        for (int col = 0; col < img_w; col++)
        {
            // Build the 3x3 gray window centered on (col, row).
            for (int wr = 0; wr < 3; wr++)
            {
                for (int wc = 0; wc < 3; wc++)
                {
                    const int sx = col + wc - 1;
                    const int sy = row + wr - 1;
                    if ((sx < 0) || (sx > (img_w - 1)) || (sy < 0) || (sy > (img_h - 1)))
                    {
                        // Outside the image: zero padding.
                        win[wr][wc] = 0;
                    }
                    else
                    {
                        cv::Vec3b pix = src.at<cv::Vec3b>(sy, sx);
                        // Fixed-point gray conversion (weights sum /1024),
                        // clipped to 8 bits.
                        int gray = (pix[0] * 306 + pix[1] * 601 + pix[2] * 117) / 1024;
                        if (gray > 255)
                        {
                            gray = 255;
                        }
                        win[wr][wc] = (u8)gray;
                    }
                }
            }
            // Directional responses: |positive taps - negative taps| of each
            // Sobel-style kernel.
            u16 gx_p = win[0][2] + 2 * win[1][2] + win[2][2];
            u16 gx_n = win[0][0] + 2 * win[1][0] + win[2][0];
            u16 dx = ABS_DEC(gx_p, gx_n);
            u16 gy_p = win[0][0] + 2 * win[0][1] + win[0][2];
            u16 gy_n = win[2][0] + 2 * win[2][1] + win[2][2];
            u16 dy = ABS_DEC(gy_p, gy_n);
            u16 g45_p = win[1][2] + 2 * win[2][2] + win[2][1];
            u16 g45_n = win[0][1] + 2 * win[0][0] + win[1][0];
            u16 dx45 = ABS_DEC(g45_p, g45_n);
            u16 g135_p = win[0][1] + 2 * win[0][2] + win[1][2];
            u16 g135_n = win[1][0] + 2 * win[2][0] + win[2][2];
            u16 dy45 = ABS_DEC(g135_p, g135_n);
            // Per-pixel total response of the four kernels.
            u32 dsum = dx + dy + dx45 + dy45;
            // Accumulate only pixels that fall inside the ROI.
            if ((row >= y_lo) && (row < y_hi) && (col >= x_lo) && (col < x_hi))
            {
                acc += dsum;
            }
        }
    }
    // Publish the frame-level clarity value.
    *value = acc;
}