本例利用之前介绍的基础函数进行功能级实现。
// Top-level HLS function: reads from the memory-mapped pointer `src`, passes the data
// through two internal streams (inter0, inter1) and writes the result to the AXI-Stream `dst`.
//
// src         - input buffer, accessed through an m_axi master interface.
// dst         - output AXI-Stream.
// index_in    - 4-bit input; `index_in - 1` is what gets forwarded to the read stage.
// index_out   - receives `index_in - 1` (see the wrap-around note below).
// crop_x, crop_y, crop_width, crop_height, width, height
//             - configuration values written over the CONTROL_BUS AXI4-Lite bundle and
//               forwarded unchanged to the helper stages.
//               NOTE(review): presumably the crop window and the full frame size in pixels —
//               confirm against axim2stream_vec.
void bayer_photo_accl(T_UINT(BAYER_BPP, BAYER_NPPC)* src, hls::stream<T_AXIS(PHOTO_BPP, PHOTO_NPPC)>& dst, ap_uint<4> index_in, ap_uint<4>& index_out, u16 crop_x, u16 crop_y, u16 crop_width, u16 crop_height, u16 width, u16 height)
{
// Data ports: `src` is an AXI4 master (base address supplied via a slave register),
// `dst` is a registered AXI-Stream.
#pragma HLS INTERFACE m_axi depth=1940800 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst
// Block-level control (return) and all scalar configuration arguments share one
// AXI4-Lite slave bundle named CONTROL_BUS.
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=height bundle=CONTROL_BUS
// index_in and the configuration scalars are additionally declared ap_stable.
// index_in has no s_axilite pragma, so it is a plain input port.
#pragma HLS INTERFACE ap_stable port=index_in
#pragma HLS INTERFACE ap_stable port=crop_x
#pragma HLS INTERFACE ap_stable port=crop_y
#pragma HLS INTERFACE ap_stable port=crop_width
#pragma HLS INTERFACE ap_stable port=crop_height
#pragma HLS INTERFACE ap_stable port=width
#pragma HLS INTERFACE ap_stable port=height
// The result is stored back into a 4-bit unsigned value, so index_in == 0 wraps to 15.
const ap_uint<4> index = index_in - 1;
// Debug trace of the computed index.
printf("index:%d\n", (int)index);
index_out = index;
// Intermediate streams connecting the three stages below; the STREAM pragmas set their depths.
hls::stream<T_UINT(BAYER_BPP, BAYER_NPPC)> inter0("inter0");
hls::stream<T_UINT(PHOTO_BPP, PHOTO_NPPC)> inter1("inter1");
#pragma HLS STREAM variable=inter0 depth=1024
#pragma HLS STREAM variable=inter1 depth=6400
// NOTE(review): the statements above precede the dataflow pragma inside the same function
// body — confirm the tool accepts this as a valid dataflow region.
#pragma HLS dataflow
// Stage 1: src -> inter0. Receives the buffer index plus the crop and frame parameters.
// NOTE(review): presumably `index` selects one of DDR_BUFFER_NUMBER frame buffers — confirm.
axim2stream_vec<BAYER_BPP, BAYER_HEIGHT, BAYER_WIDTH, BAYER_NPPC, DDR_BUFFER_NUMBER>(src, inter0, index, crop_x, crop_y, crop_width, crop_height, width, height);
// Stage 2: inter0 -> inter1, templated on both BAYER_NPPC and PHOTO_NPPC.
stream_nppc_down<BAYER_BPP, PHOTO_HEIGHT, PHOTO_WIDTH, BAYER_NPPC, PHOTO_NPPC>(inter0, inter1, crop_width, crop_height);
// Stage 3: inter1 -> dst (AXI-Stream output).
stream2axis<PHOTO_BPP, PHOTO_HEIGHT, PHOTO_WIDTH, PHOTO_NPPC>(inter1, dst, crop_width, crop_height);
}
输入源对象是AXIMM,输出目的对象是AXIS的stream。此外,本函数还需要一系列的配置参数。
对于src和dst,被添加了interface约束。
其中,src被实现为m_axi接口,dst则被实现为axis接口。
#pragma HLS INTERFACE m_axi depth=1940800 port=src offset=slave max_read_burst_length=128
#pragma HLS INTERFACE axis register both port=dst
对于return, crop_x ,crop_y ,crop_width ,crop_height ,width ,height ,被添加了interface约束。
它们被实现为s_axilite接口,并绑定到同一个AXILITE总线中,所以,它们被实现为位于CONTROL_BUS这个总线上的具有不同读写地址的REG。
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_x bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_y bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=crop_height bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=width bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port=height bundle=CONTROL_BUS
对于index_in,被添加了interface约束。
它被实现为input接口,且不受到ap_rst的复位影响,所以使用了ap_stable类型。
#pragma HLS INTERFACE ap_stable port=index_in
同样的,crop_x ,crop_y ,crop_width ,crop_height ,width ,height ,我们也不希望它们受到ap_rst的复位影响,一旦MCU写入了REG,除非下一次写入更新,否则值不变。所以,它们也使用了ap_stable类型。
对于index_out,默认被添加了interface约束,使用ap_vld类型。
它被实现为output接口,伴随着vld指示信号。
函数内定义了两个局部的stream对象(inter0和inter1),作为各处理阶段之间的中间缓冲。
对它们,添加了stream约束。HLS将它们理解为FIFO。
#pragma HLS STREAM variable=inter0 depth=1024
#pragma HLS STREAM variable=inter1 depth=6400
函数整体处理的调用过程,被添加了dataflow约束。
#pragma HLS DATAFLOW
+++++++++++++++++++++++++++++++++++++++++++++++
然后,再来看看testbench。
#include "hls_opencv.h"
#include "hls_video.h"
#include "bayer_photo_accl.h"
#include "bayer_photo_tb.h"
using namespace cv;
需要使用opencv库,并使用namespace。
int main(int argc, char *argv[])
{
if (argc != 2)
{
printf("usage: %s bayer.png\n", argv[0]);
return -1;
}
cv::Mat in_img;
in_img = cv::imread(argv[1], 0);
if (in_img.data == NULL)
{
fprintf(stderr,"Cannot open image at %s\n", argv[1]);
return 0;
}
int width = in_img.size().width;
int height = in_img.size().height;
if ((width != BAYER_WIDTH) || (height != BAYER_HEIGHT))
{
printf("image size is wrong, real:%dx%d, needed:%dx%d \n", width, height, BAYER_WIDTH, BAYER_HEIGHT);
return 0;
}
std::vector<cv::Mat> in_imgs;
in_imgs.resize(DDR_BUFFER_NUMBER);
uchar* ddr_base = (uchar*)malloc(width * height * DDR_BUFFER_NUMBER);
if (!ddr_base)
return 0;
for (int i = 0; i < DDR_BUFFER_NUMBER; i++)
{
in_imgs[i] = cv::Mat(height, width, CV_8U, (uchar *)ddr_base + width * height * i);
in_img.copyTo(in_imgs[i]);
}
ap_uint<4> index_in = 1;
ap_uint<4> index_out = 0;
ap_uint<1> vflip_mode = 0;
int bayer_mode = 3;
int bayer_width = 6400;
int bayer_height = 4852;
int crop_width = 5120;
int crop_height = 4096;
crop_width = (crop_width / BAYER_NPPC) * BAYER_NPPC;
int crop_x = (bayer_width - crop_width) / 2;
int crop_y = (bayer_height - crop_height) / 2;
crop_x = (crop_x / BAYER_NPPC) * BAYER_NPPC;
printf("crop_x=%d, crop_y=%d, crop_width=%d, crop_height=%d, width=%d, height=%d\n", crop_x, crop_y, crop_width, crop_height, width, height);
cv::Mat cv_img;
cv_img.create(crop_height, crop_width, CV_8U);
for (int i = 0; i < crop_height; i++)
{
for (int j = 0; j < crop_width; j++)
{
cv_img.at<uchar>(i, j) = in_img.at<uchar>(crop_y + i, crop_x + j);
}
}
cv::Mat out_img;
out_img.create(crop_height, crop_width, CV_8U);
T_UINT(BAYER_BPP, BAYER_NPPC)* src = (T_UINT(BAYER_BPP, BAYER_NPPC)*)(ddr_base);
hls::stream<T_AXIS(PHOTO_BPP, PHOTO_NPPC)> dst;
bayer_photo_accl(src, dst, index_in, index_out, crop_x, crop_y, crop_width, crop_height, width, height);
axis2cvMat_sign<PHOTO_BPP, PHOTO_NPPC>(dst, out_img);
cv::imwrite("hls.bmp", out_img);
cv::imwrite("cv.bmp", cv_img);
cv::imwrite("image.bmp", in_img);
printf("test ok!\n");
return 0;
}
主体框架和基本框架一样,
这里,使用了自定义的基础函数axis2cvMat_sign。
输入数据通过src指针(AXIMM)直接向DUT传递;DUT输出的axis数据流对象,则由该函数转换成cvMat对象,用于保存结果图像。
这里,使用了std::vector容器,用来存储多个cv::Mat对象,每个对象对应DDR中的一个缓冲区(共DDR_BUFFER_NUMBER个)。