瑞芯微rk3399使用rga图形加速处理后拷贝到普通内存耗时大的解决方案
rga,mpp,drm,un-cached
rga图形加速引擎处理图像很快:rk3399开发板自带rga(图形加速引擎)和mpp编解码。在MPP解码->图像后处理->显示的整个流程中,RGA都是直接访问内存物理地址,整个过程零内存拷贝,效率很高(都是在drm内存上完成,drm内存默认non-cached)。
从rga的drm内存拷贝图片到普通内存很慢:其他任何软件以cpu访问该段内存,都会因为non-cached而产生不必要的性能损失。
解决方案:
将老版rga-API替换为新版rga-API(im2d):
老版rga-API:
extern "C"
{
#include "rockchip_rga/rockchip_rga.h"
}
// Old rockchip_rga API: convert the decoded NV12 frame to RGB24 with RGA,
// then copy the result into an ordinary cv::Mat.
// The source is described as width x ver_stride; the destination as width x height.
dec_rga->ops->setSrcFormat(dec_rga, V4L2_PIX_FMT_NV12, width, ver_stride);
dec_rga->ops->setDstFormat(dec_rga, V4L2_PIX_FMT_RGB24, width, height);
// Only the width x height region starting at (0, 0) of the source is used.
dec_rga->ops->setSrcCrop(dec_rga, 0, 0, width, height);
// Fixed: the cast previously read "(unsignedchar*)", which is not a valid type name.
dec_rga->ops->setSrcBufferPtr(dec_rga, (unsigned char*)mpp_buffer_get_ptr(buffer));
dec_rga->ops->setDstBufferPtr(dec_rga, (unsigned char*)rgb_buffer);
dec_rga->ops->go(dec_rga); // the RGA processing itself (resize, cvtcolor) is fast
// Copying out of the RGA DRM memory into ordinary memory is the slow step.
img_rgb = cv::Mat(height, width, CV_8UC3, (uchar*)(rgb_buffer), cv::Mat::AUTO_STEP).clone();
新rga-API:
#include "RgaUtils.h"
#include "im2d_api/im2d.hpp"
#include "rga.h"
// New im2d API: crop the decoded frame, convert its colour format with RGA,
// then copy the result into an ordinary cv::Mat.
// NOTE(review): out_buffer, dst_width and dst_height are not set up in this
// snippet; they are assumed to be prepared by the surrounding code.

// Wrap the MPP decoder output, described as width x ver_stride, as the source image.
src_buffer = wrapbuffer_virtualaddr((char*)mpp_buffer_get_ptr(buff), width, ver_stride, src_format);
// Intermediate width x height image in malloc'd memory; it receives the cropped frame.
mid_buffer = wrapbuffer_virtualaddr((char*)malloc(width * height * get_bpp_from_format(src_format)), width, height, src_format);
im_rect src_rect;
src_rect.x = 0;
src_rect.y = 0;
src_rect.width = width;
src_rect.height = height;
// Fixed: the RGA steps used to run before the NULL check, so a failed malloc
// was passed straight to imcrop/imcvtcolor. Everything that depends on the
// intermediate buffer now runs only when the allocation succeeded.
if (mid_buffer.vir_addr != NULL) {
    // Crop the aligned picture down to the width x height region.
    status = imcrop(src_buffer, mid_buffer, src_rect);
    // cvtcolor + resize (per the original note); the RGA processing is fast.
    // NOTE(review): the imcrop status is overwritten here without being
    // checked, and the final status is not checked either — confirm with caller.
    status = imcvtcolor(mid_buffer, out_buffer, mid_buffer.format, out_buffer.format);
    free(mid_buffer.vir_addr);
    // Copying from the cached RGA output memory into ordinary memory is fast.
    img_rgb = cv::Mat(dst_height, dst_width, CV_8UC3, out_buffer.vir_addr).clone();
}
```cpp