- nvJPEG: 一个GPU加速的JPEG编解码库
- 官方文档: https://docs.nvidia.com/cuda/nvjpeg/index.html
- 官方教程:https://github.com/NVIDIA/CUDALibrarySamples/tree/master/nvJPEG
一、介绍
1.1 nvJPEG解码器
nvJPEG 库提供了高性能,GPU加速的JPEG图像格式的解码函数,并且普遍应用在深度学习领域和超大规模的多媒体应用中。
这个库提供单张图或者多张图同时解码的能力,可以充分利用GPU资源和优化效率,并且使用者也可以管理需要解码的内存,灵活性还是比较强的。
使用nvJPEG解码大体上依次经过以下步骤:
- 使用JPEG图像数据流作为输入
- 从数据流中获取图像的宽和高
- 使用以上获取的信息来管理GPU内存并执行解码操作
nvJPEG提供了专用的API,用于从原始JPEG图像数据流中检索图像信息。
二、JPEG解码
nvJPEG库提供了单张图像解码和批量解码的函数
2.1 单张图像解码
输入是从图像文件中读取的JPEG数据及其大小(字节数),输出是解码后的buffer
以下是执行步骤
- step0. 加载图像数据
// Loads the whole JPEG file into a host buffer (pchData, file_size bytes).
// This fragment lives inside main() in the complete listing, hence the
// `return 1` on every failure path instead of carrying on with bad data.
std::string strFileName = "./input_images/img2.jpg";

// Open in binary mode positioned at the end (std::ios::ate) so that tellg()
// below reports the file size.
std::ifstream input(strFileName.c_str(),
                    std::ios::in | std::ios::binary | std::ios::ate);
if (!input.is_open()) {
    std::cerr << "Cannot open image: " << strFileName << std::endl;
    return 1;
}

// tellg() yields -1 on failure, and an empty file holds no JPEG data either.
std::streamsize file_size = input.tellg();
if (file_size <= 0) {
    std::cerr << "Cannot read from file: " << strFileName << std::endl;
    return 1;
}
input.seekg(0, std::ios::beg);

// Host copy of the compressed stream; it must be released with free() once
// decoding is finished.
char* pchData = (char*)malloc(file_size);
if (pchData == nullptr || !input.read(pchData, file_size)) {
    std::cerr << "Cannot read from file: " << strFileName << std::endl;
    free(pchData);  // free(nullptr) is a no-op
    return 1;
}
- step1. 创建nvJPEG库的句柄(有两个函数可选: nvjpegCreateSimple() 或 nvjpegCreateEx())
nvjpegHandle* nvjpegHandle = nullptr;
CHECK_NVJPEG(nvjpegCreateSimple(&nvjpegHandle));
- step2. 调用
nvjpegJpegStateCreate()
函数,创建nvJPEG的状态
nvjpegJpegState* nvjpegJpegState = nullptr;
CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpegHandle, &nvjpegJpegState));
除了以上外,还有其他的一些函数,也是有帮助的,这里先列举,可暂时忽略
nvjpegStatus_t nvjpegGetProperty(libraryPropertyType type, int *value);
[DEPRECATED] nvjpegStatus_t nvjpegCreate(nvjpegBackend_t backend, nvjpegHandle_t *handle , nvjpeg_dev_allocator allocator);
nvjpegStatus_t nvjpegCreateSimple(nvjpegHandle_t *handle);
nvjpegStatus_t nvjpegCreateEx(nvjpegBackend_t backend, nvjpegDevAllocator_t *dev_allocator, nvjpegPinnedAllocator_t *pinned_allocator, unsigned int flags, nvjpegHandle_t *handle);
nvjpegStatus_t nvjpegDestroy(nvjpegHandle_t handle);
nvjpegStatus_t nvjpegJpegStateCreate(nvjpegHandle_t handle, nvjpegJpegState_t *jpeg_handle);
nvjpegStatus_t nvjpegJpegStateDestroy(nvjpegJpegState_t handle);
- step3. 使用 nvjpegGetImageInfo() 从数据流中获取图像的宽高
函数的参数如下:
// Retrieves image information from a raw JPEG data stream.
// NOTE(review): per the nvJPEG documentation, widths and heights each point to
// an array of NVJPEG_MAX_COMPONENT entries (one per channel), not to a single
// int — confirm against the nvjpeg.h in use.
nvjpegStatus_t nvjpegGetImageInfo(
nvjpegHandle_t handle, // library handle
const unsigned char *data, // JPEG file data
size_t length, // length of the file data
int *nComponents, // out: number of channels
nvjpegChromaSubsampling_t *subsampling, // out: chroma subsampling information
int *widths, // out: width (per channel)
int *heights); // out: height (per channel)
调用案例如下:
int channels = 0;
int width = 0;
int height = 0;
nvjpegChromaSubsampling_t subsampling;
CHECK_NVJPEG(nvjpegGetImageInfo(nvjpegHandle, (const unsigned char*)pchData, file_size, &channels, &subsampling, &width, &height));
- step4. 使用
nvjpegDecode()
函数来解码单张JPEG图像,函数介绍如下:
// Decodes a single JPEG image into caller-provided output buffers.
nvjpegStatus_t nvjpegDecode(
nvjpegHandle_t handle, // library handle
nvjpegJpegState_t jpeg_handle, // decoder state handle
const unsigned char *data, // input file data
size_t length, // length of the input file data
nvjpegOutputFormat_t output_format,// output (decode) format
nvjpegImage_t *destination, // out: describes the buffers receiving the decoded image
cudaStream_t stream); // CUDA stream
这里注意,nvjpegImage_t里的内存,需要自己管理和创建
cudaStream_t stream;
CHECK_CUDA(cudaStreamCreate(&stream));
nvjpegImage_t dstImage;
memset(&dstImage, 0, sizeof(nvjpegImage_t));
// realloc output buffer if required
int mul = 3;
channels = 1;
for (int c = 0; c < channels; c++) {
int aw = mul * width;
int ah = height;
int sz = aw * ah;
dstImage.pitch[c] = aw;
//if (sz > dstImage.pitch[c]) {
/* if (dstImage.channel[c]) {
CHECK_CUDA(cudaFree(dstImage.channel[c]));
}*/
CHECK_CUDA(cudaMalloc((void**)&dstImage.channel[c], sz));
//dstImage.pitch[c] = sz;
//}
}
nvjpegOutputFormat_t outFormat = nvjpegOutputFormat_t::NVJPEG_OUTPUT_BGRI;
CHECK_NVJPEG(nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
outFormat, &dstImage, stream));
//CHECK_CUDA(cudaStreamSynchronize(stream));
int sz = height * width * sizeof(unsigned char);
unsigned char* pvB = (unsigned char*)malloc(sz *3);
/*unsigned char* pvG = (unsigned char*)malloc(sz);
unsigned char* pvR = (unsigned char*)malloc(sz);*/
CHECK_CUDA(cudaMemcpy(pvB, dstImage.channel[0], sz*3, cudaMemcpyDeviceToHost));
//CHECK_CUDA(cudaMemcpy(pvG, dstImage.channel[1], sz, cudaMemcpyDeviceToHost));
//CHECK_CUDA(cudaMemcpy(pvR, dstImage.channel[2], sz, cudaMemcpyDeviceToHost));
cv::Mat B = cv::Mat(cv::Size(width, height), CV_8UC3, pvB);
//cv::Mat G = cv::Mat(cv::Size(width, height), CV_8UC1, pvG);
//cv::Mat R = cv::Mat(cv::Size(width, height), CV_8UC1, pvR);
cv::imwrite("H:/temp/rgb.bmp", B);
三、完整代码
注意:以下代码仅供参考,显存释放、错误处理等操作请根据实际项目需要自行完善
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "nvjpegDecoder.h"
int main()
{
nvjpegHandle* nvjpegHandle = nullptr;
CHECK_NVJPEG(nvjpegCreateSimple(&nvjpegHandle));
nvjpegJpegState* nvjpegJpegState = nullptr;
CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpegHandle, &nvjpegJpegState));
std::string strFileName = "H:/github/CUDALibrarySamples/nvJPEG/nvJPEG-Decoder/input_images/img2.jpg";
// Read an image from disk.
std::ifstream input(strFileName.c_str(),
std::ios::in | std::ios::binary | std::ios::ate);
if (!(input.is_open())) {
std::cerr << "Cannot open image: " << strFileName
<< ", removing it from image list" << std::endl;
}
// Get the size
std::streamsize file_size = input.tellg();
input.seekg(0, std::ios::beg);
char* pchData = (char*)malloc(file_size);
if (!input.read(pchData, file_size)) {
std::cerr << "Cannot read from file: " << strFileName
<< ", removing it from image list" << std::endl;
}
int channels = 0;
int width = 0;
int height = 0;
nvjpegChromaSubsampling_t subsampling;
CHECK_NVJPEG(nvjpegGetImageInfo(nvjpegHandle, (const unsigned char*)pchData, file_size, &channels, &subsampling, &width, &height));
cudaStream_t stream;
CHECK_CUDA(cudaStreamCreate(&stream));
nvjpegImage_t dstImage;
memset(&dstImage, 0, sizeof(nvjpegImage_t));
// realloc output buffer if required
int mul = 3;
channels = 1;
for (int c = 0; c < channels; c++) {
int aw = mul * width;
int ah = height;
int sz = aw * ah;
dstImage.pitch[c] = aw;
//if (sz > dstImage.pitch[c]) {
/* if (dstImage.channel[c]) {
CHECK_CUDA(cudaFree(dstImage.channel[c]));
}*/
CHECK_CUDA(cudaMalloc((void**)&dstImage.channel[c], sz));
//dstImage.pitch[c] = sz;
//}
}
nvjpegOutputFormat_t outFormat = nvjpegOutputFormat_t::NVJPEG_OUTPUT_BGRI;
CHECK_NVJPEG(nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
outFormat, &dstImage, stream));
//CHECK_CUDA(cudaStreamSynchronize(stream));
int sz = height * width * sizeof(unsigned char);
unsigned char* pvB = (unsigned char*)malloc(sz *3);
/*unsigned char* pvG = (unsigned char*)malloc(sz);
unsigned char* pvR = (unsigned char*)malloc(sz);*/
CHECK_CUDA(cudaMemcpy(pvB, dstImage.channel[0], sz*3, cudaMemcpyDeviceToHost));
//CHECK_CUDA(cudaMemcpy(pvG, dstImage.channel[1], sz, cudaMemcpyDeviceToHost));
//CHECK_CUDA(cudaMemcpy(pvR, dstImage.channel[2], sz, cudaMemcpyDeviceToHost));
cv::Mat B = cv::Mat(cv::Size(width, height), CV_8UC3, pvB);
//cv::Mat G = cv::Mat(cv::Size(width, height), CV_8UC1, pvG);
//cv::Mat R = cv::Mat(cv::Size(width, height), CV_8UC1, pvR);
cv::imwrite("H:/temp/rgb.bmp", B);
//cv::imwrite("H:/temp/g.bmp", G);
//cv::imwrite("H:/temp/b.bmp", B);
printf("hello world!");
return 0;
}