CUDA库之nvjpeg(一)：入门介绍

最新推荐文章于 2024-07-18 17:13:00 发布

hjxu2016

最新推荐文章于 2024-07-18 17:13:00 发布

阅读量1.6k

点赞数 4

分类专栏：编程语言|CUDA入门文章标签：图像处理

本文链接：https://blog.csdn.net/hjxu2016/article/details/137075120

版权

编程语言|CUDA入门专栏收录该内容

19 篇文章 149 订阅

订阅专栏

文章目录

一、介绍
- 1.1 nvJPEG解码器
二、JPEG解码
- 2.1 单张图像解码
三、完整代码

nvJPEG: 一个GPU加速的JPEG编解码库
官方文档： https://docs.nvidia.com/cuda/nvjpeg/index.html
官方教程：https://github.com/NVIDIA/CUDALibrarySamples/tree/master/nvJPEG

一、介绍

1.1 nvJPEG解码器

nvJPEG 库提供了高性能，GPU加速的JPEG图像格式的解码函数，并且普遍应用在深度学习领域和超大规模的多媒体应用中。

这个库提供单张图或者多张图同时解码的能力，可以充分利用GPU资源和优化效率，并且使用者也可以管理需要解码的内存，灵活性还是比较强的。

使用nvJPEG解码时，大体上依次执行以下步骤：

使用JPEG图像数据流作为输入
从数据流中获取图像的宽和高
使用以上获取的信息来管理GPU内存并执行解码操作

nvJPEG提供了专用的API，用于从原始JPEG图像数据流中检索图像信息。

二、JPEG解码

nvJPEG库提供了单张图像解码和批量解码的函数

2.1 单张图像解码

先把图像文件读入内存，并得到文件数据的大小（字节数）作为输入，输出就是解码后的buffer
以下是执行步骤

step0. 加载图像数据

	std::string strFileName = "./input_images/img2.jpg";
	// Read an image from disk. The stream is opened positioned at the end
	// (std::ios::ate) so that tellg() reports the file size.
	std::ifstream input(strFileName.c_str(),
		std::ios::in | std::ios::binary | std::ios::ate);
	if (!(input.is_open())) {
		std::cerr << "Cannot open image: " << strFileName
			<< ", removing it from image list" << std::endl;
		// Stop here: continuing would make tellg() return -1 and the
		// allocation below would be sized from that bogus value.
		// NOTE(review): assumes this fragment lives inside main(), as in the
		// complete listing at the end of the article.
		return 1;
	}
	// Get the size
	std::streamsize file_size = input.tellg();
	if (file_size <= 0) {
		std::cerr << "Cannot determine size of file: " << strFileName << std::endl;
		return 1;
	}
	input.seekg(0, std::ios::beg);

	// Host buffer holding the still-compressed JPEG bytes.
	char* pchData = (char*)malloc(static_cast<size_t>(file_size));
	if (pchData == nullptr) {
		std::cerr << "Cannot allocate " << file_size << " bytes for file: "
			<< strFileName << std::endl;
		return 1;
	}
	if (!input.read(pchData, file_size)) {
		std::cerr << "Cannot read from file: " << strFileName
			<< ", removing it from image list" << std::endl;
		// The buffer content is incomplete, so it must not be handed to the decoder.
		free(pchData);
		return 1;
	}

step1. 创建nvJPEG库的句柄（可使用nvjpegCreateSimple()或nvjpegCreateEx()两个函数之一）

	nvjpegHandle* nvjpegHandle = nullptr;
	CHECK_NVJPEG(nvjpegCreateSimple(&nvjpegHandle));

step2. 调用nvjpegJpegStateCreate()函数，创建nvJPEG的状态

	nvjpegJpegState* nvjpegJpegState = nullptr;
	CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpegHandle, &nvjpegJpegState));

除了以上外，还有其他的一些函数，也是有帮助的，这里先列举，可暂时忽略

nvjpegStatus_t nvjpegGetProperty(libraryPropertyType type, int *value);

[DEPRECATED] nvjpegStatus_t nvjpegCreate(nvjpegBackend_t backend, nvjpegHandle_t *handle , nvjpeg_dev_allocator allocator);

nvjpegStatus_t nvjpegCreateSimple(nvjpegHandle_t *handle);

nvjpegStatus_t nvjpegCreateEx(nvjpegBackend_t backend, nvjpegDevAllocator_t *dev_allocator, nvjpegPinnedAllocator_t *pinned_allocator, unsigned int flags, nvjpegHandle_t *handle);

nvjpegStatus_t nvjpegDestroy(nvjpegHandle_t handle);

nvjpegStatus_t nvjpegJpegStateCreate(nvjpegHandle_t handle, nvjpegJpegState_t *jpeg_handle);

nvjpegStatus_t nvjpegJpegStateDestroy(nvjpegJpegState_t handle);

step3. 使用nvjpegGetImageInfo()从数据流中获取宽高
函数的参数如下：

nvjpegStatus_t nvjpegGetImageInfo(
nvjpegHandle_t              handle, // library handle
const unsigned char         *data,	// encoded file data
size_t                      length, // length of the file data
int                         *nComponents, // receives the number of channels
nvjpegChromaSubsampling_t   *subsampling, // receives the chroma subsampling information
int                         *widths, // receives the width; NOTE(review): the official nvJPEG documentation describes this as an array of NVJPEG_MAX_COMPONENT entries (one per channel) — confirm
int                         *heights); // receives the height; NOTE(review): same array-size remark as for widths — confirm

调用案例如下：

	int channels = 0;
	int width = 0;
	int height = 0;
	nvjpegChromaSubsampling_t subsampling;
	CHECK_NVJPEG(nvjpegGetImageInfo(nvjpegHandle, (const unsigned char*)pchData, file_size, &channels, &subsampling, &width, &height));

step4. 使用 nvjpegDecode()函数来解码单张JPEG图像，函数介绍如下：

nvjpegStatus_t nvjpegDecode(
nvjpegHandle_t          handle, // library handle
nvjpegJpegState_t       jpeg_handle, // decoder state handle
const unsigned char     *data, // input file data
size_t                  length, // length of the input file data
nvjpegOutputFormat_t    output_format,// output (decoded) format
nvjpegImage_t           *destination, // describes the buffers that receive the decoded image
cudaStream_t            stream); // CUDA stream

这里注意，nvjpegImage_t里的内存，需要自己管理和创建

	cudaStream_t stream;

	CHECK_CUDA(cudaStreamCreate(&stream));

	nvjpegImage_t dstImage;
	memset(&dstImage, 0, sizeof(nvjpegImage_t));
	// realloc output buffer if required
	int mul = 3;
	channels = 1;
	for (int c = 0; c < channels; c++) {
		int aw = mul * width;
		int ah = height;
		int sz = aw * ah;
		dstImage.pitch[c] = aw;
		//if (sz > dstImage.pitch[c]) {
		/*	if (dstImage.channel[c]) {
				CHECK_CUDA(cudaFree(dstImage.channel[c]));
			}*/
			CHECK_CUDA(cudaMalloc((void**)&dstImage.channel[c], sz));
			//dstImage.pitch[c] = sz;
		//}
	}

	nvjpegOutputFormat_t outFormat = nvjpegOutputFormat_t::NVJPEG_OUTPUT_BGRI;
	CHECK_NVJPEG(nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
		outFormat, &dstImage, stream));
	//CHECK_CUDA(cudaStreamSynchronize(stream));
	int sz = height * width * sizeof(unsigned char);
	unsigned char* pvB = (unsigned char*)malloc(sz *3);
	/*unsigned char* pvG = (unsigned char*)malloc(sz);
	unsigned char* pvR = (unsigned char*)malloc(sz);*/
	CHECK_CUDA(cudaMemcpy(pvB, dstImage.channel[0], sz*3, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvG, dstImage.channel[1], sz, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvR, dstImage.channel[2], sz, cudaMemcpyDeviceToHost));
	cv::Mat B = cv::Mat(cv::Size(width, height), CV_8UC3, pvB);
	//cv::Mat G = cv::Mat(cv::Size(width, height), CV_8UC1, pvG);
	//cv::Mat R = cv::Mat(cv::Size(width, height), CV_8UC1, pvR);
	cv::imwrite("H:/temp/rgb.bmp", B);

三、完整代码

注意，没有考虑显存的释放等操作，仅供参考

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"
#include "nvjpegDecoder.h"
int main()
{
	nvjpegHandle* nvjpegHandle = nullptr;
	CHECK_NVJPEG(nvjpegCreateSimple(&nvjpegHandle));

	nvjpegJpegState* nvjpegJpegState = nullptr;
	CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpegHandle, &nvjpegJpegState));

	std::string strFileName = "H:/github/CUDALibrarySamples/nvJPEG/nvJPEG-Decoder/input_images/img2.jpg";
	// Read an image from disk.
	std::ifstream input(strFileName.c_str(),
		std::ios::in | std::ios::binary | std::ios::ate);
	if (!(input.is_open())) {
		std::cerr << "Cannot open image: " << strFileName
			<< ", removing it from image list" << std::endl;
	}
	// Get the size
	std::streamsize file_size = input.tellg();
	input.seekg(0, std::ios::beg);

	 char* pchData = (char*)malloc(file_size);
	if (!input.read(pchData, file_size)) {
		std::cerr << "Cannot read from file: " << strFileName
			<< ", removing it from image list" << std::endl;

	}

	int channels = 0;
	int width = 0;
	int height = 0;
	nvjpegChromaSubsampling_t subsampling;
	CHECK_NVJPEG(nvjpegGetImageInfo(nvjpegHandle, (const unsigned char*)pchData, file_size, &channels, &subsampling, &width, &height));
	cudaStream_t stream;

	CHECK_CUDA(cudaStreamCreate(&stream));

	nvjpegImage_t dstImage;
	memset(&dstImage, 0, sizeof(nvjpegImage_t));
	// realloc output buffer if required
	int mul = 3;
	channels = 1;
	for (int c = 0; c < channels; c++) {
		int aw = mul * width;
		int ah = height;
		int sz = aw * ah;
		dstImage.pitch[c] = aw;
		//if (sz > dstImage.pitch[c]) {
		/*	if (dstImage.channel[c]) {
				CHECK_CUDA(cudaFree(dstImage.channel[c]));
			}*/
			CHECK_CUDA(cudaMalloc((void**)&dstImage.channel[c], sz));
			//dstImage.pitch[c] = sz;
		//}
	}

	nvjpegOutputFormat_t outFormat = nvjpegOutputFormat_t::NVJPEG_OUTPUT_BGRI;
	CHECK_NVJPEG(nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
		outFormat, &dstImage, stream));
	//CHECK_CUDA(cudaStreamSynchronize(stream));
	int sz = height * width * sizeof(unsigned char);
	unsigned char* pvB = (unsigned char*)malloc(sz *3);
	/*unsigned char* pvG = (unsigned char*)malloc(sz);
	unsigned char* pvR = (unsigned char*)malloc(sz);*/
	CHECK_CUDA(cudaMemcpy(pvB, dstImage.channel[0], sz*3, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvG, dstImage.channel[1], sz, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvR, dstImage.channel[2], sz, cudaMemcpyDeviceToHost));
	cv::Mat B = cv::Mat(cv::Size(width, height), CV_8UC3, pvB);
	//cv::Mat G = cv::Mat(cv::Size(width, height), CV_8UC1, pvG);
	//cv::Mat R = cv::Mat(cv::Size(width, height), CV_8UC1, pvR);
	cv::imwrite("H:/temp/rgb.bmp", B);
	//cv::imwrite("H:/temp/g.bmp", G);
	//cv::imwrite("H:/temp/b.bmp", B);

	printf("hello world！");
	return 0;
}

hjxu2016

关注

4
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
CUDA库之nvjpeg(一)：入门介绍

nvJPEG 库提供了高性能，GPU加速的JPEG图像格式的解码函数，并且普遍应用在深度学习领域和超大规模的多媒体应用中。这个库提供单张图或者多张图同时解码的能力，可以充分利用GPU资源和优化效率，并且使用者也可以管理需要解码的内存，灵活性还是比较强的。使用JPEG图像数据流作为输入从数据流中获取图像的宽和高使用以上获取的信息来管理GPU内存并执行解码操作nvJPEG提供了专用的API，用于从原始JPEG图像数据流中检索图像信息。
复制链接

扫一扫