ffmpeg使用scale_npp进行AV_PIX_FMT_CUDA到AV_PIX_FMT_YUV420P的转换

最新推荐文章于 2024-06-30 11:30:21 发布

疼疼国王

最新推荐文章于 2024-06-30 11:30:21 发布

阅读量1.6k

点赞数

分类专栏： ffmpeg 文章标签： c++ 音视频 ffmpeg

本文链接：https://blog.csdn.net/sinat_36304757/article/details/127498433

版权

本文介绍了如何使用ffmpeg的scale_npp滤镜进行AV_PIX_FMT_CUDA到AV_PIX_FMT_YUV420P的视频帧转换。通过对比使用CPU的av_hwframe_transfer_data和sws_scale，强调了GPU转换以提高转码速度的重要性。虽然使用scale_npp在GPU上进行转换的fps略有提升，但效率提升不显著，可能存在的代码优化问题有待进一步研究。

摘要由CSDN通过智能技术生成

一、关于filter的基础命令

《FFmpeg filter的使用介绍》

1. 列出所有filter

ffmpeg -filters

2. 查看某个filter的详细信息

ffmpeg -h filter=scale_npp

3. 实现画面切分为两半并将上半边翻转(本条命令比较全面的展示了filter的使用语法)

ffmpeg -i INPUT -vf "split [main][tmp]; [tmp] crop=iw:ih/2:0:0, vflip [flip]; [main][flip] overlay=0:H/2" OUTPUT

用到了四个filter: split,crop,vflip,overlay
split的输入是解码后的数据，crop的输入是tmp, vflip的输出是flip,overlay的输入是main和flip
=(等号)后边为本filter的参数，各参数之间用冒号(:)分隔，顺序需要按照说明文档中给出的参数顺序，例如overlay=0:H/2表示x=0, y=H/2，也可以显式写出参数名： [main][flip] overlay=x=0:y=H/2
[main]、[tmp]、[flip] 这类方括号中的名字是对filter输出取的别名，供后续的filter作为输入使用

4. 抽取视频Y、U、V分量到不同的文件

ffmpeg -i jack.mp4 -filter_complex "extractplanes=y+u+v[y][u][v]" -map "[y]" jack_y.mp4 -map "[u]" jack_u.mp4 -map "[v]" jack_v.mp4

5. 使用scale_npp进行转码

ffmpeg -vsync 0 -hwaccel_device 0 -hwaccel cuda -hwaccel_output_format cuda -i ~/vedio/drone1.flv -vf "scale_npp=format=yuv420p,hwdownload,format=yuv420p" ff22cuda2.yuv

二、AV_PIX_FMT_CUDA到AV_PIX_FMT_BGR24的转码

1. 背景

当使用cuda进行h264解码硬件加速时，通过avcodec_receive_frame函数得到的解码后的视频帧格式为AV_PIX_FMT_CUDA，需要调用av_hwframe_transfer_data函数将显存中的CUDA格式的帧转存到内存中，转存的结果为NV12格式。如果自己需要的帧格式为BGR24，则还需要调用sws_scale进行转码，可惜的是通过sws_scale进行NV12到BGR24转换的效率非常低。
因此，想要通过scale_npp在GPU中直接进行CUDA->YUV420P的转换，再使用CPU进行YUV420P->BGR24的转换，以加快转码的速度。

2. 使用cpu

av_hwframe_transfer_data从显存中取数据+sws_scale转码，每帧耗时13ms,fps=50

3. 使用GPU(filter = scale_npp)

代码（不确定是否完全正确）

 /**
  * @file   cudaDecoder.cpp
  * @brief  Builds on CUDA hardware decoding and also moves the pixel-format conversion to CUDA, using the scale_npp filter
  * @date   2022/09/29
  * @note   matters needing attention
  * @version 1.0
  */

#include <stdio.h>
#include <stdlib.h>
extern "C"
{
   
#include "libavcodec/avcodec.h"
#include "libavfilter/avfilter.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libavutil/ffversion.h"
#include "libswresample/swresample.h"
#include "libswscale/swscale.h"
#include "libpostproc/postprocess.h"
#include "libavutil/imgutils.h"
#include "libavutil/hwcontext.h"
#include "libavutil/pixdesc.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavfilter/buffersrc.h"
#include "libavfilter/buffersink.h"
}

#include <chrono>
#include <iostream>
using namespace std::chrono;


// Filter-graph handles. They are not used in the part of the file visible
// here; presumably they hold the scale_npp graph and its buffer source/sink
// endpoints -- TODO confirm against the code that sets them up.
AVFilterGraph* filter_graph;
AVFilterContext* buffersink_ctx;
AVFilterContext* buffersrc_ctx;

// Pixel format that get_hw_format() looks for in the list it is given.
static enum AVPixelFormat hw_pix_fmt;

// Hardware device context; filled in by av_hwdevice_ctx_create() inside
// hw_decoder_init(), which then hands a reference to the codec context.
static AVBufferRef* hw_device_ctx = NULL;

/**
 * Pick the hardware pixel format out of a list of candidate formats.
 *
 * Walks pix_fmts until the -1 terminator and returns the entry equal to the
 * file-scope hw_pix_fmt. If no entry matches, a message is written to stderr
 * and AV_PIX_FMT_NONE is returned.
 *
 * @param ctx       codec context; not used by this function
 * @param pix_fmts  candidate formats, terminated by -1
 * @return hw_pix_fmt when it is in the list, otherwise AV_PIX_FMT_NONE
 */
AVPixelFormat get_hw_format(AVCodecContext* ctx, const AVPixelFormat* pix_fmts)
{
	const enum AVPixelFormat* candidate = pix_fmts;

	while (*candidate != -1) {
		if (*candidate == hw_pix_fmt)
			return *candidate;
		++candidate;
	}

	fprintf(stderr, "Failed to get HW surface format.\n");
	return AV_PIX_FMT_NONE;
}

/**
 * Create a hardware device of the given type and attach it to a codec context.
 *
 * The device is created into the file-scope hw_device_ctx, and a new
 * reference to it is stored in ctx->hw_device_ctx.
 *
 * @param ctx   codec context that receives the device reference
 * @param type  hardware device type handed to av_hwdevice_ctx_create()
 * @return the non-negative result of av_hwdevice_ctx_create() on success,
 *         or a negative error code if the device could not be created or
 *         the reference for the codec context could not be allocated
 */
static int hw_decoder_init(AVCodecContext* ctx, const enum AVHWDeviceType type)
{
	int err = av_hwdevice_ctx_create(&hw_device_ctx, type, NULL, NULL, 0);
	if (err < 0) {
		fprintf(stderr, "Failed to create specified HW device.\n");
		return err;
	}

	// av_buffer_ref() yields NULL when the new reference cannot be allocated.
	// Report that as an error instead of returning success with a codec
	// context that has no hardware device attached.
	ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
	if (!ctx->hw_device_ctx) {
		fprintf(stderr, "Failed to reference the HW device context.\n");
		return AVERROR(ENOMEM);
	}
	return err;
}


//将FFmpeg解码后的数据保存到本地文件
void saveFrame(AVFrame* pFrame, int width, int height, int iFrame)
{
   
	FILE* pFile;
	char szFilename[256];
	int y;

	// 打开文件
	//sprintf(szFilename, "/home/chao/testffmpeg/frame%d.ppm", iFrame);
	sprintf(szFilename, "/home/sports/Downloads/CL/media/frame%d.ppm", iFrame);//Micro_start
	pFile = fopen(szFilename, "wb");
	if (pFile == NULL)
		return;

	// 写入文件头
	fprintf(pFile, "P6\n%d %d\n255\n", width, height);

	// 写入像素数据
	for (y = 0; y < height; y++)
		fwrite(pFrame->data[0] + y * pFrame->linesize[0], 1, width * 3, pFile);
	//fwrite(pFrame->data[0] + y * 11520, 1, width * 3, pFile);

// 关闭文件
	fclose