【笔记】An ffmpeg and SDL Tutorial 01《视频帧保存到PPM文件》

最新推荐文章于 2017-07-04 11:35:51 发布

kinghzkingkkk

最新推荐文章于 2017-07-04 11:35:51 发布

阅读量594

点赞数

分类专栏：影音视频播放器

本文链接：https://blog.csdn.net/kinghzkingkkk/article/details/70231555

版权

影音视频同时被 2 个专栏收录

14 篇文章 0 订阅

订阅专栏

播放器

4 篇文章 0 订阅

订阅专栏

概述

本节讲述了从一个视频文件中截取图片的过程。代码可参考 Git 仓库（https://git.oschina.net/MyFFMpeg/MyPlayer）下的《\src\An-ffmpeg-and-SDL-Tutorial\tutorial01》工程。该工程对FFmpeg的数据结构做了简单的封装，便于阅读。

媒体文件按照一定的格式组织而成，文件本身称为“容器”。容器的类型标明了文件的组装形式以及其他各种与媒体相关的信息。除了文件信息以外，其它大部分内容称为“流”，流一般分为音频流和视频流（还有字幕流等）。流中的数据被切割成一块一块的，这样的块叫做“包”（Packet）。通过FFmpeg，我们将流中的“包”读出来，然后通过编解码（CODEC）模块将“包”转换为“帧”（Frame），最后对“帧”进行不同的操作，达到我们想要的效果。本节我们将“帧”保存为PPM格式的图片，PPM格式的介绍可以参考文章《PPM文件格式详解》。

下面通过两张图片简单介绍下FFMpeg数据结构（其实这些结构也就说明了媒体文件的存储结构），如下图：

FFmpeg库的使用流程很固定，下面是其解码流程图：

初始化FFmpeg库

与很多库一样，FFmpeg需要进行初始化才能使用，整个程序初始化一次即可，对应代码如下：

// One-time FFmpeg setup for the whole program: calls av_register_all()
// followed by avformat_network_init(). Intended to be invoked once before
// any other FFmpeg usage in this project.
// NOTE(review): av_register_all() is deprecated in newer FFmpeg releases —
// confirm which FFmpeg version this project targets before upgrading.
void my::InitFFmpegLibrary()
{
	av_register_all();
	avformat_network_init();
}

打开文件

打开文件，主要执行了下面的操作：

初始化m_pFormatCtx
获得视音频索引
打开视频解码器

对应于下面代码中的函数：int CMyFFmpeg::Init(const char *pFileName)

// Thin wrapper around the FFmpeg demuxer/decoder state for one media file:
// the format context, the indices of the first video and audio streams, and
// the opened video codec context. CMyDecoder is a friend and reads these
// members directly.
class CMyFFmpeg
{
public:
	CMyFFmpeg();
	~CMyFFmpeg();

	// Initializes m_pFormatCtx, m_pCodecCtx, etc. for the given file.
	// Returns 0 on success and a negative step-specific code on failure.
	int				Init(const char	*pFileName);
	// Releases what Init() acquired.
	void			Uninit();

	// Prints information about the wrapped structures.
	void			Log_StructInfo();

	// Return the width / height stored in m_pCodecCtx.
	int				GetWidth();
	int				GetHeight();

private:
	friend class CMyDecoder;

	std::string				m_strFileName;		// file name passed to Init()
	AVFormatContext			*m_pFormatCtx;		// demuxer context opened by Init()
	unsigned int			m_nIndexVideo;		// index of the first video stream; Init() stores -1 (wrapped to unsigned) when none is found
	unsigned int			m_nIndexAudio;		// index of the first audio stream; same -1 convention

	AVCodecContext			*m_pCodecCtx;		// video decoder context allocated and opened by Init()
	AVCodec					*m_pCodec;					// Strictly speaking this belongs to m_pCodecCtx rather than being an independently owned member; releasing m_pCodecCtx is sufficient
};


int CMyFFmpeg::Init(const char *pFileName)
{
	m_strFileName = pFileName;

	// 初始化m_pFormatCtx
	{
		m_pFormatCtx = avformat_alloc_context();
		if (m_pFormatCtx == NULL)
		{
			return -1;
		}
		if (avformat_open_input(&m_pFormatCtx, pFileName, NULL, NULL) != 0)
		{
			printf("Couldn't open input stream.\n");
			return -2;
		}
		if (avformat_find_stream_info(m_pFormatCtx, NULL) < 0)
		{
			printf("Couldn't find stream information.\n");
			return -3;
		}
	}

	// 获得视音频索引
	{
		m_nIndexVideo = -1;
		m_nIndexAudio = -1;
		for (unsigned int i = 0; i < m_pFormatCtx->nb_streams; i++)
		{
			if (m_pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
			{
				if (m_nIndexVideo == -1)
				{
					m_nIndexVideo = i;
				}
			}
			else if (m_pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
			{
				if (m_nIndexAudio == -1)
				{
					m_nIndexAudio = i;
				}
			}
		}
		if (m_nIndexVideo == -1)
		{
			printf("Didn't find a video stream.\n");
			return -21;
		}
	}

	// 打开视频解码器
	{
		m_pCodecCtx = avcodec_alloc_context3(NULL);
		if (m_pCodecCtx == NULL)
		{
			printf("Could not allocate AVCodecContext\n");
			return -31;
		}
		int nRet = avcodec_parameters_to_context(m_pCodecCtx, m_pFormatCtx->streams[m_nIndexVideo]->codecpar);			// 新的API，替换avcodec_copy_context
		if (nRet != 0)
		{
			printf("Could not allocate AVCodecContext\n");
			return -32;
		}

		m_pCodec = avcodec_find_decoder(m_pCodecCtx->codec_id);			//指向AVCodec的指针.查找解码器
		if (m_pCodec == NULL)
		{
			printf("Codec not found.\n");
			return -33;
		}
		//打开解码器
		if (avcodec_open2(m_pCodecCtx, m_pCodec, NULL) < 0)
		{
			printf("Could not open codec.\n");
			return -34;
		}
	}

	return 0;
}

void CMyFFmpeg::Uninit()
{
	if (m_pCodecCtx)
	{
		avcodec_close(m_pCodecCtx);
		m_pCodecCtx = NULL;
		// avcodec_free_context(&m_pCodecCtx);
	}

	if (m_pFormatCtx)
	{
		avformat_close_input(&m_pFormatCtx);
		m_pFormatCtx = NULL;
	}
}

读取数据AVFrame

函数AVFrame* CMyDecoder::GetFrame()对数据读取和解码进行了封装，代码如下：

// Reads packets from a CMyFFmpeg instance, decodes video frames and converts
// them with swscale to the pixel format requested in Init().
class CMyDecoder
{
public:
	CMyDecoder();
	~CMyDecoder();

public:
	// Binds the decoder to pMy and allocates the frames, conversion buffer and
	// scaler context for output format fmt. Returns 0 on success, negative on failure.
	int					Init(CMyFFmpeg *pMy, AVPixelFormat fmt);
	// Releases the frames and conversion buffer and clears the CMyFFmpeg pointer. Returns 0.
	int					Uninit();

	// Reads and decodes; returns the converted frame (m_pFrameSws) or NULL.
	AVFrame*			GetFrame();
	AVFrame*			GetFrame2();			// Same purpose, implemented with the newer API

private:
	CMyFFmpeg			*m_pMy;					// source of the format/codec contexts; assigned in Init(), not released here
	SwsContext			*m_pSwsCtx;				// scaler context created with sws_getContext() in Init()
	AVFrame				*m_pFrame;				// Frame data produced by avcodec_decode_video2
	AVFrame				*m_pFrameSws;			// Frame after swscale conversion
	uint8_t				*m_pBuff4FrameSws;		// Pixel buffer backing m_pFrameSws
	AVPacket			m_packet;				// packet storage passed to av_read_frame() in GetFrame()
};

// Binds this decoder to an initialised CMyFFmpeg and allocates everything
// needed to convert decoded frames to pixel format `fmt` at the source
// resolution.
//
// Parameters:
//   pMy - CMyFFmpeg whose codec context has been opened; must not be NULL.
//   fmt - destination pixel format for the converted frame.
//
// Returns:
//    0  success
//   -1  pMy is NULL or has no codec context (m_pMy is left NULL)
//   -2  an allocation, the picture setup or the scaler creation failed;
//       everything allocated by this call has been released again
int CMyDecoder::Init(CMyFFmpeg *pMy, AVPixelFormat fmt)
{
	m_pMy = pMy;
	if (!m_pMy)
	{
		return -1;
	}
	if (!m_pMy->m_pCodecCtx)
	{
		// CMyFFmpeg::Init() was not run or failed; do not keep a half-usable binding.
		m_pMy = NULL;
		return -1;
	}

	const int width = m_pMy->m_pCodecCtx->width;
	const int height = m_pMy->m_pCodecCtx->height;

	// avpicture_get_size() reports errors with a negative value, which must
	// not reach av_malloc() as a huge unsigned size.
	const int nBufSize = avpicture_get_size(fmt, width, height);

	// Allocate the structures used for decoding and conversion
	m_pFrame = av_frame_alloc();
	m_pFrameSws = av_frame_alloc();
	m_pBuff4FrameSws = (nBufSize > 0) ? static_cast<uint8_t *>(av_malloc(nBufSize)) : NULL;
	m_pSwsCtx = sws_getContext(width, height, m_pMy->m_pCodecCtx->pix_fmt,
		width, height, fmt, SWS_BICUBIC, NULL, NULL, NULL);

	bool bOk = (m_pFrame && m_pFrameSws && m_pBuff4FrameSws && m_pSwsCtx);
	if (bOk)
	{
		// Point m_pFrameSws' data/linesize at the buffer only once both are known to exist.
		bOk = (avpicture_fill((AVPicture *)m_pFrameSws, m_pBuff4FrameSws, fmt, width, height) >= 0);
	}

	if (!bOk)
	{
		// Undo the partial initialisation; all of these calls accept NULL.
		sws_freeContext(m_pSwsCtx);
		m_pSwsCtx = NULL;
		av_free(m_pBuff4FrameSws);
		m_pBuff4FrameSws = NULL;
		av_frame_free(&m_pFrameSws);
		av_frame_free(&m_pFrame);
		return -2;
	}

	return 0;
}

int CMyDecoder::Uninit()
{
	m_pMy = NULL;

	// 释放 m_pFrameSws和m_pBuff4FrameSws
	if (m_pBuff4FrameSws)
	{
		av_free(m_pBuff4FrameSws);
		m_pBuff4FrameSws = NULL;
	}
	if (m_pFrameSws)
	{
		av_frame_free(&m_pFrameSws);
		m_pFrameSws = NULL;
	}

	// 释放 m_pFrame
	if (m_pFrame)
	{
		av_frame_free(&m_pFrame);
		m_pFrame = NULL;
	}

	return 0;
}

// Returns the next decoded video frame, converted to the pixel format chosen
// in Init(), or NULL when no further packet can be read (end of stream or
// read error) or when the decoder has not been initialised.
//
// Packets from other streams, packets that fail to decode and packets after
// which the decoder has no picture yet are skipped internally. Previously
// each of those cases also returned NULL, which made them indistinguishable
// from end of stream for the caller.
//
// The returned pointer is m_pFrameSws: it is owned by this object and its
// pixels are overwritten by the next successful call.
//
// NOTE(review): pictures still buffered inside the decoder when the stream
// ends are not drained here.
AVFrame* CMyDecoder::GetFrame()
{
	// Guard against use before a successful Init() / after Uninit().
	if (!m_pMy || !m_pMy->m_pFormatCtx || !m_pMy->m_pCodecCtx
		|| !m_pSwsCtx || !m_pFrame || !m_pFrameSws)
	{
		return NULL;
	}

	while (av_read_frame(m_pMy->m_pFormatCtx, &m_packet) >= 0)
	{
		bool hasGetFrame = false;

		// stream_index is a signed int while the stored index is unsigned;
		// compare explicitly instead of relying on implicit conversion.
		if (m_packet.stream_index >= 0
			&& static_cast<unsigned int>(m_packet.stream_index) == m_pMy->m_nIndexVideo)
		{
			int got_picture = 0;
			int nRet = avcodec_decode_video2(m_pMy->m_pCodecCtx, m_pFrame, &got_picture, &m_packet);
			if (nRet >= 0 && got_picture != 0)
			{
				sws_scale(m_pSwsCtx, (const uint8_t* const*)m_pFrame->data, m_pFrame->linesize, 0,
					m_pMy->m_pCodecCtx->height, m_pFrameSws->data, m_pFrameSws->linesize);
				hasGetFrame = true;
			}
		}

		// After every successful av_read_frame the packet's internal data must be released.
		av_free_packet(&m_packet);

		if (hasGetFrame)
		{
			return m_pFrameSws;
		}
	}

	return NULL;
}

保存文件

根据AVFrame创建PPM文件，代码如下：

// Writes one frame to a binary PPM ("P6") file.
//
// Parameters:
//   pFrame      - frame whose first plane (data[0] / linesize[0]) holds the
//                 pixels; 3 bytes per pixel are written per row.
//                 NOTE(review): this presumes a packed 24-bit RGB frame — confirm at the call site.
//   width       - number of pixels per row; must be > 0.
//   height      - number of rows; must be > 0.
//   strFileName - path of the file to create or overwrite.
//
// Returns nothing. The function silently does nothing when the arguments are
// invalid or the file cannot be opened.
void my::File_WritePPM(AVFrame *pFrame, int width, int height, const std::string &strFileName)
{
	// Reject input that would otherwise be dereferenced or produce a bogus header.
	if (pFrame == NULL || pFrame->data[0] == NULL || width <= 0 || height <= 0)
	{
		return;
	}

	// Open the file
	std::ofstream ofile(strFileName.c_str(), std::ios::binary);
	if (!ofile)
	{
		return;
	}

	// Write the header
	ofile << "P6\n"
		<< width << " " << height << "\n"
		<< "255\n";

	// Write the pixel rows. Each pixel takes 3 bytes, so a row is width*3
	// bytes; rows in the frame are linesize[0] bytes apart, which may differ
	// from width*3 because of padding.
	const std::streamsize nSizePerLine = static_cast<std::streamsize>(width) * 3;
	const uint8_t *pLine = pFrame->data[0];
	for (int y = 0; y < height; y++)
	{
		ofile.write(reinterpret_cast<const char *>(pLine), nSizePerLine);
		// Advance by pointer arithmetic to avoid an int multiplication overflow.
		pLine += pFrame->linesize[0];
	}

	// Close the file
	ofile.close();
}