【FFmpeg(2016)】视频文件分离器(demuxing)——H264&PCM

最新推荐文章于 2024-02-27 17:24:09 发布

weilin.jiang

最新推荐文章于 2024-02-27 17:24:09 发布

阅读量2.5k

点赞数 1

分类专栏： FFmpeg 文章标签： ffmpeg 2016 AAC pcm

本文链接：https://blog.csdn.net/Jammg/article/details/52684061

版权

FFmpeg 专栏收录该内容

10 篇文章 1 订阅

订阅专栏

【前言】

本文章主要对视频文件的视频流、音频流进行分离，并将音频流解码为原始数据PCM。

【FFmpeg数据结构】

AVFormatContext    
AVStream         
AVCodecContext          
AVCodec
AVFrame
AVPacket
AVBitStreamFilterContext

这些数据结构关系如下：

/* Simplified sketch (not the real FFmpeg declaration): only the members
 * this article relies on are listed. */
AVFormatContext
{
    int nb_streams;         /* how many streams the file contains */
    AVStream *streams[];    /* one pointer per stream, nb_streams entries */
};

/* Simplified sketch (not the real FFmpeg declaration): each stream carries
 * a pointer to its codec context. */
AVStream
{
    AVCodecContext*;        /* codec context belonging to this stream */
};

/* Simplified sketch (not the real FFmpeg declaration): the codec context
 * records which codec is in use through its codec ID. */
AVCodecContext
{
    AVCodecID ;             /* codec identifier, e.g. AV_CODEC_ID_AAC */
};

AVFormatContext是一个与文件相关联的结构，包含了对整个文件进行操作所需的最全信息。

AVStream则是AVFormatContext里的流，比如一个视频文件有视频流、音频流，那么指针数组就包含了指向这些流信息的指针，文件一些流信息就保存在AVStream里。那到底一个文件有多少个流呢，那就看AVFormatContext的nb_streams成员。

AVCodec是一个编解码器，可以单纯地理解为一个编解码算法的结构。

AVCodecContext是AVCodec的一个上下文，打个比方，在视频编码h264时，有i p b三种帧，如果有一个视频流是 I B B P这种顺序到达，由于B帧需要依靠前后的帧来计算出本帧显示的内容，所以需要一些buffer保存前后的帧，以根据这些来计算出B帧的内容，当然还有很多其他的内容。

AVCodecID是编解码器的ID，如编码AAC时，就使用AV_CODEC_ID_AAC。

AVFrame 是编码前、解码后保存的数据。

AVPacket是编码后、解码前保存的数据。

AVBitStreamFilterContext数据结构则在保存h264时用到，主要是为添加0x00000001头和SPS,PPS信息，因为单纯的从视频文件video流获取不到SPS PPS这些信息。

【代码】

extern "C"
{
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libavcodec/avcodec.h"
#include "libavutil/frame.h"
#include "libavutil/samplefmt.h"
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
}

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")

/* 将VIDEO解为H264和PCM */
int main()
{

	int video_idx, audio_idx;	                        
	FILE *video_fp = NULL,*audio_fp_pcm = NULL;
	AVFormatContext *pFormatCtx = NULL;
	AVCodecContext *pCodecCtx = NULL;
    AVFrame *pframe;
	AVPacket packet;
    int got;
    char *data = NULL;

	video_fp = fopen("video.h264", "ab");	
	audio_fp_pcm = fopen("audio.pcm", "ab");

    /* 注册所有codec 和 format,必须在第一步进行 */
	av_register_all();

    /* 打开文件，此时和AVFormatContext关联 */
	avformat_open_input(&pFormatCtx, m_urlstr.c_str(), NULL, NULL);
    /* 查找一些流信息，并将其信息保存在AVFormatContext结构体 */
	avformat_find_stream_info(pFormatCtx, NULL);

    /* 查找流 */
	for (int i = 0; i < pFormatCtx->nb_streams; ++i)
	{
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
			video_idx = i;
		else if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
			audio_idx = i;
	}

    /* 打开音频流的编码器 */
	pCodecCtx = pFormatCtx->streams[audio_idx]->codec;
	AVCodec *codec = avcodec_find_decoder(pCodecCtx->codec_id);
	avcodec_open2(pCodecCtx, codec, NULL);


    /* 分配AVFrame */
	pframe = av_frame_alloc();
	

	//获取PPS,SPS,packet不能获取到此数据
	unsigned char *dummy = NULL;
	int dummy_len;
	AVBitStreamFilterContext *bsfc = av_bitstream_filter_init("h264_mp4toannexb");

	//packet前4个字节是长度，并且没有0x00000001的前缀，不是真正的264
	char prefix[] = { 0,0,0,1 };
	while (av_read_frame(pFormatCtx, &packet) >= 0) //从文件读取数据
	{
		if (packet.stream_index == video_idx)
		{
			av_bitstream_filter_filter(bsfc,
				pFormatCtx->streams[video_idx]->codec,
				NULL,
				&packet.data, &packet.size,
				packet.data, packet.size,	
				0);
			fwrite(packet.data, packet.size, 1, video_fp);      
		}
		else if (packet.stream_index == audio_idx)
		{
            /* 解码音频,pframe获取到解码后的数据 */
            avcodec_decode_audio4(pCodecCtx, pframe, &got, &packet);

            int data_size = av_samples_get_buffer_size(
                NULL,
                pCodecCtx->channels,
                pframe->nb_samples, //number of audio samples (per channel) described by this frame
                pCodecCtx->sample_fmt,
                1);

            qDebug() << "sample format --------------> " << pCodecCtx->sample_fmt;	    //8:AV_SAMPLE_FMT_FLTP
            qDebug() << "channels -------------------> " << pCodecCtx->channels;	    //2
            qDebug() << "nb_samples -----------------> " << pframe->nb_samples;		    //2048 ，在这一帧中，每个声道的采样个数
            qDebug() << "channel_layout -------------> " << pCodecCtx->channel_layout;  //3
            qDebug() << "sample rate ----------------> " << pCodecCtx->sample_rate;     //44100,每秒采样个数（所有声道采样数之和）
            qDebug() << "bit rate -------------------> " << pCodecCtx->bit_rate;       
            qDebug() << "data_size ---- -------------> " << data_size ;                 //8192*2
            // 对于音频，获取到一帧的数据的长度
            qDebug() << "linesize[0] ----------------> " << pframe->linesize[0];        //8192,两个声道都是8192字节
            qDebug() << "pframe format --------------> " << pframe->format;             //8:AV_SAMPLE_FMT_FLTP
            //也就是说data存下了两个声道各一帧的数据

            if (data == NULL)
            {
                data = (char *)malloc(data_size);
            }
            memset(data, 0, data_size);
            uint32_t *l = (uint32_t *)pframe->extended_data[0];         //左声道数据
            uint32_t *r = (uint32_t *)pframe->extended_data[1];         //右声道数据

            //这是针对AV_SAMPLE_FMT_FLT（每个采样点占4byte）的写入方式,这样ffplay才能播放，也就是左右采样点交换写入

            for (int i = 0, j = 0; i < data_size; i += 8, j++)
            {
                data[i] = (char)(r[j] & 0xff);
                data[i + 1] = (char)(r[j] >> 8 & 0xff);
                data[i + 2] = (char)(r[j] >> 16 & 0xff);
                data[i + 3] = (char)(r[j] >> 24 & 0xff);

                data[i + 4] = (char)(l[j] & 0xff);
                data[i + 5] = (char)(l[j] >> 8 & 0xff);
                data[i + 6] = (char)(l[j] >> 16 & 0xff);
                data[i + 7] = (char)(l[j] >> 24 & 0xff);
            }


			fwrite(data, data_size, 1, audio_fp_pcm);
				
	}
		
	av_free_packet(&packet);
	}

	av_bitstream_filter_close(bsfc);
	fclose(video_fp);
	fclose(audio_fp);
	fclose(audio_fp_pcm);
	avcodec_close(pCodecCtx);
	avformat_close_input(&pFormatCtx);
	av_frame_free(&pframe);

    return 0;
}

【结果】

音频播放：
用ffplay播放PCM:
ffplay audio.pcm -f f32le -ar 44100 -ac 2
f32le表示32位浮点、小端字节序、各声道交错（packed）存放的采样，对应AV_SAMPLE_FMT_FLT；解码器输出的是平面（planar）格式AV_SAMPLE_FMT_FLTP，所以上面的代码在写文件之前先把各声道的采样交错排列。

视频播放：

ffplay video.h264

【关于PCM编码成AAC】在还没了解使用SwrContext前，对于想编码成AAC的PCM文件，我认为这种写入方式是一种“错误”

上面保存的音频格式： L(一个采样点)R(一个采样点)LRLRLR..............

对于2016版ffmpeg，想将此PCM编码成AAC却有点问题，想了解的可以看我的另外一篇文章。

【FFmpeg(2016)】PCM编码AAC

此时的PCM文件写入方式应该是：

            /**
            *   在这里写入文件时我做了一些处理，这是有原因的。
            *   下面的意思是，LRLRLR...的方式写入文件，每次写入4096个字节
            */
            int k=0, h=0;
            for (int i = 0; i < 4; ++i)
            {
                if (i % 2 == 0)
                {
                    int tmp = data_size / 4;
                    for (int j = 0; j < tmp; j+=4,k++ )
                    {
                        data[i * 4096 + j+0] = (char)(l[k]       & 0xff);
                        data[i * 4096 + j+1] = (char)(l[k] >> 8  & 0xff);
                        data[i * 4096 + j+2] = (char)(l[k] >> 16 & 0xff);
                        data[i * 4096 + j+3] = (char)(l[k] >> 24 & 0xff);
                    }
                }
                else
                {
                    int tmp = data_size / 4;
                    for (int j = 0; j < tmp; j += 4,h++)
                    {
                        data[i * 4096 + j+0] = (char)(r[h]       & 0xff);
                        data[i * 4096 + j+1] = (char)(r[h] >> 8  & 0xff);
                        data[i * 4096 + j+2] = (char)(r[h] >> 16 & 0xff);
                        data[i * 4096 + j+3] = (char)(r[h] >> 24 & 0xff);
                    }
                }
            }