ffmpeg从视频文件中提取音频数据

最新推荐文章于 2024-05-27 15:14:25 发布

tusong86

最新推荐文章于 2024-05-27 15:14:25 发布

阅读量2.1k

点赞数 1

文章标签：音视频 c++ 开发语言

本文链接：https://blog.csdn.net/tusong86/article/details/122776685

版权

本文将从音视频mp4文件中提取对应的音频数据，并写入文件。
本人以电影《霍乱时期的爱情》为素材，提取其中的音频；整个mp4文件大小为1.69G，时长2小时12分41秒；提取出的音频文件大小为123MB，时长同样为2小时12分41秒。

注意，本人只提取了一路音频，不涉及mp4里面多路音频的情况。

提取过程比较简单：一个线程读取输入文件并解码其中的音频包，把采样数据写入音频FIFO；另一个线程从FIFO中取出采样数据，重新编码后写入输出文件。

代码结构如下：
在这里插入图片描述

其中main函数所在文件FfmpegExtractAudio内容如下:

#include <iostream>
#include "ExtractAudio.h"
#include <vector>

#ifdef	__cplusplus
extern "C"
{
#endif

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")


#ifdef __cplusplus
};
#endif




// Entry point: extracts the audio track of a hard-coded input movie into a
// hard-coded output file.
// Returns 0 when the extraction was started successfully, 1 otherwise.
int main()
{
	CExtractAudio cCExtractAudio;
	const char *pFileA = "E:\\ffmpeg\\convert\\huoluan.mp4";

	const char *pFileOut = "E:\\learn\\ffmpeg\\FfmpegGifTest\\x64\\Release\\huoluan3_audio_extract.mp4";

	// StartExtractAudio() returns 0 on success; anything else is an error that
	// must not be silently dropped.
	const int iStartResult = cCExtractAudio.StartExtractAudio(pFileA, pFileOut);
	if (iStartResult != 0)
	{
		std::cerr << "StartExtractAudio failed, error code " << iStartResult << std::endl;
	}

	// Always join: WaitFinish() returns immediately when no worker thread was
	// started, and joins them otherwise.
	cCExtractAudio.WaitFinish();

	return iStartResult == 0 ? 0 : 1;
}

ExtractAudio.h的内容如下：

#pragma once

#include <Windows.h>

#ifdef	__cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"

#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"


#ifdef __cplusplus
};
#endif


// Decodes an audio stream of an input media file on one thread, buffers the
// samples in an AVAudioFifo, and re-encodes/muxes them into an output file on
// a second thread.
//
// Typical use: StartExtractAudio() followed by WaitFinish().
class CExtractAudio
{
public:
	CExtractAudio();
	~CExtractAudio();

	// The object owns a CRITICAL_SECTION, thread handles and FFmpeg contexts;
	// a member-wise copy would share and double-release them.
	CExtractAudio(const CExtractAudio &) = delete;
	CExtractAudio &operator=(const CExtractAudio &) = delete;
public:
	// Opens input and output and starts both worker threads. Returns 0 on success.
	int StartExtractAudio(const char *pFileA, const char *pFileOut);
	// Blocks until the worker threads have finished and closes their handles.
	int WaitFinish();
private:
	// Opens the input file and its audio decoder. Returns 0 on success.
	int OpenFileA(const char *pFileA);
	// Creates the output muxer and audio encoder and writes the file header.
	// Returns 0 on success.
	int OpenOutPut(const char *pFileOut);
private:
	// Thread entry point forwarding to FileARead().
	static DWORD WINAPI FileAReadProc(LPVOID lpParam);
	// Reads and decodes audio packets, pushing the samples into m_pAudioAFifo.
	void FileARead();


	// Thread entry point forwarding to ExtractAudio().
	static DWORD WINAPI ExtractAudioProc(LPVOID lpParam);
	// Pulls samples from m_pAudioAFifo, encodes them and writes the output file.
	void ExtractAudio();
private:
	// Demuxer context of the input file.
	AVFormatContext *m_pFormatCtx_FileA = NULL;

	// Decoder context and decoder of the selected input audio stream.
	AVCodecContext *m_pReadCodecCtx_AudioA = NULL;
	AVCodec *m_pReadCodec_AudioA = NULL;


	// Encoder context and muxer context of the output file.
	AVCodecContext	*m_pCodecEncodeCtx_Audio = NULL;
	AVFormatContext *m_pFormatCtx_Out = NULL;

	// Decoded samples handed from the reader thread to the encoder thread.
	AVAudioFifo		*m_pAudioAFifo = NULL;
private:
	// Serializes access to m_pAudioAFifo between the two worker threads.
	CRITICAL_SECTION m_csAudioASection;
	HANDLE m_hFileAReadThread = NULL;
	HANDLE m_hExtractAudioThread = NULL;

	// Time base of the selected input audio stream.
	AVRational m_streamTimeBase = { 0, 1 };
	// Resampler context configured in StartExtractAudio().
	SwrContext *m_pAudioConvertCtx = NULL;

	// Index of the selected audio stream inside the input file.
	int m_iAudioStreamIndex = 0;
};

ExtractAudio.cpp的内容如下：


#include "ExtractAudio.h"
//#include "log/log.h"





// Constructs an idle extractor; only the critical section that protects the
// audio FIFO needs explicit set-up, every other member has an in-class default.
CExtractAudio::CExtractAudio()
{
	::InitializeCriticalSection(&m_csAudioASection);
}

// Joins any worker thread that is still running and releases every FFmpeg
// object owned by the instance before destroying the critical section.
CExtractAudio::~CExtractAudio()
{
	// The worker threads use the members freed below, so they must be finished
	// first. WaitFinish() also clears m_hFileAReadThread, which is the signal
	// the encoder thread waits for before it terminates.
	WaitFinish();
	if (m_hExtractAudioThread != NULL)
	{
		WaitForSingleObject(m_hExtractAudioThread, INFINITE);
		CloseHandle(m_hExtractAudioThread);
		m_hExtractAudioThread = NULL;
	}

	// All of the release functions below accept NULL, so a partially
	// initialized object is handled as well.
	swr_free(&m_pAudioConvertCtx);

	av_audio_fifo_free(m_pAudioAFifo);
	m_pAudioAFifo = NULL;

	avcodec_free_context(&m_pReadCodecCtx_AudioA);
	avcodec_free_context(&m_pCodecEncodeCtx_Audio);

	avformat_close_input(&m_pFormatCtx_FileA);

	// The output AVIOContext is closed by ExtractAudio() after the trailer has
	// been written, so only the muxer context itself is released here.
	avformat_free_context(m_pFormatCtx_Out);
	m_pFormatCtx_Out = NULL;

	DeleteCriticalSection(&m_csAudioASection);
}

int CExtractAudio::StartExtractAudio(const char *pFileA, const char *pFileOut)
{
	int ret = -1;
	do
	{
		ret = OpenFileA(pFileA);
		if (ret != 0)
		{
			break;
		}

		ret = OpenOutPut(pFileOut);
		if (ret != 0)
		{
			break;
		}


		m_pAudioAFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format,
			m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channels, 3000 * 1024);

		int iSrcChLayout = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channel_layout;
		int iDstChLayout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout;

		int iSrcSampleRate = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->sample_rate;
		int iDstSampleRate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate;

		int iSrcFmt = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format;
		int iDstFmt = m_pFormatCtx_Out->streams[0]->codecpar->format;

		m_pAudioConvertCtx = swr_alloc();
		av_opt_set_channel_layout(m_pAudioConvertCtx, "in_channel_layout", iSrcChLayout, 0);
		av_opt_set_channel_layout(m_pAudioConvertCtx, "out_channel_layout", iDstChLayout, 0);
		av_opt_set_int(m_pAudioConvertCtx, "in_sample_rate", iSrcSampleRate, 0);
		av_opt_set_int(m_pAudioConvertCtx, "out_sample_rate", iDstSampleRate, 0);
		av_opt_set_sample_fmt(m_pAudioConvertCtx, "in_sample_fmt", (AVSampleFormat)iSrcFmt, 0);
		av_opt_set_sample_fmt(m_pAudioConvertCtx, "out_sample_fmt", (AVSampleFormat)iDstFmt, 0);

		ret = swr_init(m_pAudioConvertCtx);
		

		m_hFileAReadThread = CreateThread(NULL, 0, FileAReadProc, this, 0, NULL);

		m_hExtractAudioThread = CreateThread(NULL, 0, ExtractAudioProc, this, 0, NULL);

	} while (0);

	return ret;
}

// Blocks until both worker threads have terminated and closes their handles.
// Safe to call when the threads were never started, and safe to call twice.
// Always returns 0.
int CExtractAudio::WaitFinish()
{
	// Join the reader first: clearing m_hFileAReadThread afterwards is what
	// tells the encoder thread that no more samples will arrive.
	if (m_hFileAReadThread != NULL)
	{
		WaitForSingleObject(m_hFileAReadThread, INFINITE);
		CloseHandle(m_hFileAReadThread);
		m_hFileAReadThread = NULL;
	}

	// Handled independently of the reader so that an encoder thread is joined
	// even when there is no reader handle, and a NULL handle is never waited on.
	if (m_hExtractAudioThread != NULL)
	{
		WaitForSingleObject(m_hExtractAudioThread, INFINITE);
		CloseHandle(m_hExtractAudioThread);
		m_hExtractAudioThread = NULL;
	}

	return 0;
}

int CExtractAudio::OpenFileA(const char *pFileA)
{
	int ret = -1;

	do
	{
		if ((ret = avformat_open_input(&m_pFormatCtx_FileA, pFileA, 0, 0)) < 0) {
			printf("Could not open input file.\n");
			break;
		}
		if ((ret = avformat_find_stream_info(m_pFormatCtx_FileA, 0)) < 0) {
			printf("Failed to retrieve input stream information\n");
			break;
		}


		for (int i = 0; i < m_pFormatCtx_FileA->nb_streams; i++)
		{
			if (m_pFormatCtx_FileA->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
			{
				m_streamTimeBase = m_pFormatCtx_FileA->streams[i]->time_base;

				m_pReadCodec_AudioA = (AVCodec *)avcodec_find_decoder(m_pFormatCtx_FileA->streams[i]->codecpar->codec_id);

				m_pReadCodecCtx_AudioA = avcodec_alloc_context3(m_pReadCodec_AudioA);

				if (m_pReadCodecCtx_AudioA == NULL)
				{
					break;
				}
				avcodec_parameters_to_context(m_pReadCodecCtx_AudioA, m_pFormatCtx_FileA->streams[i]->codecpar);

				if (avcodec_open2(m_pReadCodecCtx_AudioA, m_pReadCodec_AudioA, NULL) < 0)
				{
					break;
				}

				m_iAudioStreamIndex = i;

				ret = 0;

				break;
			}
		}

	} while (0);


	return ret;
}


int CExtractAudio::OpenOutPut(const char *pFileOut)
{
	int iRet = -1;

	AVStream *pAudioStream = NULL;

	do
	{
		avformat_alloc_output_context2(&m_pFormatCtx_Out, NULL, NULL, pFileOut);

		{
			AVCodec* pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(m_pFormatCtx_Out->oformat->audio_codec);

			m_pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
			if (!m_pCodecEncodeCtx_Audio)
			{
				break;
			}

			pAudioStream = avformat_new_stream(m_pFormatCtx_Out, pCodecEncode_Audio);
			if (!pAudioStream)
			{
				break;
			}


			m_pCodecEncodeCtx_Audio->sample_rate = m_pReadCodecCtx_AudioA->sample_rate;
			m_pCodecEncodeCtx_Audio->channel_layout = m_pReadCodecCtx_AudioA->channel_layout;
			m_pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(m_pCodecEncodeCtx_Audio->channel_layout);
			m_pCodecEncodeCtx_Audio->sample_fmt = (AVSampleFormat)m_pReadCodecCtx_AudioA->sample_fmt;


			if (m_pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
				m_pCodecEncodeCtx_Audio->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

			if (avcodec_open2(m_pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
			{
				//编码器打开失败，退出程序
				break;
			}
		}

		if (!(m_pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
		{
			if (avio_open(&m_pFormatCtx_Out->pb, pFileOut, AVIO_FLAG_WRITE) < 0)
			{
				break;
			}
		}

		avcodec_parameters_from_context(pAudioStream->codecpar, m_pCodecEncodeCtx_Audio);

		if (avformat_write_header(m_pFormatCtx_Out, NULL) < 0)
		{
			break;
		}

		iRet = 0;
	} while (0);


	if (iRet != 0)
	{
		if (m_pCodecEncodeCtx_Audio != NULL)
		{
			avcodec_free_context(&m_pCodecEncodeCtx_Audio);
			m_pCodecEncodeCtx_Audio = NULL;
		}

		if (m_pFormatCtx_Out != NULL)
		{
			avformat_free_context(m_pFormatCtx_Out);
			m_pFormatCtx_Out = NULL;
		}
	}

	return iRet;
}


// Thread entry point for the reader thread.
// lpParam is the CExtractAudio instance passed to CreateThread(); a NULL
// pointer is ignored. Always returns 0.
DWORD WINAPI CExtractAudio::FileAReadProc(LPVOID lpParam)
{
	CExtractAudio *const pSelf = static_cast<CExtractAudio *>(lpParam);
	if (pSelf == NULL)
	{
		return 0;
	}

	pSelf->FileARead();
	return 0;
}

void CExtractAudio::FileARead()
{
	AVFrame *pFrame;
	pFrame = av_frame_alloc();

	AVPacket packet = { 0 };
	int ret = 0;
	while (1)
	{
		av_packet_unref(&packet);

		ret = av_read_frame(m_pFormatCtx_FileA, &packet);
		if (ret == AVERROR(EAGAIN))
		{
			continue;
		}
		else if (ret == AVERROR_EOF)
		{
			break;
		}
		else if (ret < 0)
		{
			break;
		}

		if (packet.stream_index != m_iAudioStreamIndex)
		{
			continue;
		}

		ret = avcodec_send_packet(m_pReadCodecCtx_AudioA, &packet);

		if (ret >= 0)
		{
			ret = avcodec_receive_frame(m_pReadCodecCtx_AudioA, pFrame);
			if (ret == AVERROR(EAGAIN))
			{
				continue;
			}
			else if (ret == AVERROR_EOF)
			{
				break;
			}
			else if (ret < 0) {
				break;
			}

			while (1)
			{
				int buf_space = av_audio_fifo_space(m_pAudioAFifo);
				if (buf_space >= pFrame->nb_samples)
				{
					EnterCriticalSection(&m_csAudioASection);
					ret = av_audio_fifo_write(m_pAudioAFifo, (void **)pFrame->data, pFrame->nb_samples);
					LeaveCriticalSection(&m_csAudioASection);

					break;
				}
				else
				{
					Sleep(100);
				}
			}

		}


		if (ret == AVERROR(EAGAIN))
		{
			continue;
		}
	}

	av_frame_free(&pFrame);
}

// Thread entry point for the encoder thread.
// lpParam is the CExtractAudio instance passed to CreateThread(); a NULL
// pointer is ignored. Always returns 0.
DWORD WINAPI CExtractAudio::ExtractAudioProc(LPVOID lpParam)
{
	CExtractAudio *const pSelf = static_cast<CExtractAudio *>(lpParam);
	if (pSelf == NULL)
	{
		return 0;
	}

	pSelf->ExtractAudio();
	return 0;
}


void CExtractAudio::ExtractAudio()
{
	int ret = 0;

	AVFrame *pFrameAudioA = av_frame_alloc();

	pFrameAudioA->nb_samples = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size : 1024;
	pFrameAudioA->channel_layout = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channel_layout;
	pFrameAudioA->format = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format;
	pFrameAudioA->sample_rate = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->sample_rate;
	pFrameAudioA->channels = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channels;
	av_frame_get_buffer(pFrameAudioA, 0);


	AVPacket packet = { 0 };

	int iAudioFrameIndex = 0;

	while (1)
	{
		if (NULL == m_pAudioAFifo)
		{
			break;
		}

		if (av_audio_fifo_size(m_pAudioAFifo) >=
			(m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024))
		{
			EnterCriticalSection(&m_csAudioASection);
			int readcount = av_audio_fifo_read(m_pAudioAFifo, (void **)pFrameAudioA->data,
				(m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size : 1024));
			LeaveCriticalSection(&m_csAudioASection);

			
			ret = avcodec_send_frame(m_pCodecEncodeCtx_Audio, pFrameAudioA);
			if (ret == AVERROR(EAGAIN))
			{
				continue;
			}

			ret = avcodec_receive_packet(m_pCodecEncodeCtx_Audio, &packet);
			if (ret == AVERROR(EAGAIN))
			{
				continue;
			}

			packet.stream_index = 0;
			packet.pts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
			packet.dts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
			packet.duration = m_pFormatCtx_Out->streams[0]->codecpar->frame_size;

			ret = av_write_frame(m_pFormatCtx_Out, &packet);

			iAudioFrameIndex++;
		}
		else
		{
			if (m_hFileAReadThread == NULL)
			{
				break;
			}
			Sleep(1);
		}
	}

	av_write_trailer(m_pFormatCtx_Out);
	avio_close(m_pFormatCtx_Out->pb);

	av_frame_free(&pFrameAudioA);
}

tusong86

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
ffmpeg从视频文件中提取音频数据

本文将从音视频mp4文件中提取对应的音频数据，并写入文件。本人以霍乱时期的爱情电影为题材，提取里面的音频；整个mp4文件为1.69G大小，时长两个小时12分41秒，提取的音频数据生成的音频文件的大小为123兆，时长两个小时12分41秒。注意，本人只提取了一路音频，不涉及mp4里面多路音频的情况。提取过程也简单，就不说明了。代码结构如下：其中main函数所在文件FfmpegExtractAudio内容如下:#include <iostream>#include "ExtractAu
复制链接

扫一扫