本文介绍如何从包含音视频的mp4文件中提取音频数据,并将其写入单独的输出文件。
本人以电影《霍乱时期的爱情》为素材,提取其中的音频;整个mp4文件大小为1.69 GB,时长2小时12分41秒,提取后生成的音频文件大小为123 MB,时长同样为2小时12分41秒。
注意,本人只提取了一路音频,不涉及mp4里面多路音频的情况。
提取过程比较简单:一个线程读取并解码音频数据,把采样写入FIFO;另一个线程从FIFO中取出采样重新编码,并写入输出文件,这里就不再详细说明了。
代码结构如下:
其中main函数所在文件FfmpegExtractAudio内容如下:
#include <iostream>
#include "ExtractAudio.h"
#include <vector>
#ifdef __cplusplus
extern "C"
{
#endif
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
// Entry point: extracts the audio track of a hard-coded input file into a
// hard-coded output file and reports failure through the process exit code.
int main()
{
    CExtractAudio cCExtractAudio;
    const char *pFileA = "E:\\ffmpeg\\convert\\huoluan.mp4";
    const char *pFileOut = "E:\\learn\\ffmpeg\\FfmpegGifTest\\x64\\Release\\huoluan3_audio_extract.mp4";

    const int iStartResult = cCExtractAudio.StartExtractAudio(pFileA, pFileOut);
    if (iStartResult != 0)
    {
        std::cerr << "Failed to start audio extraction (error " << iStartResult << ")" << std::endl;
    }

    // Always join: WaitFinish() returns immediately when no worker thread was
    // started, and must be called if any thread is running before the object
    // goes out of scope.
    cCExtractAudio.WaitFinish();

    return iStartResult == 0 ? 0 : 1;
}
ExtractAudio.h的内容如下:
#pragma once
#include <Windows.h>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#ifdef __cplusplus
};
#endif
/**
 * Decodes the first audio stream of an input media file on one thread,
 * hands the samples over through an AVAudioFifo, and re-encodes/muxes them
 * into an output file on a second thread.
 *
 * Typical use: StartExtractAudio() followed by WaitFinish().
 */
class CExtractAudio
{
public:
    CExtractAudio();
    ~CExtractAudio();

    // The object holds a CRITICAL_SECTION, thread handles and raw FFmpeg
    // pointers; a member-wise copy would alias all of them, so copying is
    // disabled.
    CExtractAudio(const CExtractAudio &) = delete;
    CExtractAudio &operator=(const CExtractAudio &) = delete;

public:
    // Opens input and output, then starts the reader and encoder threads.
    // Returns 0 on success, non-zero on failure.
    int StartExtractAudio(const char *pFileA, const char *pFileOut);
    // Blocks until both worker threads have finished; returns 0.
    int WaitFinish();

private:
    // Opens the input file and a decoder for its first audio stream.
    int OpenFileA(const char *pFileA);
    // Creates the output context, audio encoder and stream, and writes the header.
    int OpenOutPut(const char *pFileOut);

private:
    // Thread entry points (lpParam is the CExtractAudio instance) and their bodies.
    static DWORD WINAPI FileAReadProc(LPVOID lpParam);
    void FileARead();
    static DWORD WINAPI ExtractAudioProc(LPVOID lpParam);
    void ExtractAudio();

private:
    AVFormatContext *m_pFormatCtx_FileA = NULL;      // input (demuxer) context
    AVCodecContext *m_pReadCodecCtx_AudioA = NULL;   // decoder for the input audio stream
    AVCodec *m_pReadCodec_AudioA = NULL;             // decoder implementation found for that stream
    AVCodecContext *m_pCodecEncodeCtx_Audio = NULL;  // encoder for the output audio stream
    AVFormatContext *m_pFormatCtx_Out = NULL;        // output (muxer) context
    AVAudioFifo *m_pAudioAFifo = NULL;               // decoded samples queued between the two threads

private:
    CRITICAL_SECTION m_csAudioASection;              // guards m_pAudioAFifo
    HANDLE m_hFileAReadThread = NULL;                // reader/decoder thread
    HANDLE m_hExtractAudioThread = NULL;             // encoder/muxer thread
    AVRational m_streamTimeBase = { 0, 1 };          // time base of the selected input audio stream
    SwrContext *m_pAudioConvertCtx = NULL;           // resampler context set up in StartExtractAudio()
    int m_iAudioStreamIndex = 0;                     // index of the selected input audio stream
};
ExtractAudio.cpp的内容如下:
#include "ExtractAudio.h"
//#include "log/log.h"
// Sets up the critical section used around accesses to the audio FIFO.
// The pointer and handle members rely on their in-class NULL initializers.
CExtractAudio::CExtractAudio()
{
    ::InitializeCriticalSection(&m_csAudioASection);
}
// Joins any running worker threads, then releases every FFmpeg object the
// instance still holds before destroying the critical section.
CExtractAudio::~CExtractAudio()
{
    // The workers use the members freed below and the critical section, so
    // they must have finished first. WaitFinish() is a no-op when nothing is
    // running.
    WaitFinish();

    if (m_pAudioConvertCtx != NULL)
    {
        swr_free(&m_pAudioConvertCtx);
    }
    if (m_pAudioAFifo != NULL)
    {
        av_audio_fifo_free(m_pAudioAFifo);
        m_pAudioAFifo = NULL;
    }
    if (m_pCodecEncodeCtx_Audio != NULL)
    {
        avcodec_free_context(&m_pCodecEncodeCtx_Audio);
    }
    if (m_pReadCodecCtx_AudioA != NULL)
    {
        avcodec_free_context(&m_pReadCodecCtx_AudioA);
    }
    if (m_pFormatCtx_Out != NULL)
    {
        // The output AVIOContext is not touched here: ExtractAudio() closes it
        // after writing the trailer, and avformat_free_context() leaves pb alone.
        avformat_free_context(m_pFormatCtx_Out);
        m_pFormatCtx_Out = NULL;
    }
    if (m_pFormatCtx_FileA != NULL)
    {
        avformat_close_input(&m_pFormatCtx_FileA);
    }

    DeleteCriticalSection(&m_csAudioASection);
}
int CExtractAudio::StartExtractAudio(const char *pFileA, const char *pFileOut)
{
int ret = -1;
do
{
ret = OpenFileA(pFileA);
if (ret != 0)
{
break;
}
ret = OpenOutPut(pFileOut);
if (ret != 0)
{
break;
}
m_pAudioAFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format,
m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channels, 3000 * 1024);
int iSrcChLayout = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channel_layout;
int iDstChLayout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout;
int iSrcSampleRate = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->sample_rate;
int iDstSampleRate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate;
int iSrcFmt = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format;
int iDstFmt = m_pFormatCtx_Out->streams[0]->codecpar->format;
m_pAudioConvertCtx = swr_alloc();
av_opt_set_channel_layout(m_pAudioConvertCtx, "in_channel_layout", iSrcChLayout, 0);
av_opt_set_channel_layout(m_pAudioConvertCtx, "out_channel_layout", iDstChLayout, 0);
av_opt_set_int(m_pAudioConvertCtx, "in_sample_rate", iSrcSampleRate, 0);
av_opt_set_int(m_pAudioConvertCtx, "out_sample_rate", iDstSampleRate, 0);
av_opt_set_sample_fmt(m_pAudioConvertCtx, "in_sample_fmt", (AVSampleFormat)iSrcFmt, 0);
av_opt_set_sample_fmt(m_pAudioConvertCtx, "out_sample_fmt", (AVSampleFormat)iDstFmt, 0);
ret = swr_init(m_pAudioConvertCtx);
m_hFileAReadThread = CreateThread(NULL, 0, FileAReadProc, this, 0, NULL);
m_hExtractAudioThread = CreateThread(NULL, 0, ExtractAudioProc, this, 0, NULL);
} while (0);
return ret;
}
int CExtractAudio::WaitFinish()
{
int ret = 0;
do
{
if (NULL == m_hFileAReadThread)
{
break;
}
WaitForSingleObject(m_hFileAReadThread, INFINITE);
CloseHandle(m_hFileAReadThread);
m_hFileAReadThread = NULL;
WaitForSingleObject(m_hExtractAudioThread, INFINITE);
CloseHandle(m_hExtractAudioThread);
m_hExtractAudioThread = NULL;
} while (0);
return ret;
}
int CExtractAudio::OpenFileA(const char *pFileA)
{
int ret = -1;
do
{
if ((ret = avformat_open_input(&m_pFormatCtx_FileA, pFileA, 0, 0)) < 0) {
printf("Could not open input file.\n");
break;
}
if ((ret = avformat_find_stream_info(m_pFormatCtx_FileA, 0)) < 0) {
printf("Failed to retrieve input stream information\n");
break;
}
for (int i = 0; i < m_pFormatCtx_FileA->nb_streams; i++)
{
if (m_pFormatCtx_FileA->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
m_streamTimeBase = m_pFormatCtx_FileA->streams[i]->time_base;
m_pReadCodec_AudioA = (AVCodec *)avcodec_find_decoder(m_pFormatCtx_FileA->streams[i]->codecpar->codec_id);
m_pReadCodecCtx_AudioA = avcodec_alloc_context3(m_pReadCodec_AudioA);
if (m_pReadCodecCtx_AudioA == NULL)
{
break;
}
avcodec_parameters_to_context(m_pReadCodecCtx_AudioA, m_pFormatCtx_FileA->streams[i]->codecpar);
if (avcodec_open2(m_pReadCodecCtx_AudioA, m_pReadCodec_AudioA, NULL) < 0)
{
break;
}
m_iAudioStreamIndex = i;
ret = 0;
break;
}
}
} while (0);
return ret;
}
int CExtractAudio::OpenOutPut(const char *pFileOut)
{
int iRet = -1;
AVStream *pAudioStream = NULL;
do
{
avformat_alloc_output_context2(&m_pFormatCtx_Out, NULL, NULL, pFileOut);
{
AVCodec* pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(m_pFormatCtx_Out->oformat->audio_codec);
m_pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
if (!m_pCodecEncodeCtx_Audio)
{
break;
}
pAudioStream = avformat_new_stream(m_pFormatCtx_Out, pCodecEncode_Audio);
if (!pAudioStream)
{
break;
}
m_pCodecEncodeCtx_Audio->sample_rate = m_pReadCodecCtx_AudioA->sample_rate;
m_pCodecEncodeCtx_Audio->channel_layout = m_pReadCodecCtx_AudioA->channel_layout;
m_pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(m_pCodecEncodeCtx_Audio->channel_layout);
m_pCodecEncodeCtx_Audio->sample_fmt = (AVSampleFormat)m_pReadCodecCtx_AudioA->sample_fmt;
if (m_pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
m_pCodecEncodeCtx_Audio->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if (avcodec_open2(m_pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
{
//编码器打开失败,退出程序
break;
}
}
if (!(m_pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&m_pFormatCtx_Out->pb, pFileOut, AVIO_FLAG_WRITE) < 0)
{
break;
}
}
avcodec_parameters_from_context(pAudioStream->codecpar, m_pCodecEncodeCtx_Audio);
if (avformat_write_header(m_pFormatCtx_Out, NULL) < 0)
{
break;
}
iRet = 0;
} while (0);
if (iRet != 0)
{
if (m_pCodecEncodeCtx_Audio != NULL)
{
avcodec_free_context(&m_pCodecEncodeCtx_Audio);
m_pCodecEncodeCtx_Audio = NULL;
}
if (m_pFormatCtx_Out != NULL)
{
avformat_free_context(m_pFormatCtx_Out);
m_pFormatCtx_Out = NULL;
}
}
return iRet;
}
// Thread entry point for the reader: lpParam carries the CExtractAudio
// instance whose FileARead() is run. Always returns 0.
DWORD WINAPI CExtractAudio::FileAReadProc(LPVOID lpParam)
{
    CExtractAudio *const pSelf = static_cast<CExtractAudio *>(lpParam);
    if (pSelf == NULL)
    {
        return 0;
    }
    pSelf->FileARead();
    return 0;
}
void CExtractAudio::FileARead()
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
AVPacket packet = { 0 };
int ret = 0;
while (1)
{
av_packet_unref(&packet);
ret = av_read_frame(m_pFormatCtx_FileA, &packet);
if (ret == AVERROR(EAGAIN))
{
continue;
}
else if (ret == AVERROR_EOF)
{
break;
}
else if (ret < 0)
{
break;
}
if (packet.stream_index != m_iAudioStreamIndex)
{
continue;
}
ret = avcodec_send_packet(m_pReadCodecCtx_AudioA, &packet);
if (ret >= 0)
{
ret = avcodec_receive_frame(m_pReadCodecCtx_AudioA, pFrame);
if (ret == AVERROR(EAGAIN))
{
continue;
}
else if (ret == AVERROR_EOF)
{
break;
}
else if (ret < 0) {
break;
}
while (1)
{
int buf_space = av_audio_fifo_space(m_pAudioAFifo);
if (buf_space >= pFrame->nb_samples)
{
EnterCriticalSection(&m_csAudioASection);
ret = av_audio_fifo_write(m_pAudioAFifo, (void **)pFrame->data, pFrame->nb_samples);
LeaveCriticalSection(&m_csAudioASection);
break;
}
else
{
Sleep(100);
}
}
}
if (ret == AVERROR(EAGAIN))
{
continue;
}
}
av_frame_free(&pFrame);
}
// Thread entry point for the encoder: lpParam carries the CExtractAudio
// instance whose ExtractAudio() is run. Always returns 0.
DWORD WINAPI CExtractAudio::ExtractAudioProc(LPVOID lpParam)
{
    CExtractAudio *const pSelf = static_cast<CExtractAudio *>(lpParam);
    if (pSelf == NULL)
    {
        return 0;
    }
    pSelf->ExtractAudio();
    return 0;
}
void CExtractAudio::ExtractAudio()
{
int ret = 0;
AVFrame *pFrameAudioA = av_frame_alloc();
pFrameAudioA->nb_samples = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size : 1024;
pFrameAudioA->channel_layout = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channel_layout;
pFrameAudioA->format = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->format;
pFrameAudioA->sample_rate = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->sample_rate;
pFrameAudioA->channels = m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->channels;
av_frame_get_buffer(pFrameAudioA, 0);
AVPacket packet = { 0 };
int iAudioFrameIndex = 0;
while (1)
{
if (NULL == m_pAudioAFifo)
{
break;
}
if (av_audio_fifo_size(m_pAudioAFifo) >=
(m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024))
{
EnterCriticalSection(&m_csAudioASection);
int readcount = av_audio_fifo_read(m_pAudioAFifo, (void **)pFrameAudioA->data,
(m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[m_iAudioStreamIndex]->codecpar->frame_size : 1024));
LeaveCriticalSection(&m_csAudioASection);
ret = avcodec_send_frame(m_pCodecEncodeCtx_Audio, pFrameAudioA);
if (ret == AVERROR(EAGAIN))
{
continue;
}
ret = avcodec_receive_packet(m_pCodecEncodeCtx_Audio, &packet);
if (ret == AVERROR(EAGAIN))
{
continue;
}
packet.stream_index = 0;
packet.pts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
packet.dts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
packet.duration = m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
ret = av_write_frame(m_pFormatCtx_Out, &packet);
iAudioFrameIndex++;
}
else
{
if (m_hFileAReadThread == NULL)
{
break;
}
Sleep(1);
}
}
av_write_trailer(m_pFormatCtx_Out);
avio_close(m_pFormatCtx_Out->pb);
av_frame_free(&pFrameAudioA);
}