FFmpeg 的 volumedetect 滤镜可以统计音频的音量,但统计结果只会打印到控制台。本人跟踪了一下 FFmpeg 源码,想确认是否有现成的接口可以直接获取音量大小,结论是没有:文件处理完成之后,在调用 avfilter_graph_free 释放滤镜图时,才会触发音量信息的控制台打印,如下所示:
代码结构如下:
其中FfmpegGetVolume.cpp的代码如下:
#include <iostream>
#include "VolumeGet.h"
#include <vector>
#ifdef __cplusplus
extern "C"
{
#endif
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
/**
 * Program entry point: runs the volumedetect pipeline on a hard-coded input
 * file, re-encodes the audio into a hard-coded output file and waits for the
 * worker threads to finish.
 *
 * @return 0 when StartVolumeGet() reported success, 1 otherwise.
 */
int main()
{
    CVolumeGet cCVolumeGet;
    const char *pFileA = "E:\\learn\\ffmpeg\\FfmpegFilterTest\\x64\\Release\\huoluan3_audio.mp4";
    const char *pFileOut = "E:\\learn\\ffmpeg\\FfmpegFilterTest\\x64\\Release\\huoluan3_audio_copy.mp4";

    const int iStartResult = cCVolumeGet.StartVolumeGet(pFileA, pFileOut);

    // Join unconditionally: WaitFinish() is a no-op when no worker was
    // started, and it must run if any worker is alive.
    cCVolumeGet.WaitFinish();

    if (iStartResult != 0)
    {
        // Surface the failure instead of silently exiting with success.
        std::cerr << "StartVolumeGet failed, code " << iStartResult << std::endl;
        return 1;
    }
    return 0;
}
VolumeGet.h的代码如下:
#pragma once
#include <Windows.h>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#ifdef __cplusplus
};
#endif
/**
 * Feeds the audio of an input file through an FFmpeg filter graph containing
 * the "volumedetect" filter and re-encodes the filtered audio into an output
 * file.
 *
 * Work is split across three Win32 threads connected by two audio FIFOs:
 *   FileARead  -> m_pAudioAFifo   -> VolumeGet -> m_pResampleFifo -> AudioWrite
 *
 * Usage: call StartVolumeGet() once, then WaitFinish().
 */
class CVolumeGet
{
public:
    CVolumeGet();
    ~CVolumeGet();

    // The object owns critical sections, thread handles and FFmpeg contexts
    // and hands `this` to its worker threads, so a copy could never be valid.
    CVolumeGet(const CVolumeGet &) = delete;
    CVolumeGet &operator=(const CVolumeGet &) = delete;

public:
    /// Opens input and output, builds the filter graph and starts the workers.
    /// Returns 0 on success.
    int StartVolumeGet(const char *pFileA, const char *pFileOut);

    /// Blocks until the worker threads have exited and closes their handles.
    int WaitFinish();

private:
    int OpenFileA(const char *pFileA);
    int OpenOutPut(const char *pFileOut);
    int InitFilter(const char* filter_desc);

private:
    // Each *Proc is the static Win32 thread entry point that forwards to the
    // member function of the same stage.
    static DWORD WINAPI FileAReadProc(LPVOID lpParam);
    void FileARead();
    static DWORD WINAPI VolumeGetProc(LPVOID lpParam);
    void VolumeGet();
    static DWORD WINAPI AudioWriteProc(LPVOID lpParam);
    void AudioWrite();

private:
    AVFormatContext *m_pFormatCtx_FileA = NULL;      // input (demuxer) context
    AVCodecContext *m_pReadCodecCtx_AudioA = NULL;   // decoder for input stream 0
    AVCodec *m_pReadCodec_AudioA = NULL;             // decoder descriptor (not owned)
    AVCodecContext *m_pCodecEncodeCtx_Audio = NULL;  // encoder for output stream 0
    AVFormatContext *m_pFormatCtx_Out = NULL;        // output (muxer) context
    AVAudioFifo *m_pAudioAFifo = NULL;               // decoded samples: FileARead -> VolumeGet
    AVAudioFifo *m_pResampleFifo = NULL;             // filtered samples: VolumeGet -> AudioWrite

private:
    AVFilterGraph* m_pFilterGraph = NULL;
    AVFilterContext* m_pFilterCtxSrcAudioA = NULL;   // "abuffer" source (owned by the graph)
    AVFilterContext* m_pFilterCtxSink = NULL;        // "abuffersink" sink (owned by the graph)

private:
    CRITICAL_SECTION m_csAudioASection;    // guards m_pAudioAFifo
    CRITICAL_SECTION m_csResampleSection;  // guards m_pResampleFifo
    // A stage treats "upstream handle == NULL" as "upstream has been joined";
    // WaitFinish() clears each handle after joining that thread.
    HANDLE m_hFileAReadThread = NULL;
    HANDLE m_hVolumeGetThread = NULL;
    HANDLE m_hAudioWriteThread = NULL;
    AVRational m_streamTimeBase = { 0, 1 };  // time base of input stream 0, set by OpenFileA()
    SwrContext *m_pAudioConvertCtx = NULL;
};
VolumeGet.cpp的代码如下:
#include "VolumeGet.h"
//#include "log/log.h"
/**
 * Prepares the two critical sections that guard the audio FIFOs. All other
 * members keep their in-class default values.
 */
CVolumeGet::CVolumeGet()
{
    // One lock per FIFO; the two initialisations are independent.
    InitializeCriticalSection(&m_csResampleSection);
    InitializeCriticalSection(&m_csAudioASection);
}
/**
 * Joins any still-running workers and releases every FFmpeg object owned by
 * the instance before destroying the critical sections.
 *
 * All FFmpeg free functions used here accept a NULL (or pointer-to-NULL)
 * argument, so the destructor is safe after a partially failed start.
 */
CVolumeGet::~CVolumeGet()
{
    // The workers use the locks and contexts released below, so they must be
    // gone first. This returns immediately if WaitFinish() already ran.
    WaitFinish();

    avfilter_graph_free(&m_pFilterGraph);  // also frees the filter contexts it owns
    m_pFilterCtxSrcAudioA = NULL;
    m_pFilterCtxSink = NULL;

    swr_free(&m_pAudioConvertCtx);

    av_audio_fifo_free(m_pAudioAFifo);
    m_pAudioAFifo = NULL;
    av_audio_fifo_free(m_pResampleFifo);
    m_pResampleFifo = NULL;

    avcodec_free_context(&m_pReadCodecCtx_AudioA);
    avcodec_free_context(&m_pCodecEncodeCtx_Audio);

    avformat_close_input(&m_pFormatCtx_FileA);

    // The output AVIOContext is closed by AudioWrite(); only the muxer
    // context itself is released here.
    avformat_free_context(m_pFormatCtx_Out);
    m_pFormatCtx_Out = NULL;

    DeleteCriticalSection(&m_csAudioASection);
    DeleteCriticalSection(&m_csResampleSection);
}
int CVolumeGet::StartVolumeGet(const char *pFileA, const char *pFileOut)
{
int ret = -1;
do
{
ret = OpenFileA(pFileA);
if (ret != 0)
{
break;
}
ret = OpenOutPut(pFileOut);
if (ret != 0)
{
break;
}
char szFilterDesc[512] = { 0 };
InitFilter(szFilterDesc);
m_pAudioAFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_FileA->streams[0]->codecpar->format,
m_pFormatCtx_FileA->streams[0]->codecpar->channels, 3000 * 1024);
m_pResampleFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_FileA->streams[0]->codecpar->format,
m_pFormatCtx_FileA->streams[0]->codecpar->channels, 3000 * 1024);
int iSrcChLayout = m_pFormatCtx_FileA->streams[0]->codecpar->channel_layout;
int iDstChLayout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout;
int iSrcSampleRate = m_pFormatCtx_FileA->streams[0]->codecpar->sample_rate;
int iDstSampleRate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate;
int iSrcFmt = m_pFormatCtx_FileA->streams[0]->codecpar->format;
int iDstFmt = m_pFormatCtx_Out->streams[0]->codecpar->format;
m_pAudioConvertCtx = swr_alloc();
av_opt_set_channel_layout(m_pAudioConvertCtx, "in_channel_layout", iSrcChLayout, 0);
av_opt_set_channel_layout(m_pAudioConvertCtx, "out_channel_layout", iDstChLayout, 0);
av_opt_set_int(m_pAudioConvertCtx, "in_sample_rate", iSrcSampleRate, 0);
av_opt_set_int(m_pAudioConvertCtx, "out_sample_rate", iDstSampleRate, 0);
av_opt_set_sample_fmt(m_pAudioConvertCtx, "in_sample_fmt", (AVSampleFormat)iSrcFmt, 0);
av_opt_set_sample_fmt(m_pAudioConvertCtx, "out_sample_fmt", (AVSampleFormat)iDstFmt, 0);
ret = swr_init(m_pAudioConvertCtx);
m_hFileAReadThread = CreateThread(NULL, 0, FileAReadProc, this, 0, NULL);
m_hVolumeGetThread = CreateThread(NULL, 0, VolumeGetProc, this, 0, NULL);
m_hAudioWriteThread = CreateThread(NULL, 0, AudioWriteProc, this, 0, NULL);
} while (0);
return ret;
}
int CVolumeGet::WaitFinish()
{
int ret = 0;
do
{
if (NULL == m_hFileAReadThread)
{
break;
}
WaitForSingleObject(m_hFileAReadThread, INFINITE);
CloseHandle(m_hFileAReadThread);
m_hFileAReadThread = NULL;
Sleep(1000);
WaitForSingleObject(m_hVolumeGetThread, INFINITE);
CloseHandle(m_hVolumeGetThread);
m_hVolumeGetThread = NULL;
WaitForSingleObject(m_hAudioWriteThread, INFINITE);
CloseHandle(m_hAudioWriteThread);
m_hAudioWriteThread = NULL;
} while (0);
return ret;
}
int CVolumeGet::OpenFileA(const char *pFileA)
{
int ret = -1;
do
{
if ((ret = avformat_open_input(&m_pFormatCtx_FileA, pFileA, 0, 0)) < 0) {
printf("Could not open input file.");
break;
}
if ((ret = avformat_find_stream_info(m_pFormatCtx_FileA, 0)) < 0) {
printf("Failed to retrieve input stream information");
break;
}
m_streamTimeBase = m_pFormatCtx_FileA->streams[0]->time_base;
m_pReadCodec_AudioA = (AVCodec *)avcodec_find_decoder(m_pFormatCtx_FileA->streams[0]->codecpar->codec_id);
m_pReadCodecCtx_AudioA = avcodec_alloc_context3(m_pReadCodec_AudioA);
if (m_pReadCodecCtx_AudioA == NULL)
{
break;
}
avcodec_parameters_to_context(m_pReadCodecCtx_AudioA, m_pFormatCtx_FileA->streams[0]->codecpar);
if (avcodec_open2(m_pReadCodecCtx_AudioA, m_pReadCodec_AudioA, NULL) < 0)
{
break;
}
ret = 0;
} while (0);
return ret;
}
int CVolumeGet::OpenOutPut(const char *pFileOut)
{
int iRet = -1;
AVStream *pAudioStream = NULL;
do
{
avformat_alloc_output_context2(&m_pFormatCtx_Out, NULL, NULL, pFileOut);
{
AVCodec* pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(m_pFormatCtx_Out->oformat->audio_codec);
m_pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
if (!m_pCodecEncodeCtx_Audio)
{
break;
}
pAudioStream = avformat_new_stream(m_pFormatCtx_Out, pCodecEncode_Audio);
if (!pAudioStream)
{
break;
}
m_pCodecEncodeCtx_Audio->sample_rate = m_pReadCodecCtx_AudioA->sample_rate;
m_pCodecEncodeCtx_Audio->channel_layout = m_pReadCodecCtx_AudioA->channel_layout;
m_pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(m_pCodecEncodeCtx_Audio->channel_layout);
m_pCodecEncodeCtx_Audio->sample_fmt = (AVSampleFormat)m_pReadCodecCtx_AudioA->sample_fmt;
if (m_pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
m_pCodecEncodeCtx_Audio->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if (avcodec_open2(m_pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
{
//编码器打开失败,退出程序
break;
}
}
if (!(m_pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&m_pFormatCtx_Out->pb, pFileOut, AVIO_FLAG_WRITE) < 0)
{
break;
}
}
avcodec_parameters_from_context(pAudioStream->codecpar, m_pCodecEncodeCtx_Audio);
if (avformat_write_header(m_pFormatCtx_Out, NULL) < 0)
{
break;
}
iRet = 0;
} while (0);
if (iRet != 0)
{
if (m_pCodecEncodeCtx_Audio != NULL)
{
avcodec_free_context(&m_pCodecEncodeCtx_Audio);
m_pCodecEncodeCtx_Audio = NULL;
}
if (m_pFormatCtx_Out != NULL)
{
avformat_free_context(m_pFormatCtx_Out);
m_pFormatCtx_Out = NULL;
}
}
return iRet;
}
/**
 * Win32 thread entry point for the reader stage.
 *
 * @param lpParam The CVolumeGet instance passed to CreateThread.
 * @return Always 0.
 */
DWORD WINAPI CVolumeGet::FileAReadProc(LPVOID lpParam)
{
    if (CVolumeGet *const pSelf = static_cast<CVolumeGet *>(lpParam))
    {
        pSelf->FileARead();
    }
    return 0;
}
void CVolumeGet::FileARead()
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
int iReadedNbFrames = 0;
AVPacket packet = { 0 };
int ret = 0;
while (1)
{
av_packet_unref(&packet);
ret = av_read_frame(m_pFormatCtx_FileA, &packet);
if (ret == AVERROR(EAGAIN))
{
continue;
}
else if (ret == AVERROR_EOF)
{
break;
}
else if (ret < 0)
{
break;
}
ret = avcodec_send_packet(m_pReadCodecCtx_AudioA, &packet);
if (ret >= 0)
{
ret = avcodec_receive_frame(m_pReadCodecCtx_AudioA, pFrame);
if (ret == AVERROR(EAGAIN))
{
continue;
}
else if (ret == AVERROR_EOF)
{
break;
}
else if (ret < 0) {
break;
}
iReadedNbFrames += pFrame->nb_samples;
while (1)
{
int buf_space = av_audio_fifo_space(m_pAudioAFifo);
if (buf_space >= pFrame->nb_samples)
{
EnterCriticalSection(&m_csAudioASection);
ret = av_audio_fifo_write(m_pAudioAFifo, (void **)pFrame->data, pFrame->nb_samples);
LeaveCriticalSection(&m_csAudioASection);
break;
}
else
{
Sleep(100);
}
}
}
if (ret == AVERROR(EAGAIN))
{
continue;
}
}
av_frame_free(&pFrame);
}
/**
 * Win32 thread entry point for the filter stage.
 *
 * @param lpParam The CVolumeGet instance passed to CreateThread.
 * @return Always 0.
 */
DWORD WINAPI CVolumeGet::VolumeGetProc(LPVOID lpParam)
{
    if (CVolumeGet *const pSelf = static_cast<CVolumeGet *>(lpParam))
    {
        pSelf->VolumeGet();
    }
    return 0;
}
void CVolumeGet::VolumeGet()
{
int ret = 0;
AVFrame *pFrameAudioOut = av_frame_alloc();
pFrameAudioOut->nb_samples = m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024;
pFrameAudioOut->channel_layout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout;
pFrameAudioOut->format = m_pFormatCtx_Out->streams[0]->codecpar->format;
pFrameAudioOut->sample_rate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate;
pFrameAudioOut->channels = m_pFormatCtx_Out->streams[0]->codecpar->channels;
av_frame_get_buffer(pFrameAudioOut, 0);
AVPacket packet = { 0 };
int iWriteNbSamples = 0;
while (1)
{
if (NULL == m_pAudioAFifo)
{
break;
}
AVFrame *pFrameAudioA = av_frame_alloc();
pFrameAudioA->nb_samples = m_pFormatCtx_FileA->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[0]->codecpar->frame_size : 1024;
pFrameAudioA->channel_layout = m_pFormatCtx_FileA->streams[0]->codecpar->channel_layout;
pFrameAudioA->format = m_pFormatCtx_FileA->streams[0]->codecpar->format;
pFrameAudioA->sample_rate = m_pFormatCtx_FileA->streams[0]->codecpar->sample_rate;
pFrameAudioA->channels = m_pFormatCtx_FileA->streams[0]->codecpar->channels;
av_frame_get_buffer(pFrameAudioA, 0);
if (av_audio_fifo_size(m_pAudioAFifo) >=
(m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024))
{
EnterCriticalSection(&m_csAudioASection);
int readcount = av_audio_fifo_read(m_pAudioAFifo, (void **)pFrameAudioA->data,
(m_pFormatCtx_FileA->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[0]->codecpar->frame_size : 1024));
LeaveCriticalSection(&m_csAudioASection);
ret = av_buffersrc_add_frame(m_pFilterCtxSrcAudioA, pFrameAudioA);
if (ret < 0)
{
break;
}
while (1)
{
ret = av_buffersink_get_frame_flags(m_pFilterCtxSink, pFrameAudioOut, 0);
if (ret < 0)
{
break;
}
while (1)
{
int buf_space = av_audio_fifo_space(m_pResampleFifo);
if (buf_space >= pFrameAudioOut->nb_samples)
{
iWriteNbSamples += pFrameAudioOut->nb_samples;
EnterCriticalSection(&m_csResampleSection);
ret = av_audio_fifo_write(m_pResampleFifo, (void **)pFrameAudioOut->data, pFrameAudioOut->nb_samples);
LeaveCriticalSection(&m_csResampleSection);
break;
}
else
{
Sleep(100);
}
}
}
av_frame_free(&pFrameAudioA);
}
else
{
if (m_hFileAReadThread == NULL)
{
break;
}
Sleep(1);
}
}
av_frame_free(&pFrameAudioOut);
}
/**
 * Win32 thread entry point for the encode/mux stage.
 *
 * @param lpParam The CVolumeGet instance passed to CreateThread.
 * @return Always 0.
 */
DWORD WINAPI CVolumeGet::AudioWriteProc(LPVOID lpParam)
{
    if (CVolumeGet *const pSelf = static_cast<CVolumeGet *>(lpParam))
    {
        pSelf->AudioWrite();
    }
    return 0;
}
void CVolumeGet::AudioWrite()
{
int ret = 0;
AVFrame *pFrameAudioResample = av_frame_alloc();
pFrameAudioResample->nb_samples = m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024;
pFrameAudioResample->channel_layout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout;
pFrameAudioResample->format = m_pFormatCtx_Out->streams[0]->codecpar->format;
pFrameAudioResample->sample_rate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate;
pFrameAudioResample->channels = m_pFormatCtx_Out->streams[0]->codecpar->channels;
av_frame_get_buffer(pFrameAudioResample, 0);
AVPacket packet = { 0 };
int iAudioFrameIndex = 0;
AVFrame *pFrameAudioOut = NULL;
while (1)
{
if (NULL == m_pResampleFifo)
{
break;
}
if (av_audio_fifo_size(m_pResampleFifo) >=
(m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024))
{
EnterCriticalSection(&m_csResampleSection);
int readcount = av_audio_fifo_read(m_pResampleFifo, (void **)pFrameAudioResample->data,
(m_pFormatCtx_FileA->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_FileA->streams[0]->codecpar->frame_size : 1024));
LeaveCriticalSection(&m_csResampleSection);
int dst_nb_samples = av_rescale_rnd(pFrameAudioResample->nb_samples, m_pReadCodecCtx_AudioA->sample_rate, m_pCodecEncodeCtx_Audio->sample_rate, AVRounding(1));
ret = avcodec_send_frame(m_pCodecEncodeCtx_Audio, pFrameAudioResample);
if (ret == AVERROR(EAGAIN))
{
continue;
}
ret = avcodec_receive_packet(m_pCodecEncodeCtx_Audio, &packet);
if (ret == AVERROR(EAGAIN))
{
continue;
}
packet.stream_index = 0;
packet.pts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
packet.dts = iAudioFrameIndex * m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
packet.duration = m_pFormatCtx_Out->streams[0]->codecpar->frame_size;
av_write_frame(m_pFormatCtx_Out, &packet);
iAudioFrameIndex++;
}
else
{
if (m_hVolumeGetThread == NULL)
{
break;
}
Sleep(1);
}
}
av_write_trailer(m_pFormatCtx_Out);
avio_close(m_pFormatCtx_Out->pb);
av_frame_free(&pFrameAudioResample);
avfilter_graph_free(&m_pFilterGraph);
}
int CVolumeGet::InitFilter(const char* filter_desc)
{
int ret = 0;
char args_audioA[512];
const char* name_audioA = "in0";
const char* name_volumeFilter = "volumeFilter";
AVFilter* filter_src_audioA = (AVFilter *)avfilter_get_by_name("abuffer");
AVFilter* filter_sink = (AVFilter *)avfilter_get_by_name("abuffersink");
AVFilter *filter_volume_detect = (AVFilter *)avfilter_get_by_name("volumedetect");
AVFilterInOut* filter_output_videoA = avfilter_inout_alloc();
AVFilterInOut* filter_input = avfilter_inout_alloc();
m_pFilterGraph = avfilter_graph_alloc();
sprintf_s(args_audioA, sizeof(args_audioA), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
m_pReadCodecCtx_AudioA->time_base.num,
m_pReadCodecCtx_AudioA->time_base.den,
m_pReadCodecCtx_AudioA->sample_rate,
av_get_sample_fmt_name((AVSampleFormat)m_pReadCodecCtx_AudioA->sample_fmt),
m_pReadCodecCtx_AudioA->channel_layout);
do
{
ret = avfilter_graph_create_filter(&m_pFilterCtxSrcAudioA, filter_src_audioA, name_audioA, args_audioA, NULL, m_pFilterGraph);
if (ret < 0)
{
break;
}
AVFilterContext *volumeFilter_ctx;
ret = avfilter_graph_create_filter(&volumeFilter_ctx, filter_volume_detect, name_volumeFilter, filter_desc, NULL, m_pFilterGraph);
if (ret < 0)
{
break;
}
ret = avfilter_graph_create_filter(&m_pFilterCtxSink, filter_sink, "out", NULL, NULL, m_pFilterGraph);
if (ret < 0)
{
break;
}
ret = av_opt_set_bin(m_pFilterCtxSink, "sample_fmts", (uint8_t*)&m_pCodecEncodeCtx_Audio->sample_fmt, sizeof(m_pCodecEncodeCtx_Audio->sample_fmt), AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
{
break;
}
ret = av_opt_set_bin(m_pFilterCtxSink, "channel_layouts", (uint8_t*)&m_pCodecEncodeCtx_Audio->channel_layout, sizeof(m_pCodecEncodeCtx_Audio->channel_layout), AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
{
break;
}
ret = av_opt_set_bin(m_pFilterCtxSink, "sample_rates", (uint8_t*)&m_pCodecEncodeCtx_Audio->sample_rate, sizeof(m_pCodecEncodeCtx_Audio->sample_rate), AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
{
break;
}
ret = avfilter_link(m_pFilterCtxSrcAudioA, 0, volumeFilter_ctx, 0);
if (ret != 0)
{
break;
}
ret = avfilter_link(volumeFilter_ctx, 0, m_pFilterCtxSink, 0);
if (ret != 0)
{
break;
}
ret = avfilter_graph_config(m_pFilterGraph, NULL);
if (ret < 0)
{
break;
}
ret = 0;
} while (0);
avfilter_inout_free(&filter_input);
av_free(filter_src_audioA);
char* temp = avfilter_graph_dump(m_pFilterGraph, NULL);
return ret;
}