实现混音时需要保证两道音频流的格式(采样率、位深和声道数)完全相同;若不同,需在混音前对原始音频数据进行重采样。
ffmpeg命令:
2个pcm混音,输入输出格式都相同:48k,双声道,16bit
ffmpeg -f s16le -ar 48000 -ac 2 -i 11.pcm -f s16le -ar 48000 -ac 2 -i 22.pcm -filter_complex amix=inputs=2:duration=first:dropout_transition=0 -f s16le -ar 48000 -ac 2 mix.pcm
code1: 归一化混音
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define IN_FILE1 "E:\\doc\\VA_tool\\11.pcm"
#define IN_FILE2 "E:\\doc\\VA_tool\\22.pcm"
#define OUT_FILE "E:\\doc\\VA_tool\\remix.pcm"
#define SIZE_AUDIO_FRAME (2)

// Normalized ("adaptive weighted") mixing of `number` PCM streams.
//
// Each sourseFile[j] holds SIZE_AUDIO_FRAME bytes of signed 16-bit
// little-endian PCM; the mixed result is written to objectFile.
// Samples are summed into an int, scaled by an attenuation factor f,
// and clamped to the 16-bit range. When clipping occurs, f is reduced
// so that sum would just reach the limit; afterwards f recovers toward
// 1 by 1/32 of the remaining gap per sample.
//
// FIX: f must persist across calls. The caller feeds ONE sample per
// call, so a per-call f was reset to 1 for every sample and the smooth
// attenuation/recovery never took effect. It is now `static` (note:
// this makes the function stateful and non-reentrant).
void Mix(char sourseFile[10][SIZE_AUDIO_FRAME], int number, char* objectFile)
{
	int const MAX = 32767;   // upper bound of signed 16-bit PCM
	int const MIN = -32768;  // lower bound of signed 16-bit PCM
	static double f = 1;     // attenuation factor, persists between calls
	int output;
	int i = 0, j = 0;
	for (i = 0; i < SIZE_AUDIO_FRAME / 2; i++)  // one 16-bit sample per 2 bytes
	{
		int temp = 0;
		// Sum the i-th sample of every input stream.
		for (j = 0; j < number; j++)
		{
			temp += *(short*)(sourseFile[j] + i * 2);
		}
		output = (int)(temp * f);
		if (output > MAX)
		{
			// Clipped high: shrink f so this sum would land exactly on MAX.
			f = (double)MAX / (double)(output);
			output = MAX;
		}
		if (output < MIN)
		{
			// Clipped low: shrink f so this sum would land exactly on MIN.
			f = (double)MIN / (double)(output);
			output = MIN;
		}
		if (f < 1)
		{
			// Let the attenuation decay back toward 1 gradually.
			f += ((double)1 - f) / (double)32;
		}
		*(short*)(objectFile + i * 2) = (short)output;
	}
}
int main()
{
	// Read one 16-bit sample from each input per iteration, mix while both
	// streams have data, pass the longer stream through once the shorter
	// one ends, and write the result to OUT_FILE.
	FILE* fp1 = NULL, * fp2 = NULL, * fpm = NULL;
	fopen_s(&fp1, IN_FILE1, "rb");
	fopen_s(&fp2, IN_FILE2, "rb");
	fopen_s(&fpm, OUT_FILE, "wb");
	// FIX: the open results were previously unchecked, so a bad path
	// crashed inside fread/fwrite on a null FILE*.
	if (!fp1 || !fp2 || !fpm)
	{
		std::cout << "failed to open input/output files\n";
		if (fp1) fclose(fp1);
		if (fp2) fclose(fp2);
		if (fpm) fclose(fpm);
		return 1;
	}
	short data1 = 0, data2 = 0, date_mix = 0;
	int ret1, ret2;
	char sourseFile[10][2];
	while (1)
	{
		ret1 = (int)fread(&data1, 2, 1, fp1);
		ret2 = (int)fread(&data2, 2, 1, fp2);
		*(short*)sourseFile[0] = data1;
		*(short*)sourseFile[1] = data2;
		if (ret1 > 0 && ret2 > 0)
		{
			// Both streams alive: mix the pair of samples.
			// (The old post-check `date_mix > pow(2,15)` could never fire
			// on a short and has been removed as dead code.)
			Mix(sourseFile, 2, (char*)&date_mix);
		}
		else if ((ret1 > 0) && (ret2 == 0))
		{
			date_mix = data1;   // stream 2 ended: pass stream 1 through
		}
		else if ((ret2 > 0) && (ret1 == 0))
		{
			date_mix = data2;   // stream 1 ended: pass stream 2 through
		}
		else
		{
			break;              // both streams exhausted
		}
		fwrite(&date_mix, 2, 1, fpm);
	}
	fclose(fp1);
	fclose(fp2);
	fclose(fpm);
	std::cout << "done!\n";
	system("pause");
	return 0;
}
code2:ffmpeg amix混音
int AudioMixAction::_initEngine()
{
	// Validate parameters, build the amix filter graph, then allocate one
	// input AVFrame per source plus the shared output frame.
	int ret = _checkParams();
	if (ret != EV_SUCCESS) return ret;
	ret = _init_spec_filter();
	if (ret != ErrorValue::EV_SUCCESS) return ret;
	frames = (AVFrame**)av_malloc(output_params.audioNum * sizeof(AVFrame*));
	frameOut = av_frame_alloc();
	// FIX: both allocations were previously used without a null check.
	if (!frames || !frameOut)
		return ErrorValue::EV_ERROR_MIX_AUDIO_NO_MEM;
	for (int i = 0; i < output_params.audioNum; i++)
	{
		frames[i] = av_frame_alloc();
		if (!(frames[i]))
		{
			// NOTE(review): frames[0..i-1] allocated so far leak on this
			// path; freeing them would need av_frame_free — TODO confirm
			// the caller tears the engine down on failure.
			ret = ErrorValue::EV_ERROR_MIX_AUDIO_NO_MEM;
			break;
		}
		auto input = sources_vec[i];
		frames[i]->nb_samples = input.audioInfo.nb_samples;   // samples per frame
		frames[i]->channels = input.audioInfo.channel;
		frames[i]->channel_layout = av_get_default_channel_layout(input.audioInfo.channel); // channel layout
		frames[i]->format = AV_SAMPLE_FMT_FLTP;               // NDI push-stream audio frame format (v3_t builds only)
		frames[i]->sample_rate = input.audioInfo.samples;
		// Allocate the sample buffers. NOTE(review): alignment is forced
		// to 1 here; FFmpeg recommends 0 to let it pick the optimal
		// alignment — confirm downstream consumers before changing.
		ret = av_frame_get_buffer(frames[i], 1);
		if (ret < 0)
		{
			ret = ErrorValue::EV_ERROR_MIX_AUDIO_NO_MEM;
			break;
		}
	}
	return ret;
}
int AudioMixAction::_mixAudio()
{
	// Pull one pending buffer from every source queue, feed them into the
	// amix filter graph, then drain all mixed frames from the sink.
	if (!_checkCanMix())
		return EV_ERROR_MIX_AUDIO_EAGIN;
	// NOTE(review): this local shadows the `frameOut` member allocated in
	// _initEngine (which is therefore unused here) — confirm which one is
	// intended.
	AVFrame* frameOut = av_frame_alloc();
	int ret = -1;
	if (!frameOut)                       // FIX: allocation was unchecked
		return ErrorValue::EV_ERROR_MIX_AUDIO_NO_MEM;
	for (int i = 0; i < output_params.audioNum; i++)
	{
		auto input = sources_vec[i];
		std::string srcid = input.sourceId;
		if (!sources_map[srcid].empty())
		{
			auto data = sources_map[srcid].front();
			sources_map[srcid].pop();
			if (data.get()->mediaType == MixMediaType::MMT_AUDIO)
			{
				AVFrame* frame = frames[i];
				// Copy the NDI planar-float (fltp) payload into the AVFrame.
				int data_size = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLTP);
				// NOTE(review): the source offset ignores `ch`, so every
				// channel plane receives the same bytes — verify the layout
				// of audioData.pdata (planar vs interleaved) and fix the
				// offset if channels are meant to differ.
				for (int s = 0; s < input.audioInfo.nb_samples; s++)   // FIX: was `i`, shadowing the outer loop index
					for (int ch = 0; ch < input.audioInfo.channel; ch++)
						memcpy(frame->data[ch] + data_size * s,
						       (uint8_t*)data.get()->audioData.pdata + data_size * s,
						       data_size);
				frame->pts = data.get()->audioData.timestamp;
				AVFilterContext* context = filter_ctx.get()->buffersrc_ctx[i];
				ret = av_buffersrc_add_frame_flags(context, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
				if (ret < 0)
				{
					av_log(NULL, AV_LOG_ERROR, "Error while feeding the audio filtergraph\n");
					ret = ErrorValue::EV_ERROR_MIX_AUDIO_ADD_FILTER_FAILE;
					goto end;
				}
			}
			else
			{
				ret = ErrorValue::EV_ERROR_MIX_AUDIO_SRC_DATA_INVALID;
				goto end;
			}
		}
	}
	// Drain every mixed frame currently available from the sink.
	while (true)
	{
		AVFilterContext* context = filter_ctx.get()->buffersink_ctx;
		ret = av_buffersink_get_frame(context, frameOut);
		if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
		{
			ret = 0;           // no more frames right now — not an error
			break;
		}
		if (ret < 0)
		{
			goto end;
		}
		// TODO: hand the mixed frame to the consumer.
		std::cout << "get mix audio frame -----" << std::endl;
		if (bWriteFile)
		{
			// Debug dump: write the planar floats interleaved to a PCM file
			// for the first 20 seconds, then close it.
			int data_size = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLTP);
			for (int s = 0; s < frameOut->nb_samples; s++)
				for (int ch = 0; ch < frameOut->channels; ch++)
					fwrite(frameOut->data[ch] + data_size * s, 1, data_size, outPcm);
			if (high_resolution_clock::now() - startTime >= seconds(20))
			{
				fclose(outPcm);
				bWriteFile = false;
			}
		}
		av_frame_unref(frameOut);
	}
end:
	// FIX: release the frame's buffers before freeing the struct — a bare
	// av_free leaked the data planes when we jumped here while frameOut
	// still held a reference.
	av_frame_unref(frameOut);
	av_free(frameOut);
	return ret;
}
int AudioMixAction::_init_spec_filter(void)
{
	// Build the filtergraph description string, e.g. for two inputs:
	//   "[in0][in1]amix=inputs=2[out]"
	// then hand it to _init_filter() to construct the actual graph.
	char filter_spec[512] = {0};
	int written = 0;
	filter_ctx = std::make_shared<AudioMixAction::FilteringContext>();
	if (!filter_ctx)
		return ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
	filter_ctx.get()->buffersrc_ctx = NULL;
	filter_ctx.get()->buffersink_ctx = NULL;
	filter_ctx.get()->filter_graph = NULL;
	// One "[inN]" input label per source, appended in order.
	for (int idx = 0; idx < output_params.audioNum; idx++)
		written += snprintf(filter_spec + written, sizeof(filter_spec) - written, "[in%d]", idx);
	// Terminal amix filter feeding the single "[out]" pad.
	snprintf(filter_spec + written, sizeof(filter_spec) - written, "amix=inputs=%d[out]", output_params.audioNum);
	int ret = _init_filter(filter_ctx.get(), filter_spec);
	if (ret)
		return ret;
	return ErrorValue::EV_SUCCESS;
}
int AudioMixAction::_init_filter(FilteringContext* fctx, const char *filter_spec)
{
	// Create one abuffer source per input and a single abuffersink, pin the
	// sink's output format (fltp / stereo / 48 kHz), then parse and
	// configure the graph described by filter_spec.
	char args[512] = {0};
	char pad_name[10] = {0};
	int ret = 0;
	int i;
	AVFilter **buffersrc = (AVFilter**)av_malloc(output_params.audioNum * sizeof(AVFilter*));
	AVFilter *buffersink = NULL;   // looked up once below (was fetched twice)
	AVFilterContext **buffersrc_ctx = (AVFilterContext**)av_malloc(output_params.audioNum * sizeof(AVFilterContext*));
	AVFilterContext *buffersink_ctx = NULL;
	AVFilterInOut **outputs = (AVFilterInOut**)av_malloc(output_params.audioNum * sizeof(AVFilterInOut*));
	AVFilterInOut *inputs = avfilter_inout_alloc();
	AVFilterGraph *filter_graph = avfilter_graph_alloc();
	static const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_FLTP, (enum AVSampleFormat) - 1}; // output sample_fmt list, -1 terminated
	static const int64_t out_channel_layouts[] = {AV_CH_LAYOUT_STEREO, -1};                               // output channel_layout list
	static const int out_sample_rates[] = {48000, -1};                                                    // output sample_rate list
	AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
	uint64_t channel_layout = av_get_default_channel_layout(2);
	// NOTE(review): 1/25 looks like a video time base; audio buffersrc
	// normally uses 1/sample_rate — confirm the upstream PTS units.
	AVRational time_base = {1, 25};
	// FIX: av_malloc does not zero memory — clear the outputs slots before
	// any goto so the cleanup path never frees garbage pointers.
	if (outputs)
		for (i = 0; i < output_params.audioNum; i++)
			outputs[i] = NULL;
	// FIX: buffersrc/buffersrc_ctx allocations were previously unchecked.
	if (!buffersrc || !buffersrc_ctx || !outputs || !inputs || !filter_graph) {
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	for (i = 0; i < output_params.audioNum; i++)
	{
		buffersrc[i] = (AVFilter*)avfilter_get_by_name("abuffer");
		outputs[i] = avfilter_inout_alloc();
	}
	buffersink = (AVFilter*)avfilter_get_by_name("abuffersink");
	if (!buffersink) {
		av_log(NULL, AV_LOG_ERROR, "filtering source or sink element not found\n");
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	for (i = 0; i < output_params.audioNum; i++)
	{
		// Describe each source pad: per-source sample rate, shared fmt/layout.
		memset(args, 0, sizeof(args));
		snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
		         time_base.num, time_base.den, sources_vec[i].audioInfo.samples, av_get_sample_fmt_name(sample_fmt), channel_layout);
		memset(pad_name, 0, sizeof(pad_name));
		snprintf(pad_name, sizeof(pad_name), "in%d", i);
		ret = avfilter_graph_create_filter(&(buffersrc_ctx[i]), buffersrc[i], pad_name, args, NULL, filter_graph);
		if (ret < 0) {
			av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
			ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
			goto end;
		}
	}
	ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph);
	if (ret < 0) {
		av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
	if (ret < 0) {
		av_log(NULL, AV_LOG_ERROR, "Cannot set output sample_fmts\n");
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	ret = av_opt_set_int_list(buffersink_ctx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
	if (ret < 0) {
		av_log(NULL, AV_LOG_ERROR, "Cannot set output channel_layouts\n");
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
	if (ret < 0) {
		av_log(NULL, AV_LOG_ERROR, "Cannot set output sample_rates\n");
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	/* Endpoints for the filter graph: link the inout lists into a chain. */
	for (i = 0; i < output_params.audioNum; i++)
	{
		memset(pad_name, 0, sizeof(pad_name));
		snprintf(pad_name, sizeof(pad_name), "in%d", i);
		outputs[i]->name = av_strdup(pad_name);
		outputs[i]->filter_ctx = buffersrc_ctx[i];
		outputs[i]->pad_idx = 0;
		if (i == output_params.audioNum - 1)
			outputs[i]->next = NULL;
		else
			outputs[i]->next = outputs[i + 1];
	}
	inputs->name = av_strdup("out");
	inputs->filter_ctx = buffersink_ctx;
	inputs->pad_idx = 0;
	inputs->next = NULL;
	if (!outputs[0]->name || !inputs->name) {
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	ret = avfilter_graph_parse_ptr(filter_graph, filter_spec, &inputs, outputs, NULL);
	if (ret < 0)
	{
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	ret = avfilter_graph_config(filter_graph, NULL);
	if (ret < 0)
	{
		ret = ErrorValue::EV_ERROR_MIX_AUDIO_INIT_FAILE;
		goto end;
	}
	/* Success: hand ownership of the contexts/graph to the caller's fctx. */
	fctx->buffersrc_ctx = buffersrc_ctx;
	fctx->buffersink_ctx = buffersink_ctx;
	fctx->filter_graph = filter_graph;
end:
	avfilter_inout_free(&inputs);
	av_free(buffersrc);
	// buffersrc_ctx is stored in fctx on success, so it must not be freed
	// here. NOTE(review): on failure both the buffersrc_ctx array and
	// filter_graph leak — avfilter_graph_free would be needed.
	if (outputs) {                       // FIX: guard against a failed alloc
		avfilter_inout_free(outputs);    // frees the chain rooted at outputs[0]
		av_free(outputs);                // frees the pointer array itself
	}
	// FIX: propagate the real status — this function previously ended with
	// `return 0;`, reporting success to callers after every error path.
	return ret;
}