最近在研究 WebRTC 的混音:它把几路声音混成一路时,是直接对原始音频流(PCM)做叠加处理,而不是使用 FFmpeg 的 amix 滤镜。为此,我想按照这种算法写个 demo,将两路原始 PCM 音频流混音成一个 PCM 文件。
所以,这里先写一个 demo:读取系统声音,并把原始音频流写入文件,生成 PCM 文件。
本人之前写过如何抓取系统声音:需要先安装软件 Setup Screen Capturer Recorder v0.12.11.exe,下面的代码通过 DirectShow 打开名为 virtual-audio-capturer 的音频设备来读取系统声音。
如下所示,本人将读取到的音频数据写入文件:fwrite 的第二个参数是"通道数 × 每个采样的字节数"(即一个采样点在所有通道上的总字节数),第三个参数 readcount 为本次读取到的采样点数。
fwrite(frame_audio_inner->data[0], m_pReadCodecCtx_AudioInner->channels * m_iPerBytes, readcount, m_fpPcm);
这里的通道数为2,每个采样为16位(即2个字节);每个采样的字节数通过下面的方式获取:
m_iPerBytes = av_get_bytes_per_sample(m_pReadCodecCtx_AudioInner->sample_fmt);
最后,生成的 PCM 文件可以通过下面的工具进行播放验证。由于 PCM 文件没有文件头,播放之前需要手动把采样率、采样精度和声道数设置正确。
代码结构如下:
其中,FfmpegGeneratePcm.cpp的内容如下:
#include <iostream>
#include "VoiceCapture.h"
#include <vector>
/**
 * Demo entry point: records the system audio for 30 seconds and dumps it as
 * raw PCM into a hard-coded output file.
 *
 * @return 0 on success, 1 when the capture could not be started.
 */
int main()
{
    CVoiceCapture cVoiceCapture;
    const char *pFileA = "E:\\learn\\ffmpeg\\FfmpegGeneratePcm\\x64\\Release\\pcm_inner_audio.pcm";
    const int iStartRet = cVoiceCapture.StartGenerateInnerVoice(pFileA);
    if (iStartRet != 0)
    {
        // Do not sit in the 30 second sleep when nothing is being recorded.
        std::cerr << "StartGenerateInnerVoice failed, ret=" << iStartRet << std::endl;
        // Run the stop path anyway: it closes the output file if the failed
        // start left it open.
        cVoiceCapture.StopGenerateInnerVoice();
        return 1;
    }
    // Let the capture and writer threads run for 30 seconds.
    Sleep(30000);
    cVoiceCapture.StopGenerateInnerVoice();
    return 0;
}
VoiceCapture.h的内容如下:
#pragma once
#include <Windows.h>
#include <atomic>
#include <string>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
/**
 * Captures audio from a DirectShow device through FFmpeg and dumps the
 * decoded samples into a raw PCM file.
 *
 * Two worker threads are used: one reads and decodes packets into an audio
 * FIFO, the other drains that FIFO into the output file. Access to the FIFO
 * is serialised with m_csAudioInnerSection.
 */
class CVoiceCapture
{
public:
    CVoiceCapture();
    ~CVoiceCapture();
    // The object owns a CRITICAL_SECTION, thread handles and FFmpeg contexts;
    // copying would duplicate those raw handles, so it is disabled.
    CVoiceCapture(const CVoiceCapture &) = delete;
    CVoiceCapture &operator=(const CVoiceCapture &) = delete;
public:
    /**
     * Open the output file and the capture device, then start both worker threads.
     * @param pFile path of the PCM file to create
     * @return 0 on success, non-zero on failure
     */
    int StartGenerateInnerVoice(const char *pFile);
    /** Ask both worker threads to stop, wait for them and close the output file. */
    void StopGenerateInnerVoice();
public:
    /** Thread entry point; lpParam is the CVoiceCapture instance. Runs AudioInnerCapture(). */
    static DWORD WINAPI AudioInnerCaptureProc(LPVOID lpParam);
    /** Capture loop: reads packets, decodes them and pushes the samples into the FIFO. */
    void AudioInnerCapture();
    /** Thread entry point; lpParam is the CVoiceCapture instance. Runs AudioInnerWrite(). */
    static DWORD WINAPI AudioInnerWriteProc(LPVOID lpParam);
    /** Writer loop: pulls samples out of the FIFO and appends them to the PCM file. */
    void AudioInnerWrite();
private:
    /** Open the DirectShow audio device and its decoder. Returns 0 on success. */
    int OpenAudioInnerCapture();
private:
    CRITICAL_SECTION m_csAudioInnerSection;               // guards m_pAudioInnerFifo
    AVAudioFifo *m_pAudioInnerFifo = NULL;                // samples handed from capture thread to writer thread
    AVFormatContext *m_pFormatCtx_AudioInner = NULL;      // opened capture device
    AVCodecContext *m_pReadCodecCtx_AudioInner = NULL;    // decoder for the captured stream
    FILE *m_fpPcm = NULL;                                 // output PCM file
    HANDLE m_hAudioInnerCapture = NULL;                   // capture thread handle
    HANDLE m_hAudioWrite = NULL;                          // writer thread handle
    // Written by Start/Stop and polled by both worker threads, hence atomic.
    std::atomic<bool> m_bStart{false};
    int m_iPerBytes = 2;                                  // bytes per sample of one channel
};
VoiceCapture.cpp的内容如下:
#include "VoiceCapture.h"
static char *dup_wchar_to_utf8(const wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *)av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
// Prepares the critical section that the capture and writer threads take
// around every access to the audio FIFO.
CVoiceCapture::CVoiceCapture()
{
    ::InitializeCriticalSection(&m_csAudioInnerSection);
}
// Joins any worker thread that is still running, releases every resource the
// object still owns and finally destroys the critical section.
CVoiceCapture::~CVoiceCapture()
{
    // A worker thread that is still running would keep using this object
    // after it is gone, so stop and join before tearing anything down.
    if (m_hAudioInnerCapture != NULL || m_hAudioWrite != NULL)
    {
        StopGenerateInnerVoice();
    }
    if (m_fpPcm != NULL)
    {
        fclose(m_fpPcm);
        m_fpPcm = NULL;
    }
    if (m_pAudioInnerFifo != NULL)
    {
        av_audio_fifo_free(m_pAudioInnerFifo);
        m_pAudioInnerFifo = NULL;
    }
    if (m_pReadCodecCtx_AudioInner != NULL)
    {
        avcodec_free_context(&m_pReadCodecCtx_AudioInner);
    }
    if (m_pFormatCtx_AudioInner != NULL)
    {
        avformat_close_input(&m_pFormatCtx_AudioInner);
    }
    DeleteCriticalSection(&m_csAudioInnerSection);
}
int CVoiceCapture::StartGenerateInnerVoice(const char *pFile)
{
int ret = -1;
do
{
avdevice_register_all();
m_fpPcm = fopen(pFile, "wb");
if (NULL == m_fpPcm)
{
break;
}
ret = OpenAudioInnerCapture();
if (ret != 0)
{
break;
}
if (NULL == m_pAudioInnerFifo)
{
m_pAudioInnerFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_AudioInner->streams[0]->codecpar->format,
m_pFormatCtx_AudioInner->streams[0]->codecpar->channels, 3000 * 1024);
}
m_bStart = true;
m_hAudioInnerCapture = CreateThread(NULL, 0, AudioInnerCaptureProc, this, 0, NULL);
m_hAudioWrite = CreateThread(NULL, 0, AudioInnerWriteProc, this, 0, NULL);
} while (0);
return ret;
}
void CVoiceCapture::StopGenerateInnerVoice()
{
m_bStart = false;
WaitForSingleObject(m_hAudioInnerCapture, INFINITE);
CloseHandle(m_hAudioInnerCapture);
m_hAudioInnerCapture = NULL;
WaitForSingleObject(m_hAudioWrite, INFINITE);
CloseHandle(m_hAudioWrite);
m_hAudioWrite = NULL;
if (m_fpPcm != NULL)
{
fclose(m_fpPcm);
m_fpPcm = NULL;
}
}
int CVoiceCapture::OpenAudioInnerCapture()
{
int iRet = -1;
do
{
//查找输入方式
const AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
//以Direct Show的方式打开设备,并将 输入方式 关联到格式上下文
//const char * psDevName = dup_wchar_to_utf8(L"audio=麦克风 (2- Synaptics HD Audio)");
char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");
if (avformat_open_input(&m_pFormatCtx_AudioInner, psDevName, pAudioInputFmt, NULL) < 0)
{
break;
}
if (avformat_find_stream_info(m_pFormatCtx_AudioInner, NULL) < 0)
{
break;
}
if (m_pFormatCtx_AudioInner->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
{
break;
}
const AVCodec *tmpCodec = avcodec_find_decoder(m_pFormatCtx_AudioInner->streams[0]->codecpar->codec_id);
m_pReadCodecCtx_AudioInner = avcodec_alloc_context3(tmpCodec);
m_pReadCodecCtx_AudioInner->sample_rate = m_pFormatCtx_AudioInner->streams[0]->codecpar->sample_rate;
m_pReadCodecCtx_AudioInner->channel_layout = AV_CH_LAYOUT_STEREO;
m_pReadCodecCtx_AudioInner->channels = av_get_channel_layout_nb_channels(m_pReadCodecCtx_AudioInner->channel_layout);
m_pReadCodecCtx_AudioInner->sample_fmt = (AVSampleFormat)m_pFormatCtx_AudioInner->streams[0]->codecpar->format;
m_iPerBytes = av_get_bytes_per_sample(m_pReadCodecCtx_AudioInner->sample_fmt);
if (0 > avcodec_open2(m_pReadCodecCtx_AudioInner, tmpCodec, NULL))
{
break;
}
avcodec_parameters_from_context(m_pFormatCtx_AudioInner->streams[0]->codecpar, m_pReadCodecCtx_AudioInner);
iRet = 0;
} while (0);
if (iRet != 0)
{
if (m_pReadCodecCtx_AudioInner != NULL)
{
avcodec_free_context(&m_pReadCodecCtx_AudioInner);
m_pReadCodecCtx_AudioInner = NULL;
}
if (m_pFormatCtx_AudioInner != NULL)
{
avformat_close_input(&m_pFormatCtx_AudioInner);
m_pFormatCtx_AudioInner = NULL;
}
}
return iRet;
}
// Thread entry point for the capture thread. lpParam carries the
// CVoiceCapture instance whose capture loop is to be run; a NULL argument is
// ignored. Always returns 0.
DWORD WINAPI CVoiceCapture::AudioInnerCaptureProc(LPVOID lpParam)
{
    CVoiceCapture *const pSelf = static_cast<CVoiceCapture *>(lpParam);
    if (NULL == pSelf)
    {
        return 0;
    }
    pSelf->AudioInnerCapture();
    return 0;
}
void CVoiceCapture::AudioInnerCapture()
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
AVPacket packet = { 0 };
int ret = 0;
while (m_bStart)
{
av_packet_unref(&packet);
ret = av_read_frame(m_pFormatCtx_AudioInner, &packet);
if (ret < 0)
{
continue;
}
ret = avcodec_send_packet(m_pReadCodecCtx_AudioInner, &packet);
if (ret >= 0)
{
ret = avcodec_receive_frame(m_pReadCodecCtx_AudioInner, pFrame);
if (ret == AVERROR(EAGAIN))
{
continue;
}
else if (ret == AVERROR_EOF)
{
break;
}
else if (ret < 0) {
break;
}
int buf_space = av_audio_fifo_space(m_pAudioInnerFifo);
if (buf_space >= pFrame->nb_samples)
{
//AudioSection
EnterCriticalSection(&m_csAudioInnerSection);
ret = av_audio_fifo_write(m_pAudioInnerFifo, (void **)pFrame->data, pFrame->nb_samples);
LeaveCriticalSection(&m_csAudioInnerSection);
//fwrite(pFrame->data[0], 4, pFrame->nb_samples, m_fpPcm);
}
av_packet_unref(&packet);
}
}
av_frame_free(&pFrame);
}
// Thread entry point for the writer thread. lpParam carries the CVoiceCapture
// instance whose writer loop is to be run; nothing happens when it is NULL.
// Always returns 0.
DWORD WINAPI CVoiceCapture::AudioInnerWriteProc(LPVOID lpParam)
{
    if (CVoiceCapture *const pOwner = static_cast<CVoiceCapture *>(lpParam))
    {
        pOwner->AudioInnerWrite();
    }
    return 0;
}
void CVoiceCapture::AudioInnerWrite()
{
int ret = 0;
while (m_bStart)
{
AVFrame *frame_audio_inner = NULL;
frame_audio_inner = av_frame_alloc();
frame_audio_inner->nb_samples = 1024;
frame_audio_inner->channel_layout = AV_CH_LAYOUT_STEREO;
frame_audio_inner->format = m_pFormatCtx_AudioInner->streams[0]->codecpar->format;
frame_audio_inner->sample_rate = m_pFormatCtx_AudioInner->streams[0]->codecpar->sample_rate;
av_frame_get_buffer(frame_audio_inner, 0);
EnterCriticalSection(&m_csAudioInnerSection);
int readcount = av_audio_fifo_read(m_pAudioInnerFifo, (void **)frame_audio_inner->data,
(m_pFormatCtx_AudioInner->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_AudioInner->streams[0]->codecpar->frame_size : 1024));
LeaveCriticalSection(&m_csAudioInnerSection);
fwrite(frame_audio_inner->data[0], m_pReadCodecCtx_AudioInner->channels * m_iPerBytes, readcount, m_fpPcm);
av_frame_free(&frame_audio_inner);
}
}