webrtc 静音检测(二)

距离上一篇文章已经过去很久了,第一篇文章简单介绍了静音检测的基本概念。

1、使用portaudio 来采集声音

类接口


// Audio capture device wrapper built on PortAudio. Runs capture on its own
// thread (inherits c_thread) and forwards each recorded PCM buffer to a
// user-supplied callback (optionally encoded, depending on build switches).
class DeviceAudio:public c_thread
{
private:
	TSoundInfo * _info = NULL;  // capture configuration; owned by the caller, bound via Open()
	static vector<AudioCapacity> g_AudioDevice;  // cached device list, refreshed by GetDeviceCount()

protected:
	// Enumerate all PortAudio devices into deviceList; returns 0 if any found, -1 otherwise.
	static int GetDeviceList(vector<AudioCapacity> &deviceList);

public:
	DeviceAudio(){
		
	}
	~DeviceAudio() {
#if(USE_FAAC)
		// NOTE(review): _aPacket is not declared in this excerpt — presumably a
		// FAAC-build-only member; confirm against the full header.
		if (_aPacket != NULL)
			delete _aPacket;
#endif
		/*if(_pkt!=NULL)
			av_packet_free(&_pkt);*/
	}
	//int _channel = 1;

	// Forward a raw PCM buffer to the send callback; returns 0 on success, -1 if none is set.
	int Encode(uint8_t * inputBuffer, int audioSize,int channel);



	func_send _ptr = NULL;  // user callback receiving (buffer, size); set by Open()
	// Channel count from the bound configuration, or -1 before Open() was called.
	int GetChannel()
	{
		if (_info != NULL)
			return _info->channels;
		return -1;
	}

	static int Init();    // initialize the PortAudio library (once per process)
	static int UnInit();  // terminate the PortAudio library
	static int GetDeviceDefault(int &in, string &inname, int &out, string &outname);
	static int GetDeviceCount();
	static const char *GetDeviceName(int index);
	static const char *GetDeviceCapacity(int index,int &samplerate, int &channel);
	//Initialize the audio and audio-encoding parameters at start
	//int Start();
	int Open(TSoundInfo *_info);
	void Stop();  // stop and join the capture thread
	void Run();   // thread body: opens the PortAudio input stream and captures
	//int isStopped();
	//int _stop = 0;
};

以下为类的实现代码;如果需要使用 ffmpeg 进行编码,请打开相应的编译开关(USE_AAC)。

#include "DeviceAudio.h"

#include <csignal>
#include <cstdio>

extern "C"  {
#include "portaudio/portaudio.h"
}

using namespace std;


// Definition of the static device-capability cache shared by all DeviceAudio instances.
vector<AudioCapacity> DeviceAudio::g_AudioDevice;

// PortAudio capture callback: forwards each recorded buffer to the owning
// DeviceAudio for encoding/dispatch.
// Returns paContinue to keep capturing, paComplete once the owner is stopped.
static int AudioRecordCallback(const void *inputBuffer, void *outputBuffer,
	unsigned long framesPerBuffer,
	const PaStreamCallbackTimeInfo* timeInfo,
	PaStreamCallbackFlags statusFlags,
	void *userData)
{
	DeviceAudio *process = (DeviceAudio*)userData;
	int finished = paContinue;
	// Stream is opened with paInt16, so one sample is sizeof(short) bytes.
	int channel = process->GetChannel();
	int audioSize = (framesPerBuffer * sizeof(short) * channel);

	(void)outputBuffer; /* Prevent unused variable warnings. */
	(void)timeInfo;
	(void)statusFlags;

	if (process->IsStop() == 1) {
		// BUGFIX: this previously returned 0 (paContinue), so PortAudio was
		// never told to finish; return paComplete so the stream stops cleanly.
		finished = paComplete;
		return finished;
	}
	if (process->_ptr != NULL)
	{
#ifdef USE_AAC // ffmpeg-based encoding path
		AVPacket *pkt = av_packet_alloc();
		av_init_packet(pkt);
		if (process->GetEncoder()->Encode((uint8_t*)inputBuffer, audioSize, channel, pkt) != NULL)
			process->_ptr(pkt);
		else
			av_packet_free(&pkt);
#else
		// Raw PCM path: hand the buffer straight to the registered callback.
		process->Encode((uint8_t*)inputBuffer, audioSize, channel);
#endif
	}
	return finished;
}
// Forward a raw PCM buffer to the registered send callback.
// Returns 0 on success, -1 when no callback has been installed.
int DeviceAudio::Encode(uint8_t * inputBuffer, int audioSize,int channel)
{
	(void)channel; // channel count is not needed by the raw passthrough path
	if (_ptr != NULL)
	{
		_ptr(inputBuffer, audioSize);
		return 0;
	}
	return -1;
}

// Query PortAudio for the default input and output devices, returning each
// index and human-readable name.
// Returns 2 on success (both devices resolved), -1 if either device is
// missing or its info cannot be retrieved.
int DeviceAudio::GetDeviceDefault(int &in, string &inname, int &out, string &outname)
{
	in = Pa_GetDefaultInputDevice();
	const PaDeviceInfo *pInfo = Pa_GetDeviceInfo(in);
	if (pInfo == NULL) // BUGFIX: NULL when in == paNoDevice — previously dereferenced
		return -1;
	inname = pInfo->name;
	out = Pa_GetDefaultOutputDevice();
	pInfo = Pa_GetDeviceInfo(out);
	if (pInfo == NULL)
		return -1;
	outname = pInfo->name;
	return 2;
}

// Enumerate every PortAudio device into deviceList, recording its name,
// maximum input-channel count, and default sample rate.
// Returns 0 if at least one device was found, -1 otherwise.
int DeviceAudio::GetDeviceList(vector<AudioCapacity> &deviceList)
{
	PaDeviceIndex num = Pa_GetDeviceCount();
	for (int i = 0; i < num; i++)
	{
		const PaDeviceInfo *pInfo = Pa_GetDeviceInfo(i);
		if (pInfo == NULL) // BUGFIX: Pa_GetDeviceInfo may return NULL; skip the entry
			continue;
		AudioCapacity cp;
		cp.name = pInfo->name;
		cp.channel = pInfo->maxInputChannels;
		cp.samplerate = (int)pInfo->defaultSampleRate;
		deviceList.push_back(cp);
	}
	return deviceList.empty() ? -1 : 0;
}

// Initialize the PortAudio library. Must be called once before any other
// DeviceAudio API.
// Returns 0 on success; on failure terminates PortAudio again, reports the
// error on stderr, and returns 1 (only 0 or 1 are ever returned).
PaError DeviceAudio::Init()
{
	PaError err = Pa_Initialize();
	if (err == paNoError)
		return 0;
	// Initialization failed: release whatever PortAudio set up and report.
	// (The original goto/done pattern re-tested err != paNoError, which was
	// always true at that point.)
	Pa_Terminate();
	fprintf(stderr, "An error occurred while using the portaudio stream\n");
	fprintf(stderr, "Error number: %d\n", err);
	fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));
	return 1;
}
//framesPerBuffer * sizeof(float) * 通道数

//return  0 is correct, other failure, author : qianbo 
// Bind the sound configuration and its send callback to this capture object.
// return 0 is correct, other failure, author : qianbo
int DeviceAudio::Open(TSoundInfo *info)
{
	if (info == NULL) // BUGFIX: previously dereferenced info->callback unchecked
		return -1;
	_info = info;
	_ptr = info->callback;
	return 0;
}
//C = A + B - (A * B >> 0x10)
//if (C > 32767) C = 32767;
//else if (C < -32768) C = -32768;
// Stop the capture thread: set the base-class stop flag, wake the thread
// out of WaitForSignal() in Run(), then block until it exits.
// The order matters — the flag must be raised before Notify() so the woken
// thread observes it.
void DeviceAudio::Stop()
{
	c_thread::Stop();  // raise the stop flag (also checked by the PortAudio callback via IsStop())
	Notify();          // wake Run() from WaitForSignal()
	Join();            // wait for the thread to finish tearing down the stream
	
}

//int DeviceAudio::isStopped()
//{
//	//fflush(stdout);
//	return Pa_IsStreamActive(_recordStream);
//}

int DeviceAudio::UnInit()
{
	return Pa_Terminate();

}

//得到音频设备的个数
// Refresh the static device cache and return how many devices were found.
int DeviceAudio::GetDeviceCount()
{
	g_AudioDevice.clear();
	DeviceAudio::GetDeviceList(g_AudioDevice);
	return (int)g_AudioDevice.size();
}

// Name of the cached device at index, or NULL when index is out of range.
// The cache must have been filled by a prior GetDeviceCount() call.
const char *DeviceAudio::GetDeviceName(int index)
{
	const int count = (int)g_AudioDevice.size();
	if (index < 0 || index >= count)
		return NULL;
	return g_AudioDevice[index].name.c_str();
}
const char *DeviceAudio::GetDeviceCapacity(int index ,int &samplerate, int &channel)
{
	if (index < (int)g_AudioDevice.size() && index >= 0)
	{
		samplerate = g_AudioDevice[index].samplerate;
		channel = g_AudioDevice[index].channel;
		return g_AudioDevice[index].name.c_str();
	}
	return NULL;
}



void DeviceAudio::Run()
{
	PaError             err = paNoError;
	PaStreamParameters  inputParameters;
	PaStream*           stream;



	if (_info->deviceindex == -1)/* default input device */
		inputParameters.device = Pa_GetDefaultInputDevice();
	else
		inputParameters.device = _info->deviceindex;

	if (inputParameters.device == paNoDevice) {
		fprintf(stderr, "Error: No default input device.\n");
		return;
		//goto done;
	}
	const PaDeviceInfo * pInfo = Pa_GetDeviceInfo(inputParameters.device);

	//直接取最大通道数目
	//int channelNum = pInfo->maxInputChannels ;
	//if (channelNum > 2)
	//channelNum = 1;
	inputParameters.channelCount = 1; 

	inputParameters.sampleFormat = paInt16;
	inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
	inputParameters.hostApiSpecificStreamInfo = NULL;
	err = Pa_OpenStream(
		&stream,
		&inputParameters,
		NULL,                  /* &outputParameters, */
		_info->sample_rate,
		//_info->sample_rate, //44100,
		_info->nb_samples, //1024,
		paClipOff,      /* we won't output out of range samples so don't bother clipping them */
		AudioRecordCallback,
		(void*)this);
	if (err != paNoError)
		return ;

	Pa_StartStream(stream);

	WaitForSignal(); //等待结束

	if (stream!= NULL)
	{
		Pa_AbortStream(stream);
		Pa_CloseStream(stream);
	}
}

2、使用webrtc的静音检测的接口

首先,一定要沉下心来研究webrtc的几个接口,把以下接口仔细研究一遍

// Excerpt of webrtc_vad.h — the WebRTC C API for voice activity detection.
#ifdef __cplusplus
extern "C" {
#endif

// Creates an instance to the VAD structure.
//
// - handle [o] : Pointer to the VAD instance that should be created.
//
// returns      : 0 - (OK), -1 - (Error)
int WebRtcVad_Create(VadInst** handle);

// Frees the dynamic memory of a specified VAD instance.
//
// - handle [i] : Pointer to VAD instance that should be freed.
//
// returns      : 0 - (OK), -1 - (NULL pointer in)
int WebRtcVad_Free(VadInst* handle);

// Initializes a VAD instance.
//
// - handle [i/o] : Instance that should be initialized.
//
// returns        : 0 - (OK),
//                 -1 - (NULL pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst* handle);

// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
// restrictive in reporting speech. Put in other words the probability of being
// speech when the VAD returns 1 is increased with increasing mode. As a
// consequence also the missed detection rate goes up.
//
// - handle [i/o] : VAD instance.
// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
//
// returns        : 0 - (OK),
//                 -1 - (NULL pointer, mode could not be set or the VAD instance
//                       has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);

// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
//
// - handle       [i/o] : VAD Instance. Needs to be initialized by
//                        WebRtcVad_Init() before call.
// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
// - audio_frame  [i]   : Audio frame buffer.
// - frame_length [i]   : Length of audio frame buffer in number of samples.
//
// returns              : 1 - (Active Voice),
//                        0 - (Non-active Voice),
//                       -1 - (Error)
int WebRtcVad_Process(VadInst* handle, int fs, int16_t* audio_frame,
                      int frame_length);

// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate         [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns            : 0 - (valid combination), -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length);

#ifdef __cplusplus
}
#endif

3、如何封装调用

我们在类中来初始化

	int init()
	{
		if (_init == -1)
		{

			status = WebRtcVad_Create(&handle);
			if (status != 0)
			{
				printf("Init is error\n");
				return -1;
			}
			status = WebRtcVad_Init(handle);
			if (status != 0)
			{
				return -1;
			}
			//这里强度为2 ,可以修改
			status = WebRtcVad_set_mode(handle, 2);
			if (status != 0)
			{
				printf("Set mode is error\n");
				return -1;
			}
			_init = 0;
			return 0;
		}
		return 0;
	}

具体调用的detect函数,fs取值16000

// Run WebRTC VAD on one frame of 16-bit PCM at the configured rate `fs`.
// Returns 1 = active voice, 0 = non-active voice, -1 = error (also logged).
int detect(short *data, int length)
	{
		status = WebRtcVad_Process(handle, fs, data, length);
		if (status != -1)
			return status;
		printf("WebRtcVad_Process is error\n");
		return -1;
	}

以下为类封装


// Runs WebRTC VAD over captured PCM buffers and reports voice activity.
// Buffers are scanned in frames of 480 samples (30 ms at 16 kHz).
class c_audio_util
{
	c_vad_check _vadcheck;

	// Early prototype of vad_check that records every frame decision.
	// BUGFIX: previously fell off the end of a non-void function (undefined
	// behavior), could overflow a[5] on long buffers, and read out of bounds
	// on buffers shorter than one frame. Returns the tail-frame decision.
	int vad_check0(uint8_t *data, int len)
	{
		int lenl = len / sizeof(short); // number of 16-bit samples in the buffer
		if (lenl < 480)                 // too short for even one VAD frame
			return -1;
		short* start = (short*)data;
		short* end = start + lenl;
		int a[5] = { 0,0,0,0,0 };
		int i = 0;
		while (start < end && i < 4)    // bound i so the tail write below stays in a[5]
		{
			a[i++] = _vadcheck.detect(start, 480);
			start += 480;
			if (end - start < 480)
				break;
		}
		// Re-check the last full frame so the tail of the buffer is covered.
		start = end - 480;
		a[i] = _vadcheck.detect(start, 480);
		return a[i];
	}

	// Scan the buffer in 480-sample frames; return 1 as soon as any frame
	// contains active voice, otherwise the decision for the tail frame.
	// Returns -1 if the buffer is shorter than one frame.
	int vad_check(uint8_t* data, int len)
	{
		int lenl = len / sizeof(short);
		if (lenl < 480) // BUGFIX: previously read past/before the buffer in this case
			return -1;
		short* start = (short*)data;
		short* end = start + lenl;
		int status = 0;
		while (start < end)
		{
			status = _vadcheck.detect(start, 480);
			if (status == 1)
				break;
			start += 480;
			if (end - start < 480)
				break;
		}
		if (status == 1)
			return 1;
		// Cover the tail: re-check the last full frame of the buffer.
		start = end - 480;
		return _vadcheck.detect(start, 480);
	}
	uint64_t nn = 0; // buffer counter, used only to break the log output into rows
public:
	// Callback invoked for every captured PCM buffer: runs VAD and logs the
	// decision (1 = active voice, 0 = silence, -1 = error).
	void func_send_audio(uint8_t* data, int len)
	{
		int status = vad_check(data, len);
		printf("%d ", status);
		if (nn++ % 16 == 0)
			printf("\n");
		if (status == 1) //active voice
		{
			printf("1");
			// (disabled) forward active audio to a command queue (MODE_CMD,
			// base64 over _robotcmd) or a websocket connection (MODE_WRITE).
		}
		else if (status == 0)
		{
			// silence: nothing to send
			// (disabled) MODE_CMD would flush the pending base64 payload here.
		}

	}
};

主程序调用

int main()
{
	DeviceAudio::Init();
	DeviceAudio _audio;
	TSoundInfo _info;
	c_audio_util _util;
	_info.deviceindex = -1;
	_info.bit_rate = 16000;
	_info.channels = 1;
	_info.nb_samples = 2048;
	_info.sample_rate = 16000;
	_info.callback = std::bind(&c_audio_util::func_send_audio, &_util, 
		std::placeholders::_1, std::placeholders::_2);
	_audio.Open(&_info);
	_audio.Start();

}

至此整个流程完毕,下一章将会具体讲解流程细节和算法以及源代码

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

qianbo_insist

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值