http://www.cnblogs.com/mq0036/p/6514894.html
上面这个程序是用C#实现的:获取声卡上的语音,判断音量大小,若音量大于阈值,就截取这段语音;它主要是调用了封装好的商业DLL。
我在《用Python做科学计算》这本电子书上找到了一章介绍pyaudio开源库的内容,而且正好提供了截获语音的例子:
http://old.sebug.net/paper/books/scipydoc/wave_pyaudio.html#pyaudio
不过例子是python2的,我稍加改动,改成python3版本
from pyaudio import PyAudio, paInt16
import numpy as np
from datetime import datetime
import wave
def save_wave_file(filename, data, sampling_rate=None):
    """Write raw PCM blocks to a mono, 16-bit WAV file.

    Args:
        filename: Path of the WAV file to create.
        data: Iterable of bytes-like blocks holding 16-bit mono samples;
            the blocks are written in order.
        sampling_rate: Frame rate stored in the WAV header.  When omitted,
            the module-level SAMPLING_RATE is used.
    """
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE
    # The context manager closes the file (finalizing the WAV header) even
    # when a write fails part-way through.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sampling_rate)
        # One write for all blocks instead of one header patch per block.
        wf.writeframes(b"".join(data))
NUM_SAMPLES = 2000 # samples per block: used as PyAudio's frames_per_buffer and as the size of each read
SAMPLING_RATE = 8000 # sampling rate in Hz, used for both capture and the saved WAV files
LEVEL = 1000 # amplitude threshold a sample must exceed to count as "loud"
COUNT_NUM = 20 # a block triggers recording when more than COUNT_NUM of its samples exceed LEVEL
SAVE_LENGTH = 8 # number of blocks kept after each trigger, i.e. at least SAVE_LENGTH * NUM_SAMPLES samples per recording
# Open the audio input: 16-bit mono at SAMPLING_RATE, delivered in blocks of
# NUM_SAMPLES frames.
pa = PyAudio()
stream = pa.open(format=paInt16, channels=1, rate=SAMPLING_RATE, input=True,
                 frames_per_buffer=NUM_SAMPLES)
save_count = 0    # blocks still to be kept after the most recent loud block
save_buffer = []  # raw blocks of the recording currently in progress
i = 0             # index of the block being processed (only used for logging)


def _write_recording(blocks):
    """Save *blocks* to a WAV file named after the current time and log it."""
    filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
    save_wave_file(filename, blocks)
    print(filename, "saved")


try:
    while True:
        # Read one block of NUM_SAMPLES samples.
        string_audio_data = stream.read(NUM_SAMPLES)
        # View the raw bytes as 16-bit samples.  np.frombuffer replaces the
        # deprecated binary mode of np.fromstring and avoids a copy.
        audio_data = np.frombuffer(string_audio_data, dtype=np.short)
        # Count the samples louder than LEVEL.
        large_sample_count = np.sum(audio_data > LEVEL)
        print(i, ": ", np.max(audio_data))
        # A loud block (re)arms the countdown so that at least SAVE_LENGTH
        # blocks are kept; a quiet block counts it down, never below zero.
        if large_sample_count > COUNT_NUM:
            save_count = SAVE_LENGTH
        else:
            save_count = max(save_count - 1, 0)
        if save_count > 0:
            # Still recording: keep this block.
            save_buffer.append(string_audio_data)
        elif save_buffer:
            # Countdown expired: write the finished recording to disk.
            _write_recording(save_buffer)
            save_buffer = []
        i += 1
except KeyboardInterrupt:
    # Ctrl-C is the only way to stop the loop; do not lose a recording that
    # is still in progress.
    if save_buffer:
        _write_recording(save_buffer)
finally:
    # Release the audio device whichever way the loop ended.
    stream.stop_stream()
    stream.close()
    pa.terminate()