python声音捕获_使用Python检测和录制音频

最新推荐文章于 2024-08-14 14:25:54 发布

胖厨胡学斌

最新推荐文章于 2024-08-14 14:25:54 发布

阅读量899

点赞数

文章标签： python声音捕获

本文链接：https://blog.csdn.net/weixin_31528001/article/details/111922951

版权

这段代码展示了如何使用Python的PyAudio库来检测和录制音频。通过设置阈值和静音块数来判断是否开始和结束录音，并进行了音频修剪和归一化处理，以确保声音质量。

摘要由CSDN通过智能技术生成

感谢cryo的改进版本，我基于下面的测试代码：

# Instead of padding the start and end of the recording with silence (zero-valued samples), I keep a short stretch of the original audio. This makes the audio sound more natural because the volume is > 0. See trim().

# I also fixed an issue in the previous code: the accumulated silence counter needs to be reset once recording resumes.

from array import array

from struct import pack

from sys import byteorder

import copy

import pyaudio

import wave

# --- Capture settings -------------------------------------------------------
RATE = 44100  # sampling rate passed to the stream, in Hz
CHANNELS = 1
CHUNK_SIZE = 1024  # frames requested from the stream per read
FORMAT = pyaudio.paInt16

# --- Silence detection ------------------------------------------------------
# Compared against raw (un-normalised) sample values.
THRESHOLD = 500

# Consecutive silent chunks that end a recording (about 3 sec). Derived from
# RATE and CHUNK_SIZE so it stays correct if either is changed.
SILENT_CHUNKS = 3 * RATE // CHUNK_SIZE

# --- Normalisation ----------------------------------------------------------
FRAME_MAX_VALUE = 2 ** 15 - 1  # largest positive value of a signed short
NORMALIZE_MINUS_ONE_dB = 10 ** (-1.0 / 20)  # linear gain for -1 dB

# --- Trimming ---------------------------------------------------------------
# Samples of original audio kept before and after the detected sound
# (a quarter of a second). Integer division is required: the value is used as
# a slice offset, and a float offset raises TypeError on Python 3.
TRIM_APPEND = RATE // 4

def is_silent(data_chunk, threshold=None):
    """Return True if the whole chunk stays below the silence threshold.

    The absolute value of each sample is compared, so a loud negative peak
    counts as sound, consistent with how trim() detects sound. An empty
    chunk is reported as silent.

    Args:
        data_chunk: iterable of integer samples.
        threshold: level at or above which a sample counts as sound.
            Defaults to the module-level THRESHOLD.

    Returns:
        bool: True when no sample reaches the threshold.
    """
    if threshold is None:
        threshold = THRESHOLD
    return all(abs(sample) < threshold for sample in data_chunk)

def normalize(data_all, target_peak=None):
    """Scale the samples so that the loudest one reaches the target peak.

    Args:
        data_all: sequence of integer samples.
        target_peak: desired absolute peak value after scaling. Defaults to
            NORMALIZE_MINUS_ONE_dB * FRAME_MAX_VALUE (i.e. -1 dB below the
            maximum signed-short value).

    Returns:
        array('h'): a new array with every sample multiplied by the same
        factor and truncated to an integer. Empty or all-zero input is
        returned as an unscaled copy, since there is no peak to scale from.
    """
    if target_peak is None:
        target_peak = NORMALIZE_MINUS_ONE_dB * FRAME_MAX_VALUE

    if not len(data_all):
        return array('h')

    peak = max(abs(sample) for sample in data_all)
    if peak == 0:
        # Pure digital silence: scaling would divide by zero.
        return array('h', data_all)

    normalize_factor = float(target_peak) / peak
    return array('h', (int(sample * normalize_factor) for sample in data_all))

def trim(data_all, threshold=None, padding=None):
    """Cut leading and trailing quiet samples, keeping some original audio.

    The first and last samples whose absolute value exceeds the threshold
    mark the sound. Up to `padding` samples of the original recording are
    kept on each side of it rather than inserting zero-valued silence.

    Args:
        data_all: sequence of integer samples (supports len() and slicing).
        threshold: level a sample must exceed to count as sound. Defaults to
            the module-level THRESHOLD.
        padding: number of samples to keep before the first and after the
            last loud sample. Defaults to the module-level TRIM_APPEND.

    Returns:
        A new sequence of the same type as data_all (slicing copies). If no
        sample exceeds the threshold the whole input is returned.
    """
    if threshold is None:
        threshold = THRESHOLD
    if padding is None:
        padding = TRIM_APPEND
    # Slice indices must be integers; the padding may be given as a float.
    padding = int(padding)

    last_index = len(data_all) - 1
    start = 0
    end = last_index

    for index, sample in enumerate(data_all):
        if abs(sample) > threshold:
            start = max(0, index - padding)
            break

    for offset, sample in enumerate(reversed(data_all)):
        if abs(sample) > threshold:
            end = min(last_index, last_index - offset + padding)
            break

    return data_all[start:end + 1]

def record():
    """Record a word or words from the microphone.

    Reads CHUNK_SIZE frames at a time. Recording is considered started at
    the first non-silent chunk and ends once more than SILENT_CHUNKS
    consecutive silent chunks follow. The captured samples are then trimmed
    and normalised.

    Returns:
        tuple: (sample_width, data) where sample_width is the value reported
        by PyAudio.get_sample_size(FORMAT) and data is an array of signed
        shorts.
    """
    p = pyaudio.PyAudio()
    stream = None
    try:
        # NOTE(review): output=True is kept from the original; capture alone
        # should not need an output stream -- confirm before removing.
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)

        silent_chunks = 0
        audio_started = False
        data_all = array('h')

        while True:
            # little endian, signed short
            data_chunk = array('h', stream.read(CHUNK_SIZE))
            if byteorder == 'big':
                data_chunk.byteswap()
            data_all.extend(data_chunk)

            silent = is_silent(data_chunk)

            if audio_started:
                if silent:
                    silent_chunks += 1
                    if silent_chunks > SILENT_CHUNKS:
                        break
                else:
                    # Sound resumed: the silence run must start over.
                    silent_chunks = 0
            elif not silent:
                audio_started = True

        sample_width = p.get_sample_size(FORMAT)
    finally:
        # Release the audio device even if opening or reading failed.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

    # Trim before normalising: the threshold (in trim() as well as in
    # is_silent()) applies to the un-normalised wave.
    data_all = trim(data_all)
    data_all = normalize(data_all)
    return sample_width, data_all

def record_to_file(path):
    """Record from the microphone and write the resulting data to 'path'.

    Args:
        path: destination passed to wave.open() in 'wb' mode.
    """
    sample_width, data = record()
    # Serialise the samples as little-endian signed shorts.
    data = pack('<%dh' % len(data), *data)

    wave_file = wave.open(path, 'wb')
    try:
        wave_file.setnchannels(CHANNELS)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(RATE)
        wave_file.writeframes(data)
    finally:
        # Close the file even if writing the header or frames fails.
        wave_file.close()

# Script entry point: record one utterance from the microphone into demo.wav.
if __name__ == '__main__':
    print("Wait in silence to begin recording; wait in silence to terminate")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")

胖厨胡学斌

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫