Python:通过提取声音特征进行音频对比
1. 环境准备
安装必要的Python库,如librosa(用于音频和音乐分析)、numpy(数学运算)、scipy(信号处理)、matplotlib(绘图),以及完整代码中用于麦克风录音的pyaudio。
pip install librosa numpy scipy matplotlib pyaudio
2. 声音文件的读取
介绍如何使用librosa库读取音频文件。
示例代码:加载音频文件,获取音频信号和采样率。
import librosa
def read_audio_file(file_path):
    """Load the audio file at ``file_path`` with librosa.

    ``sr=None`` is forwarded to ``librosa.load`` so the file is not
    resampled to librosa's default rate.

    Returns:
        A ``(signal, sample_rate)`` tuple as produced by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)
    return signal, sample_rate
3. 声音特征提取
简要介绍几种常见的音频特征,如MFCC(梅尔频率倒谱系数)、CQT(恒定Q变换)、短时能量、过零率等。
示例代码:使用librosa提取MFCC特征
# Load the two audio files to compare; each call returns the signal and its sampling rate.
# NOTE: `np` below is NumPy — this snippet assumes `import numpy as np` has already run
# (the import appears in the complete code).
recorded_audio, sr_recorded = read_audio_file(r'C:\Users\Desktop\cdc\pythonP56\audio\02 紧急故障.wav')
local_audio, sr_local = read_audio_file(r'C:\Users\Desktop\cdc\pythonP56\audio\01 热失控.wav')
# Extract MFCC features (13 coefficients) and reduce them with a mean over axis 1.
# Presumably axis 1 is the time-frame axis, giving one 13-value vector per file —
# confirm against the librosa.feature.mfcc documentation.
feature_recorded = np.mean(librosa.feature.mfcc(y=recorded_audio, sr=sr_recorded, n_mfcc=13), axis=1)
feature_local = np.mean(librosa.feature.mfcc(y=local_audio, sr=sr_local, n_mfcc=13), axis=1)
4. 特征对比策略
介绍几种常见的特征对比方法,如欧氏距离、余弦相似度、动态时间规整(DTW)等。
示例代码:通过计算余弦相似度对比两段音频的MFCC特征。
# Compute the cosine similarity: the dot product of the two feature vectors
# divided by the product of their norms.
similarity = np.dot(feature_recorded, feature_local) / (
np.linalg.norm(feature_recorded) * np.linalg.norm(feature_local))
print("音频1和音频2之间的相似度:", similarity)
# Treat the two clips as matching only when the similarity exceeds 0.9.
# NOTE: the `return` statements are only valid inside a function body; in the
# complete code these lines form the end of compare_audio_files().
if similarity > 0.9:
return True
else:
return False
5. 完整代码
import pyaudio
import wave
import librosa
import numpy as np
# Record audio from the microphone
def record_audio(seconds):
    """Record audio from a PyAudio input stream and save it as a WAV file.

    Captures mono, 16-bit audio at 44100 Hz in 1024-frame chunks for roughly
    ``seconds`` seconds and writes it to ``recorded_audio.wav`` in the
    current working directory.

    Args:
        seconds: Recording duration in seconds.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16  # avoid shadowing the builtin ``format``
    channels = 1
    rate = 44100

    p = pyaudio.PyAudio()
    try:
        # Query the sample width before the PyAudio instance is terminated.
        sample_width = p.get_sample_size(sample_format)
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            # Number of chunks needed to cover the requested duration.
            frames = [stream.read(chunk)
                      for _ in range(int(rate / chunk * seconds))]
        finally:
            # Always release the stream, even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails.
    with wave.open('recorded_audio.wav', 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
# Read a local audio file
def read_audio_file(file_path):
    """Load ``file_path`` via ``librosa.load`` and return the signal and sampling rate.

    ``sr=None`` is passed through so librosa does not resample to its
    default rate.
    """
    loaded = librosa.load(file_path, sr=None)
    samples, sampling_rate = loaded
    return samples, sampling_rate
# Compare the features of two audio files
def compare_audio_files(recorded_path='recorded_audio.wav',
                        local_path=r'C:\Users\Desktop\cdc\pythonP56\audio\01 热失控.wav',
                        threshold=0.9):
    """Compare two audio files by the cosine similarity of their mean MFCC vectors.

    By default the recording written by ``record_audio`` (``recorded_audio.wav``)
    is compared against the local reference file, so the microphone capture
    is the audio that actually gets evaluated.

    Args:
        recorded_path: Path of the recorded audio file.
        local_path: Path of the local reference audio file.
        threshold: Similarity above which the two files count as a match.

    Returns:
        True if the cosine similarity is greater than ``threshold``,
        otherwise False.
    """
    recorded_audio, sr_recorded = read_audio_file(recorded_path)
    local_audio, sr_local = read_audio_file(local_path)

    # Extract MFCC features and reduce each file to a single 13-value vector.
    feature_recorded = np.mean(
        librosa.feature.mfcc(y=recorded_audio, sr=sr_recorded, n_mfcc=13), axis=1)
    feature_local = np.mean(
        librosa.feature.mfcc(y=local_audio, sr=sr_local, n_mfcc=13), axis=1)

    # Cosine similarity; a zero-norm vector would otherwise divide by zero
    # and produce NaN, so treat that case as "no similarity".
    denominator = np.linalg.norm(feature_recorded) * np.linalg.norm(feature_local)
    if denominator == 0:
        similarity = 0.0
    else:
        similarity = np.dot(feature_recorded, feature_local) / denominator
    print("音频1和音频2之间的相似度:", similarity)

    return bool(similarity > threshold)
# Run the demo only when executed as a script, so importing this module
# does not start a microphone recording as a side effect.
if __name__ == "__main__":
    # Record 5 seconds of microphone audio
    record_audio(5)
    # Compare the recorded microphone audio with the local audio file
    result = compare_audio_files()
    print(result)