最近公司需要为广播站提供音频比对服务,所以使用 Python 对知名的飞利浦音频指纹(audio fingerprint)算法进行了验证。
1.导入依赖
# 读取波形依赖
import wave
# 画图依赖
import pylab as pl
# fft及计算依赖
import numpy as np
# mel scale 依赖
import librosa
# 色彩依赖
from matplotlib import colors
2.先画出母本和样本的波形
母本与样本皆为实际项目中采集的、具有代表性特征的音频数据。
# Open the WAV files (master copy = "meta", probe recording = "sample").
# This master/sample pair suffers from strong interference.
sample = wave.open(r"/Users/jason12360/Desktop/vobile/AudioCompare/test1/1570516260761/7b6601b8-48ac-41.wav", "rb")
meta = wave.open(r"/Users/jason12360/Desktop/vobile/AudioCompare/test1/1570516260761/43a7c85f-9a13-42.wav", "rb")
# This master/sample pair is fairly clean.
# meta = wave.open(r"/Users/jason12360/Desktop/vobile/notmatch/116/1571880420080-103/5fe166f2-c633-44", "rb")
# sample = wave.open(r"/Users/jason12360/Desktop/vobile/notmatch/116/1571880420080-103/43437227-f70c-40", "rb")
# Read the WAV header info:
# (nchannels, sampwidth, framerate, nframes, comptype, compname)
sample_params = sample.getparams()
meta_params = meta.getparams()
sample_nchannels, sample_sampwidth, sample_framerate, sample_nframes = sample_params[:4]
meta_nchannels, meta_sampwidth, meta_framerate, meta_nframes = meta_params[:4]
# Time axis in seconds for each stream: frame index / sample rate.
sample_time = np.arange(0, sample_nframes) * (1.0 / sample_framerate)
sample_time_min = sample_time[0]
sample_time_max = sample_time[-1]
meta_time = np.arange(0, meta_nframes) * (1.0 / meta_framerate)
# BUG FIX: the original read these bounds from sample_time (copy-paste
# error); the meta bounds must come from meta_time.
meta_time_min = meta_time[0]
meta_time_max = meta_time[-1]
# Duration of each analysis frame, in seconds.
frame_duration = 0.37
# Overlap ratio between consecutive analysis frames.
overlap_ratio = 31 / 32
# Size of each analysis frame, in WAV frames (samples per channel).
sample_frame_size = int(frame_duration * sample_framerate)
meta_frame_size = int(frame_duration * meta_framerate)
# Hop size: number of WAV frames to advance between consecutive
# analysis frames.
sample_step_frames = int(sample_frame_size * (1 - overlap_ratio))
meta_step_frames = int(meta_frame_size * (1 - overlap_ratio))
# Number of analysis frames each audio is split into; each frame yields
# (bands - 1) fingerprint bits.
sample_bins = int((sample_nframes - sample_frame_size) / sample_step_frames)
meta_bins = int((meta_nframes - meta_frame_size) / meta_step_frames)
# Number of frequency bands the spectrum of each frame is divided into;
# each frame produces (bands - 1) fingerprint bits.
bands = 33
min_frequency = 300
max_frequency