TOOLS_Python获取音域范围

baby_hua

已于 2023-06-15 16:37:34 修改

阅读量274

点赞数 2

分类专栏： Python 文章标签： python

于 2023-06-13 18:51:11 首次发布

本文链接：https://blog.csdn.net/baby_hua/article/details/131194318

版权

Python 专栏收录该内容

23 篇文章 0 订阅

订阅专栏

基于 librosa.pyin 方法（参见 librosa 官方文档）提取基频（f0）序列，取其最小值和最大值，再与标准音高序列对比，得到音域范围。

def create_standard_pitch_sequence():
    """Build the table of standard pitches together with their names.

    Returns:
        A list of ``(label, frequency)`` tuples for octaves 0 through 9,
        ordered octave by octave and, inside each octave, from C up to B.
        ``label`` is the note name followed by the octave number (for
        example ``"A4"``); ``frequency`` is the matching entry of the lookup
        table below, in Hz (A4 = 440.00).
    """
    note_names = ("C", "C♯/D♭", "D", "D♯/E♭", "E", "F",
                  "F♯/G♭", "G", "G♯/A♭", "A", "A♯/B♭", "B")
    # One row per note name (same order as note_names); column k holds the
    # frequency of that note in octave k.
    freq_rows = (
        (16.352, 32.703, 65.406, 130.81, 261.63, 523.25, 1046.5, 2093.0, 4186.0, 8372.0),
        (17.324, 34.648, 69.296, 138.59, 277.18, 554.37, 1108.7, 2217.5, 4434.9, 8869.8),
        (18.354, 36.708, 73.416, 146.83, 293.66, 587.33, 1174.7, 2349.3, 4698.6, 9397.3),
        (19.445, 38.891, 77.782, 155.56, 311.13, 622.25, 1244.5, 2489.0, 4978.0, 9956.1),
        (20.602, 41.203, 82.407, 164.81, 329.63, 659.26, 1318.5, 2637.0, 5274.0, 10548),
        (21.827, 43.654, 87.307, 174.61, 349.23, 698.46, 1396.9, 2793.8, 5587.7, 11175),
        (23.125, 46.249, 92.499, 185.00, 369.99, 739.99, 1480.0, 2960.0, 5919.9, 11840),
        (24.500, 48.999, 97.999, 196.00, 392.00, 783.99, 1568.0, 3136.0, 6271.9, 12544),
        (25.957, 51.913, 103.83, 207.65, 415.30, 830.61, 1661.2, 3322.4, 6644.9, 13290),
        (27.500, 55.000, 110.00, 220.00, 440.00, 880.00, 1760.0, 3520.0, 7040.0, 14080),
        (29.135, 58.270, 116.54, 233.08, 466.16, 932.33, 1864.7, 3729.3, 7458.6, 14917),
        (30.868, 61.735, 123.47, 246.94, 493.88, 987.77, 1975.5, 3951.1, 7902.1, 15804),
    )
    octave_count = 10

    # Octave is the outer loop so the result runs C0..B0, C1..B1, and so on.
    return [
        (name + str(octave), row[octave])
        for octave in range(octave_count)
        for name, row in zip(note_names, freq_rows)
    ]

使用文档中的测试代码：

import librosa

# Load librosa's bundled 'trumpet' example recording.
y, sr = librosa.load(librosa.ex('trumpet'))

# Frequency search range for pYIN, expressed as note names C0..B9.
pyin_fmin = librosa.note_to_hz('C0')
pyin_fmax = librosa.note_to_hz('B9')
f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=pyin_fmin, fmax=pyin_fmax)

# Time axis matching f0; used as the x-axis when plotting below.
times = librosa.times_like(f0)

看下可视化的效果：

import matplotlib.pyplot as plt
import numpy as np  # needed for np.abs / np.max below; was missing in the original snippet

# Spectrogram magnitude in dB relative to its peak. It is computed here but
# not drawn by this snippet; only the f0 curve is plotted.
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

# Plot the estimated fundamental frequency against time.
fig, ax = plt.subplots()
ax.set(title='pYIN fundamental frequency estimation')
ax.plot(times, f0, label='f0', color='cyan', linewidth=3)
ax.legend(loc='upper right')

（图：pYIN 估计的基频 f0 随时间变化的曲线）
定义对比方法：


def detect_vocal_pitchrange_info_by(f0, PITCH_LIST=None):
    """Derive the pitch range covered by a fundamental-frequency sequence.

    The lower bound of the range is the entry of ``PITCH_LIST`` that is
    closest to, and not below, the smallest f0 value. The upper bound is the
    entry closest to, and not above, the largest f0 value. NaN entries of
    ``f0`` are ignored. The f0 extremes and both bounds are also printed.

    Args:
        f0: Sequence or array of fundamental-frequency values, e.g. the f0
            array returned by ``librosa.pyin``. NaN entries are skipped.
        PITCH_LIST: Optional list of entries whose first two items are
            ``(label, frequency)``. Defaults to the table built by
            ``create_standard_pitch_sequence()``.

    Returns:
        A dict with the keys ``f0_min``, ``f0_max``, ``r_start``, ``r_end``,
        ``r_start_f`` and ``r_end_f``.

    Raises:
        ValueError: If ``f0`` contains no non-NaN value.
        IndexError: If ``PITCH_LIST`` has no entry at or above the f0
            minimum, or no entry at or below the f0 maximum.
    """
    if PITCH_LIST is None:
        PITCH_LIST = create_standard_pitch_sequence()

    # Drop NaN frames with a vectorized mask instead of a per-element loop.
    f0_arr = np.asarray(f0)
    n_f0 = f0_arr[~np.isnan(f0_arr)]
    if n_f0.size == 0:
        # Fail with a clear message instead of NumPy's empty-reduction error.
        raise ValueError("f0 contains no non-NaN values; cannot derive a pitch range")

    # Compute the extremes once rather than once per table entry.
    f0_min = n_f0.min()
    f0_max = n_f0.max()
    print("基频的最值：",f0_min,f0_max)

    # Candidates are (label, frequency, distance to the relevant extreme).
    s_v_list = [(item[0], item[1], abs(item[1] - f0_min))
                for item in PITCH_LIST if item[1] - f0_min >= 0]
    e_v_list = [(item[0], item[1], abs(item[1] - f0_max))
                for item in PITCH_LIST if item[1] - f0_max <= 0]

    if not s_v_list:
        raise IndexError("PITCH_LIST has no pitch at or above the f0 minimum")
    if not e_v_list:
        raise IndexError("PITCH_LIST has no pitch at or below the f0 maximum")

    # min() keeps the first of several equally close entries, exactly as a
    # stable sort followed by [0] would.
    s_p = min(s_v_list, key=lambda x: x[2])
    e_p = min(e_v_list, key=lambda x: x[2])
    print("音域下限：",s_p)
    print("音域上限：",e_p)

    return {
        "f0_min": f0_min,    # smallest f0 value
        "f0_max": f0_max,    # largest f0 value
        "r_start": s_p[0],   # label of the lower-bound pitch
        "r_end": e_p[0],     # label of the upper-bound pitch
        "r_start_f": s_p[1], # frequency of the lower-bound pitch
        "r_end_f": e_p[1],   # frequency of the upper-bound pitch
    }

测试：

if __name__ == "__main__":
    # Report the pitch range of the f0 sequence computed by the snippet above.
    detect_vocal_pitchrange_info_by(f0=f0)

输出：

基频的最值： 345.2170030745704 625.8586480068041
音域下限： ('F4', 349.23, 4.012996925429604)
音域上限： ('D♯/E♭5', 622.25, 3.60864800680406)

{'f0_min': 345.2170030745704,
 'f0_max': 625.8586480068041,
 'r_start': 'F4',
 'r_end': 'D♯/E♭5',
 'r_start_f': 349.23,
 'r_end_f': 622.25}

增加优化代码后的完整实现

import time

import librosa
import matplotlib.pyplot as plt
import numpy as np


def create_standard_pitch_sequence():
    """Build the table of standard pitches together with their names.

    Returns:
        A list of ``(label, frequency)`` tuples for octaves 0 through 9,
        ordered octave by octave and, inside each octave, from C up to B.
        ``label`` is the note name followed by the octave number (for
        example ``"A4"``); ``frequency`` is the matching entry of the lookup
        table below, in Hz (A4 = 440.00).
    """
    note_names = ("C", "C♯/D♭", "D", "D♯/E♭", "E", "F",
                  "F♯/G♭", "G", "G♯/A♭", "A", "A♯/B♭", "B")
    # One row per note name (same order as note_names); column k holds the
    # frequency of that note in octave k.
    freq_rows = (
        (16.352, 32.703, 65.406, 130.81, 261.63, 523.25, 1046.5, 2093.0, 4186.0, 8372.0),
        (17.324, 34.648, 69.296, 138.59, 277.18, 554.37, 1108.7, 2217.5, 4434.9, 8869.8),
        (18.354, 36.708, 73.416, 146.83, 293.66, 587.33, 1174.7, 2349.3, 4698.6, 9397.3),
        (19.445, 38.891, 77.782, 155.56, 311.13, 622.25, 1244.5, 2489.0, 4978.0, 9956.1),
        (20.602, 41.203, 82.407, 164.81, 329.63, 659.26, 1318.5, 2637.0, 5274.0, 10548),
        (21.827, 43.654, 87.307, 174.61, 349.23, 698.46, 1396.9, 2793.8, 5587.7, 11175),
        (23.125, 46.249, 92.499, 185.00, 369.99, 739.99, 1480.0, 2960.0, 5919.9, 11840),
        (24.500, 48.999, 97.999, 196.00, 392.00, 783.99, 1568.0, 3136.0, 6271.9, 12544),
        (25.957, 51.913, 103.83, 207.65, 415.30, 830.61, 1661.2, 3322.4, 6644.9, 13290),
        (27.500, 55.000, 110.00, 220.00, 440.00, 880.00, 1760.0, 3520.0, 7040.0, 14080),
        (29.135, 58.270, 116.54, 233.08, 466.16, 932.33, 1864.7, 3729.3, 7458.6, 14917),
        (30.868, 61.735, 123.47, 246.94, 493.88, 987.77, 1975.5, 3951.1, 7902.1, 15804),
    )
    octave_count = 10

    # Octave is the outer loop so the result runs C0..B0, C1..B1, and so on.
    return [
        (name + str(octave), row[octave])
        for octave in range(octave_count)
        for name, row in zip(note_names, freq_rows)
    ]

def get_f0_times_by_pyin(file_path):
    """Load an audio file and estimate its fundamental frequency with pYIN.

    The start timestamp is printed before loading; the end timestamp and the
    elapsed seconds are printed after the analysis.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``. The
            original note on this function says "mp3 format".
            NOTE(review): the ``__main__`` demo passes
            ``librosa.ex('trumpet')``; confirm which formats are expected.

    Returns:
        A tuple ``(f0, times)``: the f0 array from ``librosa.pyin`` and the
        matching time axis from ``librosa.times_like``.
    """
    start_t = time.time()
    print(start_t)
    y, sr = librosa.load(file_path)

    # Search range C2~C7, described by the original author as the vocal range.
    # The sample rate from the loader is passed explicitly so the analysis
    # does not depend on pyin's default matching the loader's.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(y,
                                                   sr=sr,
                                                   fmin=librosa.note_to_hz('C2'),
                                                   fmax=librosa.note_to_hz('C7'))
    times = librosa.times_like(f0, sr=sr)

    end_t = time.time()
    print(end_t, end_t - start_t)

    return f0, times

def display_f0_times(f0, times):
    """Plot an f0 curve against time and show the figure.

    Args:
        f0: Values drawn on the y-axis.
        times: Values drawn on the x-axis, one per entry of ``f0``.
    """
    _, axes = plt.subplots()
    axes.set(title='pYIN fundamental frequency estimation')
    axes.plot(times, f0, color='cyan', linewidth=3, label='f0')
    axes.legend(loc='upper left')
    plt.show()
    
    
def beter_fill_u(n_f0, u_line=300, interval=10):
    """Clip values that sit above a gap on the high side of ``u_line``.

    Nothing happens unless some value lies strictly within ``interval`` of
    ``u_line``. Otherwise the function walks upward from ``u_line`` in steps
    of ``interval`` for as long as the open bin ``(b, b + interval)`` still
    contains a value. The first empty bin is treated as a gap, and every
    value above its lower edge ``b`` is replaced by ``b``.

    Args:
        n_f0: NumPy array of f0 values.
        u_line: Value the upward walk starts from. A non-positive value
            disables the correction.
        interval: Width of each bin, in the same unit as ``n_f0``. The
            default of 10 is the width that was previously hard-coded.

    Returns:
        ``n_f0`` itself when no correction applies, otherwise a new array
        with the values above the gap clipped.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    if u_line <= 0:
        return n_f0
    if not ((n_f0 > (u_line - interval)) & (n_f0 < (u_line + interval))).any():
        return n_f0

    # Climb bin by bin until a bin with no value in it is found.
    ceiling = u_line
    while ((n_f0 > ceiling) & (n_f0 < (ceiling + interval))).any():
        ceiling = ceiling + interval

    return np.where(n_f0 > ceiling, ceiling, n_f0)

def beter_fill_d(n_f0, d_line=200, interval=10):
    """Clip values that sit below a gap on the low side of ``d_line``.

    Nothing happens unless some value lies strictly within ``interval`` of
    ``d_line``. Otherwise the function walks downward from ``d_line`` in
    steps of ``interval`` for as long as the open bin ``(b - interval, b)``
    still contains a value and ``b`` is positive. The first empty bin is
    treated as a gap, and every value below its upper edge ``b`` is replaced
    by ``b``.

    Args:
        n_f0: NumPy array of f0 values.
        d_line: Value the downward walk starts from. A non-positive value
            disables the correction.
        interval: Width of each bin, in the same unit as ``n_f0``. The
            default of 10 is the width that was previously hard-coded.

    Returns:
        ``n_f0`` itself when no correction applies, otherwise a new array
        with the values below the gap clipped.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    if d_line <= 0:
        return n_f0
    if not ((n_f0 > (d_line - interval)) & (n_f0 < (d_line + interval))).any():
        return n_f0

    # Descend bin by bin until an empty bin is found or the floor reaches 0.
    floor = d_line
    while floor > 0 and ((n_f0 < floor) & (n_f0 > (floor - interval))).any():
        floor = floor - interval

    return np.where(n_f0 < floor, floor, n_f0)
    
    
def detect_vocal_pitchrange_info_by(f0, PITCH_LIST=None, beter=False):
    """Derive the pitch range covered by a fundamental-frequency sequence.

    The lower bound of the range is the entry of ``PITCH_LIST`` that is
    closest to, and not below, the smallest f0 value. The upper bound is the
    entry closest to, and not above, the largest f0 value. NaN entries of
    ``f0`` are ignored. The f0 extremes and both bounds are also printed.

    Args:
        f0: Sequence or array of fundamental-frequency values, e.g. the f0
            array returned by ``librosa.pyin``. NaN entries are skipped.
        PITCH_LIST: Optional list of entries whose first two items are
            ``(label, frequency)``. Defaults to the table built by
            ``create_standard_pitch_sequence()``.
        beter: When true, the NaN-free values are first passed through
            ``beter_fill_d`` and then ``beter_fill_u`` (both with their
            default arguments) before the extremes are taken.

    Returns:
        A dict with the keys ``f0_min``, ``f0_max``, ``r_start``, ``r_end``,
        ``r_start_f`` and ``r_end_f``.

    Raises:
        ValueError: If ``f0`` contains no non-NaN value.
        IndexError: If ``PITCH_LIST`` has no entry at or above the f0
            minimum, or no entry at or below the f0 maximum.
    """
    if PITCH_LIST is None:
        PITCH_LIST = create_standard_pitch_sequence()

    # Drop NaN frames with a vectorized mask instead of a per-element loop.
    f0_arr = np.asarray(f0)
    n_f0 = f0_arr[~np.isnan(f0_arr)]
    if n_f0.size == 0:
        # Fail with a clear message instead of NumPy's empty-reduction error.
        raise ValueError("f0 contains no non-NaN values; cannot derive a pitch range")

    if beter:
        # Correct f0 on the low side first, then on the high side.
        n_f0 = beter_fill_d(n_f0)
        n_f0 = beter_fill_u(n_f0)

    # Compute the extremes once rather than once per table entry.
    f0_min = n_f0.min()
    f0_max = n_f0.max()
    print("基频的最值：",f0_min,f0_max)

    # Candidates are (label, frequency, distance to the relevant extreme).
    s_v_list = [(item[0], item[1], abs(item[1] - f0_min))
                for item in PITCH_LIST if item[1] - f0_min >= 0]
    e_v_list = [(item[0], item[1], abs(item[1] - f0_max))
                for item in PITCH_LIST if item[1] - f0_max <= 0]

    if not s_v_list:
        raise IndexError("PITCH_LIST has no pitch at or above the f0 minimum")
    if not e_v_list:
        raise IndexError("PITCH_LIST has no pitch at or below the f0 maximum")

    # min() keeps the first of several equally close entries, exactly as a
    # stable sort followed by [0] would.
    s_p = min(s_v_list, key=lambda x: x[2])
    e_p = min(e_v_list, key=lambda x: x[2])
    print("音域下限：",s_p)
    print("音域上限：",e_p)

    return {
        "f0_min": f0_min,    # smallest f0 value
        "f0_max": f0_max,    # largest f0 value
        "r_start": s_p[0],   # label of the lower-bound pitch
        "r_end": e_p[0],     # label of the upper-bound pitch
        "r_start_f": s_p[1], # frequency of the lower-bound pitch
        "r_end_f": e_p[1],   # frequency of the upper-bound pitch
    }


if __name__ == "__main__":
    # Demo on librosa's bundled 'trumpet' example: extract f0, plot it, then
    # report the pitch range with the outlier correction switched on.
    demo_audio_path = librosa.ex('trumpet')
    f0, times = get_f0_times_by_pyin(demo_audio_path)
    display_f0_times(f0, times)
    detect_vocal_pitchrange_info_by(f0, beter=True)