librosa 部分 api 介绍

1.导入语音

import librosa
audio_path = './1.wav'
x , sr = librosa.load(audio_path)
print(f'x = {x}')
print(f'x.shape = {x.shape}') # (采样率*语音时间,)
print(f'sr = {sr}')# 采样率

2. 修改采样率

y , sr = librosa.load(audio_path, sr=new_sr)

3. stft 短时傅里叶变换

源码

librosa.stft(y, n_fft=2048)

def stft(
    y,
    n_fft=2048,
    hop_length=None,
    win_length=None,
    window="hann",
    center=True,
    dtype=None,
    pad_mode="reflect",
):
    """Short-time Fourier transform (STFT).
    The STFT represents a signal in the time-frequency domain by
    computing discrete Fourier transforms (DFT) over short overlapping
    windows.
    This function returns a complex-valued matrix D such that
    - ``np.abs(D[f, t])`` is the magnitude of frequency bin ``f``
      at frame ``t``, and
    - ``np.angle(D[f, t])`` is the phase of frequency bin ``f``
      at frame ``t``.
    The integers ``t`` and ``f`` can be converted to physical units by means
    of the utility functions `frames_to_samples` and `fft_frequencies`.
    
Returns
    -------
    D : np.ndarray [shape=(1 + n_fft/2, n_frames), dtype=dtype]
        Complex-valued matrix of short-term Fourier transform
        coefficients.

短时傅立叶变换(STFT),返回一个复数矩阵 D,其中 D[f, t] 是第 t 帧、第 f 个频率分量对应的复数系数:

复数的模:np.abs(D[f, t]) 为该频率分量的幅度
复数的辐角:np.angle(D[f, t]) 为该频率分量的相位

参数:
• y:音频时间序列
• n_fft:FFT窗口大小,n_fft=hop_length+overlapping
• hop_length:帧移,如果未指定,则默认win_length // 4
• win_length:每一帧音频都由window()加窗。窗长win_length,然后用零填充以匹配n_fft
 默认win_length=n_fft。
• window:字符串,元组,数字,函数 shape =(n_fft, )
 窗口(字符串,元组或数字)
 窗函数,例如scipy.signal.windows.hann
 长度为n_fft的向量或数组
• center:bool
 如果为True,则填充信号y,以使帧 D [:, t]以y [t * hop_length]为中心
 如果为False,则D [:, t]从y [t * hop_length]开始
• dtype:D的复数值类型。默认值为64-bit complex复数
• pad_mode:如果center = True,则在信号的边缘使用填充模式
 默认情况下,STFT使用reflection padding

返回:
• STFT矩阵 shape=(1+n_fft / 2,t)

关于 t 大小计算
查看源码

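t = 1 + floor((fs * times - frame_length) / hop_length),其中 fs * times 为采样点总数
(stft 在 center=True 时会先在信号两端各填充 n_fft // 2 个采样点,因此此时 t = 1 + floor(fs * times / hop_length))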
librosa.util.frame(y, frame_length=2048, hop_length=64)

def frame(x, frame_length, hop_length, axis=-1):
    """Slice a data array into (overlapping) frames.
    This implementation uses low-level stride manipulation to avoid
    making a copy of the data.  The resulting frame representation
    is a new view of the same input data.
    However, if the input data is not contiguous in memory, a warning
    will be issued and the output will be a full copy, rather than
    a view of the input data.
    For example, a one-dimensional input ``x = [0, 1, 2, 3, 4, 5, 6]``
    can be framed with frame length 3 and hop length 2 in two ways.
    The first (``axis=-1``), results in the array ``x_frames``::
        [[0, 2, 4],
         [1, 3, 5],
         [2, 4, 6]]
    where each column ``x_frames[:, i]`` contains a contiguous slice of
    the input ``x[i * hop_length : i * hop_length + frame_length]``.
    The second way (``axis=0``) results in the array ``x_frames``::
        [[0, 1, 2],
         [2, 3, 4],
         [4, 5, 6]]
    where each row ``x_frames[i]`` contains a contiguous slice of the input.
    This generalizes to higher dimensional inputs, as shown in the examples below.
    In general, the framing operation increments by 1 the number of dimensions,
    adding a new "frame axis" either to the end of the array (``axis=-1``)
    or the beginning of the array (``axis=0``).
    
    Parameters
    ----------
    x : np.ndarray
        Array to frame
    frame_length : int > 0 [scalar]
        Length of the frame
    hop_length : int > 0 [scalar]
        Number of steps to advance between frames
    axis : 0 or -1
        The axis along which to frame.
        If ``axis=-1`` (the default), then ``x`` is framed along its last dimension.
        ``x`` must be "F-contiguous" in this case.
        If ``axis=0``, then ``x`` is framed along its first dimension.
        ``x`` must be "C-contiguous" in this case.
        
    Returns
    -------
    x_frames : np.ndarray [shape=(..., frame_length, N_FRAMES) or (N_FRAMES, frame_length, ...)]
        A framed view of ``x``, for example with ``axis=-1`` (framing on the last dimension)::
            x_frames[..., j] == x[..., j * hop_length : j * hop_length + frame_length]
        If ``axis=0`` (framing on the first dimension), then::
            x_frames[j] = x[j * hop_length : j * hop_length + frame_length]
            
Examples
    --------
    Extract 2048-sample frames from monophonic signal with a hop of 64 samples per frame
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> frames = librosa.util.frame(y, frame_length=2048, hop_length=64)
    >>> frames
    array([[-1.407e-03, -2.604e-02, ..., -1.795e-05, -8.108e-06],
           [-4.461e-04, -3.721e-02, ..., -1.573e-05, -1.652e-05],
           ...,
           [ 7.960e-02, -2.335e-01, ..., -6.815e-06,  1.266e-05],
           [ 9.568e-02, -1.252e-01, ...,  7.397e-06, -1.921e-05]],
          dtype=float32)
    >>> y.shape
    (117601,)
    >>> frames.shape
    (2048, 1806)
    Or frame along the first axis instead of the last:
    >>> frames = librosa.util.frame(y, frame_length=2048, hop_length=64, axis=0)
    >>> frames.shape
    (1806, 2048)

4. 梅尔谱提取

源码

librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

def melspectrogram(
    y=None,
    sr=22050,
    S=None,
    n_fft=2048,
    hop_length=512,
    win_length=None,
    window="hann",
    center=True,
    pad_mode="reflect",
    power=2.0,
    **kwargs,
):
    """Compute a mel-scaled spectrogram.
    If a spectrogram input ``S`` is provided, then it is mapped directly onto
    the mel basis by ``mel_f.dot(S)``.
    If a time-series input ``y, sr`` is provided, then its magnitude spectrogram
    ``S`` is first computed, and then mapped onto the mel scale by
    ``mel_f.dot(S**power)``.
    By default, ``power=2`` operates on a power spectrum.
    
Returns
    -------
    S : np.ndarray [shape=(n_mels, t)]
        Mel spectrogram

Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.feature.melspectrogram(y=y, sr=sr)
    array([[3.837e-06, 1.451e-06, ..., 8.352e-14, 1.296e-11],
           [2.213e-05, 7.866e-06, ..., 8.532e-14, 1.329e-11],
           ...,
           [1.115e-05, 5.192e-06, ..., 3.675e-08, 2.470e-08],
           [6.473e-07, 4.402e-07, ..., 1.794e-08, 2.908e-08]],
          dtype=float32)
    Using a pre-computed power spectrogram would give the same result:
    >>> D = np.abs(librosa.stft(y))**2
    >>> S = librosa.feature.melspectrogram(S=D, sr=sr)
    Display of mel-frequency spectrogram coefficients, with custom
    arguments for mel filterbank construction (default is fmax=sr/2):
    >>> # Passing through arguments to the Mel filters
    >>> S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,
    ...                                     fmax=8000)

5.提取 Log-Mel Spectrogram 对数梅尔

import librosa
# Load a wav file
y, sr = librosa.load('./beat.wav', sr=None)
# extract mel spectrogram feature
melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, hop_length=512, n_mels=128)
# convert to log scale
logmelspec = librosa.power_to_db(melspec)

6.提取 MFCC
源码

mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

def mfcc(
    y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm="ortho", lifter=0, **kwargs
):
    """Mel-frequency cepstral coefficients (MFCCs)

Returns
    -------
    M : np.ndarray [shape=(n_mfcc, t)]
        MFCC sequence

7. 绘图

 librosa.display.waveplot()
 librosa.display.specshow()

import librosa
import matplotlib.pyplot as plt
import librosa.display

audio_path = './1.wav'
x , sr = librosa.load(audio_path)
print(f'x.shape = {x.shape}')
print(f'sr = {sr}')

#波形图
librosa.display.waveplot(x)
plt.show()

#频谱特征图
stft = librosa.stft(x, n_fft=2048)
print(f'stft.shape = {stft.shape}')
librosa.display.specshow(stft, x_axis='time', y_axis='hz')
plt.show()

mfcc = librosa.feature.mfcc(x, n_fft=2048, n_mfcc = 36)
print(f'mfcc.shape = {mfcc.shape}')
librosa.display.specshow(mfcc, x_axis='time', y_axis='hz')
plt.show()
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值