#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Utilities for spectral processing'''
import numpy as np
import scipy.fftpack as fft
import scipy
import scipy.signal
import scipy.interpolate
import six
from . import time_frequency
from .. import cache
from .. import util
from ..util.decorators import moved
from ..util.deprecation import rename_kw, Deprecated
from ..util.exceptions import ParameterError
from ..filters import get_window
# Public names of this module: the list that `from <module> import *` exports.
__all__ = ['stft', 'istft', 'magphase',
'ifgram', 'phase_vocoder',
'logamplitude', 'perceptual_weighting',
'power_to_db', 'db_to_power',
'amplitude_to_db', 'db_to_amplitude',
'fmt']
@cache(level=20)
def stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann',
         center=True, dtype=np.complex64, pad_mode='reflect'):
    """Short-time Fourier transform (STFT)

    Returns a complex-valued matrix D such that
        `np.abs(D[f, t])` is the magnitude of frequency bin `f`
        at frame `t`

        `np.angle(D[f, t])` is the phase of frequency bin `f`
        at frame `t`

    Parameters
    ----------
    y : np.ndarray [shape=(n,)], real-valued
        the input signal (audio time series)

    n_fft : int > 0 [scalar]
        FFT window size, i.e. the length of each frame that is transformed.

    hop_length : int > 0 [scalar]
        number of audio samples between adjacent STFT columns.
        If unspecified, defaults to `win_length // 4`.
        Note that this is the step between consecutive frames, not the
        amount by which they overlap.

    win_length : int <= n_fft [scalar]
        Each frame of audio is windowed by `window()`.
        The window will be of length `win_length` and then padded
        with zeros to match `n_fft`.

        If unspecified, defaults to ``win_length = n_fft``.

    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        - a window specification (string, tuple, or number);
          see `scipy.signal.get_window`
        - a window function, such as `scipy.signal.hanning`
        - a vector or array of length `n_fft`

        .. see also:: `filters.get_window`

    center : boolean
        - If `True`, the signal `y` is padded so that frame
          `D[:, t]` is centered at `y[t * hop_length]`.
        - If `False`, then `D[:, t]` begins at `y[t * hop_length]`

        In other words, `True` time-stamps each column by the centre of
        its window, `False` by the start of its window.

    dtype : numeric type
        Complex numeric type for `D`.  Default is 64-bit complex.
        Each entry carries both magnitude (`np.abs`) and phase
        (`np.angle`); neither is simply the real or the imaginary part.

    pad_mode : string
        If `center=True`, the padding mode to use at the edges of the signal.
        By default, STFT uses reflection padding.
        The value is passed to `np.pad` as its `mode` argument.

    Returns
    -------
    D : np.ndarray [shape=(1 + n_fft/2, t), dtype=dtype]
        STFT matrix.
        Only the first `1 + n_fft/2` bins of each frame's FFT are kept:
        the spectrum of a real-valued signal is conjugate-symmetric, so
        the remaining bins are redundant.

    See Also
    --------
    istft : Inverse STFT

    ifgram : Instantaneous frequency spectrogram

    np.pad : array padding

    Notes
    -----
    This function caches at level 20.

    Examples
    --------

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> D = librosa.stft(y)
    >>> D
    array([[  2.576e-03 -0.000e+00j,   4.327e-02 -0.000e+00j, ...,
              3.189e-04 -0.000e+00j,  -5.961e-06 -0.000e+00j],
           [  2.441e-03 +2.884e-19j,   5.145e-02 -5.076e-03j, ...,
             -3.885e-04 -7.253e-05j,   7.334e-05 +3.868e-04j],
           ...,
           [ -7.120e-06 -1.029e-19j,  -1.951e-09 -3.568e-06j, ...,
             -4.912e-07 -1.487e-07j,   4.438e-06 -1.448e-05j],
           [  7.136e-06 -0.000e+00j,   3.561e-06 -0.000e+00j, ...,
             -5.144e-07 -0.000e+00j,  -1.514e-05 -0.000e+00j]], dtype=complex64)

    Use left-aligned frames, instead of centered frames

    >>> D_left = librosa.stft(y, center=False)

    Use a shorter hop length

    >>> D_short = librosa.stft(y, hop_length=64)

    Display a spectrogram

    >>> import matplotlib.pyplot as plt
    >>> librosa.display.specshow(librosa.amplitude_to_db(D,
    ...                                                  ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('Power spectrogram')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.tight_layout()
    """

    # By default, use the entire frame
    if win_length is None:
        win_length = n_fft

    # Set the default hop, if it's not already specified
    if hop_length is None:
        hop_length = int(win_length // 4)

    fft_window = get_window(window, win_length, fftbins=True)

    # Pad the window out to n_fft size
    fft_window = util.pad_center(fft_window, n_fft)

    # Reshape to a column so that the window broadcasts across frames
    fft_window = fft_window.reshape((-1, 1))

    # Pad the time series so that frames are centered
    if center:
        # NOTE(review): validation only happens on the centered path;
        # presumably util.valid_audio raises on bad input -- confirm.
        util.valid_audio(y)
        y = np.pad(y, int(n_fft // 2), mode=pad_mode)

    # Slice the time series into frames of n_fft samples each
    y_frames = util.frame(y, frame_length=n_fft, hop_length=hop_length)

    # Pre-allocate the STFT matrix
    stft_matrix = np.empty((int(1 + n_fft // 2), y_frames.shape[1]),
                           dtype=dtype,
                           order='F')

    # How many columns can we fit within MAX_MEM_BLOCK?
    n_columns = int(util.MAX_MEM_BLOCK / (stft_matrix.shape[0] *
                                          stft_matrix.itemsize))

    # A single column may exceed the memory budget (very large n_fft), which
    # would give a step of 0 and make range() raise ValueError.  Always
    # process at least one column per block.
    n_columns = max(n_columns, 1)

    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])

        # Full FFT of the windowed frames, truncated to the non-redundant
        # bins; conjugated to match the phase convention of the DPWE code.
        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window *
                                            y_frames[:, bl_s:bl_t],
                                            axis=0)[:stft_matrix.shape[0]].conj()

    return stft_matrix
@cache(level=30)
def istft(stft_matrix, hop_length=None, win_length=None, window='hann',
center=True, dtype=np.float32, length=None):
"""
Inverse short-time Fourier transform (ISTFT).
Converts a complex-valued spectrogram `stft_matrix` to time-series `y`
by minimizing the mean squared error between `stft_matrix` and STFT of
`y` as described in [1]_.
In general, window function, hop length and other parameters should be same
as in stft, which mostly leads to perfect reconstruction of a signal from
unmodified `stft_matrix`.
.. [1] D. W. Griffin and J. S. Lim,
"Signal estimation from modified short-time Fourier transform,"
IEEE Trans. ASSP, vol.32, no.2, pp.236–243, Apr. 1984.
Parameters
----------
stft_matrix : np.ndarray [shape=(1 + n_fft/2, t)]
STFT matrix from `stft`
hop_length : int > 0 [scalar]
Number of frames between STFT columns.
If unspecified, defaults to `win_length / 4`.
win_length : int <= n_fft = 2 * (stft_matrix.shape[0] - 1)
When reconstructing the time series, each frame is windowed
and each sample is normalized by the sum of squared window
according to the `window` function (see below).
If unspecified, defaults to `n_fft`.
window : string, tuple, number, function, np.ndarray [shape=(n_fft,)]
- a window specification (string, tuple, or number);
see `scipy.signal.get_window`
- a window function, such as `scipy.signal.hanning`
- a user-specified window vector of length `n_fft`
.. see also:: `filters.get_window`
center : boolean
- If `True`, `D` is assumed to have centered frames.
- If `False`, `D` is assumed to have left-aligned frames.
dtype : numeric type
librosa.stft() 源码分析
最新推荐文章于 2025-02-19 02:00:00 发布