librosa 短时傅里叶变换
import numpy as np # pip install numpy
import librosa # pip install librosa
# Load the demo recording: `y` is the audio time series, `sr` its sample rate.
y, sr = librosa.load(
    librosa.ex('nutcracker'),  # audio path; swap in a local file such as '1.wav'
    sr=22050,  # target sample rate in Hz (22050 is librosa's default)
)
y # 音频时间序列
array([ 2.2716861e-06, 5.3327208e-06, -7.2473290e-06, …,
1.1170751e-05, 1.2871889e-06, 5.4120628e-06], dtype=float32)
y.shape
(2643264,)
sr # 输出采样率
22050
# Magnitude spectrogram: absolute value of the complex STFT (default parameters).
S = np.absolute(librosa.stft(y=y))
绘制频谱图
import matplotlib.pyplot as plt
import librosa.display
# Convert magnitudes to decibels relative to the maximum of S, then draw the
# result with a logarithmic frequency axis and a dB-labelled colour bar.
S_db = librosa.amplitude_to_db(S, ref=np.max)
fig, ax = plt.subplots()
img = librosa.display.specshow(S_db, x_axis='time', y_axis='log', ax=ax)
ax.set_title('Power spectrogram')
fig.colorbar(img, ax=ax, format="%+2.0f dB")
<matplotlib.colorbar.Colorbar at 0x158e497f250>
自定义短时傅里叶变换
# STFT with a custom FFT window size (n_fft); `y` is the audio time series.
# hop_length is not passed, so librosa's default applies.
# Disabled alternative from the original notes: hop_length=735
stft = librosa.stft(y=y, n_fft=898)
stft # 频谱矩阵
array([[-2.49133282e-05+0.00000000e+00j, -1.24803155e-05+0.00000000e+00j,
1.69679524e-08+0.00000000e+00j, …,
1.06373847e-07+0.00000000e+00j, -1.46777384e-05+0.00000000e+00j,
-4.89133818e-05+0.00000000e+00j],
[ 2.49395980e-05+5.29395592e-21j, 1.35373410e-08+1.25159850e-05j,
-4.35219061e-09-1.19003154e-08j, …,
1.13886740e-08+5.05490512e-08j, -6.25399207e-06-1.34237325e-05j,
4.41024677e-05-2.08569836e-05j],
[-2.50027770e-05-7.32683499e-20j, 1.25475317e-05-6.21867784e-08j,
-1.76670523e-09+3.36782513e-09j, …,
6.84030681e-08-3.32108563e-08j, 9.50257072e-06-1.13178148e-05j,
-3.11698022e-05+3.77396755e-05j],
…,
[ 6.53184106e-05+1.14349448e-20j, -3.25936016e-05+9.01825206e-06j,
-1.32137757e-06-1.67821142e-07j, …,
-1.37270603e-04+5.64751317e-05j, 4.38775278e-05+1.38663541e-04j,
7.55618457e-05+1.62474601e-06j],
[-5.45223338e-05+1.44736755e-19j, -4.01673014e-06+2.75596376e-05j,
-9.37026925e-07+2.25284953e-06j, …,
6.14464865e-04-1.25877617e-04j, 1.91698258e-04-3.42557149e-04j,
-9.25186323e-05-8.57962368e-05j],
[ 5.99693194e-05+0.00000000e+00j, 2.92711793e-05+0.00000000e+00j,
3.30753460e-06+0.00000000e+00j, …,
-9.10082890e-04+0.00000000e+00j, -4.43718774e-04+0.00000000e+00j,
-8.32068417e-06+0.00000000e+00j]], dtype=complex64)
stft.shape = $(1+\frac{nfft}{2},\ \frac{T-winLength}{hopLength}+1)$ (该帧数公式对应 center=False;librosa 默认 center=True,此时帧数为 $1+\lfloor T/hopLength \rfloor$。hopLength 默认为 winLength // 4 = 898 // 4 = 224,故本例帧数为 1 + 2643264 // 224 = 11801)
stft.shape
(450, 11801)