python波形图librosa_对python中Librosa的mfcc步骤详解

本文详细介绍了Python中使用Librosa库实现MFCC(梅尔频率倒谱系数)的步骤,包括语音数据归一化、窗函数、数据填充、分帧、加窗、傅里叶变换、梅尔计算等关键过程,对于理解和应用Librosa进行音频处理非常有帮助。
摘要由CSDN通过智能技术生成

对python中Librosa的mfcc步骤详解

发布时间:2020-10-18 23:55:35

来源:脚本之家

阅读:71

作者:qq_39239990

1.对语音数据归一化

如16000hz的数据,会将每个点/32768

2.计算窗函数:(*注意librosa中不进行预处理)

3.进行数据扩展填充,他进行的是镜像填充("reflect")

如原数据为 12345 -》 填充为4的,左右各填充4 即:5432123454321 即:5432-12345-4321

4.分帧

5.加窗:对每一帧进行加窗,

6.进行fft傅里叶变换

librosa中fft计算,可以使用.net中的System.Numerics

MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab) 计算,结果相同

7.mel计算(每一帧取20个特征点)

Imports System.Numerics

Imports MathNet.Numerics

Imports MathNet.Numerics.IntegralTransforms

Module mfcc_module

Public Class Librosa

End Class

Dim pi As Double = 3.1415926535897931

Public Function spectrum(fft_data(,) As Complex) As Double(,)

Dim new_data(fft_data.GetLength(0) - 1, fft_data.GetLength(1) - 1) As Double

For n = 0 To fft_data.GetLength(0) - 1

' Debug.Print("spectrum//")

' Debug.Print("spectrum//")

For i = 0 To fft_data.GetLength(1) - 1

new_data(n, i) = fft_data(n, i).MagnitudeSquared

' Debug.Write(new_data(n, i) & " ")

Next

Next

Return new_data

End Function

Public Function FFT(data As Double(,)) As Complex(,)

Dim result(data.GetLength(0) - 1, 1024) As Complex

'2049 加了一个 数组类型 0 开始

Dim FFT_frame As Complex() = New Complex(data.GetLength(1) - 1) {}

For n = 0 To data.GetLength(0) - 1

For i As Integer = 0 To data.GetLength(1) - 1

FFT_frame(i) = data(n, i)

Next

MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab)

For k = 0 To 1024

result(n, k) = FFT_frame(k)

Next

'Debug.Print("fft **************")

'For Each mem In FFT_frame

' Debug.Print(mem.ToString & " ")

'Next

Next n

Return result

End Function

Public Function _mfcc(dct_ As Double(,), power_to_db_ As Double(,)) As Double(,)

'dct 20,128

'power_to_db 5,128

'result = 20,5

Dim result(dct_.GetLength(0) - 1, power_to_db_.GetLength(1) - 1) As Double

Dim r1, r2 As Double

For n = 0 To dct_.GetLength(0) - 1 '20

For i = 0 To power_to_db_.GetLength(1) - 1 '5

r2 = 0

For k = 0 To dct_.GetLength(1) - 1 '128

r1 = dct_(n, k) * power_to_db_(k, i)

r2 = r2 + r1

Next

result(n, i) = r2

Next

Next

Return result

End Function

Public Function Dct(n_filters As Integer, n_input As Integer) As Double(,)

Dim t1 As Double = 2 * n_input

Dim samples(n_input - 1) As Double

Dim basis(n_filters - 1, n_input - 1) As Double

Dim n As Integer = 1

For i = 0 To n_input - 1

samples(i) = n * pi / (2 * n_input)

n = n + 2

Next i

For i = 0 To n_input - 1

basis(0, i) = 1 / Math.Sqrt(n_input)

Next

For n = 1 To n_filters - 1

For i = 0 To n_input - 1

basis(n, i) = Math.Cos(n * samples(i)) * Math.Sqrt(2 / n_input)

Next

Next

Return basis

End Function

'1e-10 = 0.0000000001

Public Function power_to_db(S As Double(,), Optional ref As Double = 1, Optional admin As Double = 0.0000000001, Optional top_db As Double = 80) As Double(,)

Dim result(S.GetLength(0) - 1, S.GetLength(1) - 1) As Double

Dim log_spec As Double

For n = 0 To S.GetLength(0) - 1

For i = 0 To S.GetLength(1) - 1

log_spec = 10 * Math.Log10(Math.Max(admin, S(n, i)))

result(n, i) = log_spec - 10 * Math.Log10(Math.Max(admin, ref))

Next

Next

'If top_db <> 0 Then

' For n = 0 To S.GetLength(0) - 1

' For i = 0 To S.GetLength(1) - 1

' 'result(n, i) = Math.Max(result(n, i), result(n, i) - top_db)

' Next

' Next

'End If

Return result

End Function

Public Function melspectrogram(mel_basis(,) As Double, s(,) As Double) As Double(,)

'mel_basis 128,1025

's 5 ,1025 -> 1025,5

' result 128,5

Dim result(mel_basis.GetLength(0) - 3, s.GetLength(0) - 1) As Double

Dim r1, r2 As Double

For n = 0 To mel_basis.GetLength(0) - 3

For i = 0 To s.GetLength(0) - 1

For k = 0 To mel_basis.GetLength(1) - 1

r1 = mel_basis(n, k) * s(i, k)

r2 = r2 + r1

Next

result(n, i) = r2

r2 = 0

Next

Next

Return result

End Function

Public Function normal(mel_f As Double(), weights(,) As Double) As Double(,)

Dim enorm(mel_f.Length - 2) As Double

' Debug.Print("*************normal//")

' Debug.Print("*************normal//")

For i = 0 To mel_f.Length - 3

enorm(i) = 2 / (mel_f(2 + i) - mel_f(i))

Next

For i = 0 To weights.GetLength(1) - 1

For n = 0 To weights.GetLength(0) - 2

weights(n, i) = weights(n, i) * enorm(n)

Next

Next

Return weights

End Function

Public Function weight(a As Double(,), fdiff As Double()) As Double(,)

Dim lower, upper As Double

Dim data(a.GetLength(0) - 1, a.GetLength(1) - 1) As Double

For n = 0 To a.GetLength(0) - 3

For i = 0 To a.GetLength(1) - 1

lower = -(a(n, i) / fdiff(n))

upper = a(n + 2, i) / fdiff(n + 1)

data(n, i) = Math.Max(0, Math.Min(lower, upper))

Next

Next

Return data

End Function

Public Function ramps(A As Double(), B As Double()) As Double(,)

Dim data(A.Length - 1, B.Length - 1) As Double

' Debug.Print("ramps*********************")

For n = 0 To A.Length - 1

'Debug.Print("******")

'Debug.Print("------")

For i = 0 To B.Length - 1

data(n, i) = A(n) - B(i)

'Debug.Write(data(n, i) & " ")

Next

Next

Return data

End Function

Public Function diff(arr As Double()) As Double()

Dim data(arr.Length - 2) As Double

For i = 1 To arr.Length - 1

data(i - 1) = arr(i) - arr(i - 1)

'Debug.Print(data(i - 1))

Next

Return data

End Function

'分帧 算法2

Public Function Frame2(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double(,)

Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1

Dim new_buff(tim - 1, n_ftt - 1) As Double

Dim copypos As Integer = 0

For i = 0 To tim - 1

For k = 0 To n_ftt - 1

new_buff(i, k) = y(copypos + k)

Next

copypos = copypos + hop

Next

'For k = 0 To tim - 1

' Debug.Print("//")

' Debug.Print("///fram2///" & k)

' For i = 0 To n_ftt - 1

' Debug.Print(new_buff(k, i) & " ")

' Next

'Next k

Return new_buff

End Function

'

Public Function Frame(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double()

Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1

Dim new_buff(tim * n_ftt) As Double

Dim pos As Integer = 0

Dim copypos As Integer = 0

For i = 0 To tim - 1

Array.Copy(y, copypos, new_buff, pos, n_ftt)

'Buffer.BlockCopy(y, 0, new_buff, pos, n_ftt)

copypos = copypos + hop

pos = pos + n_ftt

Next

For k = 0 To tim - 1

'Debug.Print("//")

'Debug.Print("//")

For i = 0 To n_ftt - 1

Debug.Write(new_buff(k * n_ftt + i) & " ")

Next

Next k

Return new_buff

End Function

Public Function MelFilter() As Double()

Dim filter_points(128 + 1) As Integer '40个滤波器,需要41点

Const sampleRate As Integer = 16000 '采样频率 16000

Const filterNum As Integer = 128 '滤波器数量 取40个

Const frameSize As Integer = 512 '帧长512

Dim freMax As Double = sampleRate / 2 '实际最大频率

Dim freMin As Double = 0 '实际最小频率

Dim melFremax As Double = hz_to_mel(freMax) '将实际频率转换成梅尔频率

Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)

Dim k As Double = (melFremax - melFremin) / (filterNum + 1)

Dim m As Double() = New Double(filterNum + 1) {}

Dim h As Double() = New Double(filterNum + 1) {}

For i As Integer = 0 To filterNum + 1

m(i) = melFremin + k * i

'h(i) = 700 * (Math.Exp(m(i) / 1125) - 1)

'将梅尔频率转换成实际频率

filter_points(i) = mel_to_hz(m(i))

'Debug.Print(m(i))

Next

Dim hzs As Double() = mel_to_hz2(m)

'For i = 0 To filterNum + 1

' ' Debug.Print(hzs(i))

'Next

Return hzs

End Function

Public Function hz_to_mel(frequencies As Double, Optional htk As Boolean = False) As Double

Dim mels As Double

If htk Then

mels = 1125 * Math.Log(1 + frequencies / 700)

Else

Dim f_min As Double = 0.0

Dim f_sp As Double = 200.0 / 3

Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)

Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)

Dim logstep As Double = Math.Log(6.4) / 27.0 ' step size for log region

mels = min_log_mel + Math.Log(frequencies / min_log_hz) / logstep

End If

Return mels

End Function

Public Function mel_to_hz2(mel() As Double, Optional htk As Boolean = False) As Double()

Dim hz(mel.Length - 1) As Double

Dim f_min As Double = 0.0

Dim f_sp As Double = 200.0 / 3

Dim freqs(mel.Length - 1) As Double

For i = 0 To mel.Length - 1

freqs(i) = f_min + f_sp * mel(i)

Next i

Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)

Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)

Dim logstep As Double = Math.Log(6.4) / 27.0

For i = 0 To mel.Length - 1

If (mel(i) > min_log_mel) Then

freqs(i) = min_log_hz * Math.Exp(logstep * (mel(i) - min_log_mel))

End If

Next

'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))

Return freqs

End Function

Public Function mel_to_hz(mel As Double, Optional htk As Boolean = False) As Double

Dim hz As Double

If htk Then

hz = 700 * (Math.Exp(mel) / 1125) - 1

Else

Dim f_min As Double = 0.0

Dim f_sp As Double = 200.0 / 3

Dim freqs = f_min + f_sp * mel

Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)

Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)

Dim logstep As Double = Math.Log(6.4) / 27.0

hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))

'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))

End If

Return hz

End Function

Public Function fft_frequencies(sr As Integer, n_fft As Integer) As Double()

Dim fft_data(n_fft / 2) As Double

For i = 0 To n_fft / 2

fft_data(i) = i * sr / n_fft

Next

Return fft_data

End Function

'左右填充,优化

Public Function PadReflect2(data() As Double, num As Integer)

'pad 10 ,10

Dim tim(data.Length - 3) As Double

For i = 0 To data.Length - 3

tim(i) = data(data.Length - 2 - i)

Next

Dim dump() As Double = data.Concat(tim).ToArray()

'For Each i In dump

' Debug.Write(i)

End Function

Public Function PadReflect(data() As Double, num As Integer)

'pad 10 ,10

Dim tim(data.Length - 3) As Double

For i = 0 To data.Length - 3

tim(i) = data(data.Length - 2 - i)

Next

Dim dump() As Double = data.Concat(tim).ToArray()

'For Each i In dump

' Debug.Write(i)

'Next

'left_edge

' Debug.Print("***************************")

Dim left_edge(num - 1) As Double

_CopyDup(left_edge, dump, True)

'For i = 0 To num - 1

' Debug.Write(left_edge(i))

'Next

'right_edge

'Debug.Print("***************************")

Dim right_edge(num + data.Length) As Double

_CopyDup(right_edge, dump, False)

'For i = 0 To num - 1

' Debug.Write(right_edge(i))

'Next

'Debug.Print("***************************")

Dim result As Double() = left_edge.Concat(right_edge).ToArray()

Return result

End Function

'copy tim to data dumply

Public Function _CopyDup(data() As Double, tim() As Double, Optional left As Boolean = True)

Dim last As Integer = data.Length Mod tim.Length

Dim times As Integer = Math.Floor(data.Length / tim.Length)

Dim pos As Integer

If left Then

Array.Copy(tim, tim.Length - last, data, 0, last)

pos = last

For i = 0 To times - 1

Array.Copy(tim, 0, data, pos, tim.Length)

pos = pos + tim.Length

Next

Else

'RIGHT

pos = 0

For i = 0 To times - 1

Array.Copy(tim, 0, data, pos, tim.Length)

pos = pos + tim.Length

Next

Array.Copy(tim, 0, data, pos, last)

End If

End Function

Public Function General_cosine(M As Integer, alpha As Double(), sym As Boolean) As Double()

If Not sym Then

M = M + 1

End If

Dim tim As Double = (2 * pi) / (M - 1)

Dim x(M) As Double

Dim w(M) As Double

'Debug.Print("ine")

For i = 0 To M - 1

x(i) = -pi + tim * i

'Debug.Write(x(i) & " ")

Next

'Debug.Print("******")

For i = 0 To alpha.GetLength(0) - 1

For k = 0 To M - 1

w(k) = w(k) + alpha(i) * Math.Cos(i * x(k))

'Debug.Write(w(k) & " ")

Next

Next

Return w

End Function

'''

''' 汉明窗

'''

''' 窗长

'''

Public Function General_hamming(M As Integer) As Double()

Dim db As Double() = {0.5, 1 - 0.5}

Return General_cosine(M, db, False) '进行加1 ,若sys为false

End Function

Public Function Get_window(M As Integer) As Double()

Return General_hamming(M)

End Function

End Module

Imports System.IO

Imports System.Numerics

Imports TensorFlow

'Install-Package TensorFlowSharp

Public Class KeyWordDetect

Dim graph As TFGraph

Dim session As TFSession

'加载模型

Public Sub New()

Dim model As Byte() = File.ReadAllBytes("f:\graph2.pb")

'导入GraphDef

graph = New TFGraph()

graph.Import(model, "")

session = New TFSession(graph)

' Threading.ThreadPool.SetMaxThreads(5, 5)

End Sub

Protected Overrides Sub finalize()

session.CloseSession()

End Sub

'将声音数据变为mfcc byte数据

Public Function DataBToMFCC(dataB() As Byte) As Double(,)

Dim buff16(dataB.Length / 2 - 1) As Int16

Buffer.BlockCopy(dataB, 0, buff16, 0, dataB.Length - 1)

Dim result(,) As Double = MFCC(buff16)

Return result

End Function

'将声音数据变为mfcc

Public Function DataToMFCC(dataI() As Int16) As Double(,)

Dim result(,) As Double = MFCC(dataI)

Return result

End Function

'将mfcc变为输入数据格式

Public Function MFCCToVect(mfcc As Double(,)) As Double(,,)

Dim data(0, 1, 129) As Double

Dim n As Integer = 0, m As Integer = 0

For i = 0 To mfcc.GetLength(0) - 1

For k = 0 To mfcc.GetLength(1) - 1

data(0, m, n) = mfcc(i, k)

n = n + 1

Next

If n = 130 Then

m = 1

n = 0

End If

Next

Return data

End Function

Dim output

Dim runner As TFSession.Runner

Dim result

Dim rshape

'关键字检测

Public Function Detected(Data(,,) As Double) As Double

' Dim tensor As TFTensor = New TFTensor(Data)

runner = session.GetRunner()

runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))

output = runner.Run()

result = output(0)

rshape = result.Shape

Dim rt As Double

rt = result.GetValue(True)(0)(0)

'For k = 0 To rshape.GetValue(0) - 1

' rt = result.GetValue(True)(k)(0)

' 'Debug.Print(rt)

' If (rt > 0.8) Then

' Debug.Print("-----------recogxili")

' ' MsgBox("recgo")

' End If

'Next

Return RT

End Function

'Public Function RunB(dataB() As Byte)

' Dim mfccd As Double(,) = DataBToMFCC(dataB)

' Dim inputx As Double(,,) = MFCCToVect(mfccd)

' Detected(inputx)

'End Function

'Public Function ThreadPoolRun(dataI() As Int16)

' Threading.ThreadPool.QueueUserWorkItem(Run(dataI), dataI)

' ' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run))

' ' thrd1.Start(dataI)

'End Function

'Delegate Function DelgRun(dataI() As Int16)

'Public Function ThreadRun(dataI() As Int16)

' ' Dim drun As New DelgRun(AddressOf Run)

' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run))

' thrd1.Start(dataI)

'End Function

Public Function Run(dataI() As Int16) As Double

' Debug.Print("thread *****1")

Dim mfccd As Double(,) = DataToMFCC(dataI)

Dim inputx As Double(,,) = MFCCToVect(mfccd)

Return Detected(inputx)

End Function

Public Function MFCC(buff16() As Int16) As Double(,)

Dim datalen As Integer = buff16.Length * 2

Dim double_buff(datalen / 2 - 1) As Double

Dim len As Integer = datalen / 2

Array.Copy(buff16, double_buff, len)

'******************

For i = 0 To double_buff.Length - 1

double_buff(i) = double_buff(i) / 32768

' Debug.Print(double_buff(i))

Next

'汉明窗create

Dim hann_window As Double() = Get_window(2048)

'Debug.Print("--------------------------")

'Debug.Print("hann_window**************")

For Each i In hann_window

'Debug.Print(i & " ")

Next

'Debug.Print("--------------------------")

'Debug.Print("*************pad reflect**************")

Dim y As Double() = PadReflect(double_buff, 1024)

' Dim y As Double() = double_buff

'For Each i In y

' 'Debug.Print(i & " ")

'Next

'Debug.Print("--------------------------")

'Debug.Print("***************frame************")

Dim frams As Double(,) = Frame2(y)

Dim tim As Integer = frams.GetLength(0)

'Debug.Print("--------------------------")

'Debug.Print("**********hann * data**************")

Dim hannData(tim - 1, 2047) As Double

For n = 0 To tim - 1

For i = 0 To 2048 - 1

hannData(n, i) = frams(n, i) * hann_window(i)

' Debug.Print(hannData(i) & " ")

Next

Next n

'\\\\\\\\\\\\\\\\melspecture

Dim specturm1(,) As Complex = FFT(hannData)

'For i = 0 To specturm1.GetLength(0) - 1

' Debug.Print("--------------------------------------")

' Debug.Print("--------------------------------------")

' For k = 0 To specturm1.GetLength(1) - 1

' Debug.Print(specturm1(i, k).Real & " " & specturm1(i, k).Imaginary)

' Next

'Next

Dim s As Double(,) = spectrum(specturm1)

Dim fftfreqs() As Double = fft_frequencies(16000, 2048)

'Debug.Print("***************fftfreqs*****************")

'Debug.Print("***************fftfreqs*****************")

'Debug.Print("fftfreqs.shape", fftfreqs.Length)

'For i = 0 To fftfreqs.Length - 1

' 'Debug.Write(fftfreqs(i) & " ")

'Next

''''''''''''''''mel * specturm1

'Debug.Print("**************")

'Debug.Print("****滤波器创建**********")

Dim mel_f As Double() = MelFilter()

'Debug.Print("--------------------------")

'Debug.Print("hann_window**************")

'Debug.Print("diff")

Dim fdiff As Double() = diff(mel_f)

Dim ramps_ As Double(,) = ramps(mel_f, fftfreqs)

Dim weights(,) As Double = weight(ramps_, fdiff)

normal(mel_f, weights)

'S*WEIGHT = melspectrogram

'weight 128,1025

's 5 ,1025

Dim melspectrogram_(,) As Double = melspectrogram(weights, s)

Dim power_to_db_ As Double(,) = power_to_db(melspectrogram_)

Dim dct_ As Double(,) = Dct(20, 128)

Return _mfcc(dct_, power_to_db_)

End Function

End Class

以上这篇对python中Librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持亿速云。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
`python_speech_features`是Python一个常用的语音信号处理库,其的`mfcc`函数是用于计算梅尔频率倒谱系数(Mel Frequency Cepstral Coefficients,MFCC)的函数。 MFCC是语音信号处理常用的特征参数,可以用于语音识别、说话人识别等任务。MFCC的计算过程包括以下几个步骤: 1. 分帧:将语音信号分成若干个短时窗口,通常使用汉明窗或其他窗函数对每个窗口进行加权。 2. 傅里叶变换:对每个窗口内的语音信号进行快速傅里叶变换(FFT),将信号从时域转换到频域。 3. 梅尔滤波器组:将频率轴划分为若干个梅尔滤波器组,每个梅尔滤波器组对应一个梅尔频率,用于模拟人耳对频率的响应。 4. 取对数:对每个梅尔滤波器组的能量取对数,得到梅尔频率谱系数(Mel Spectral Coefficients,MSC)。 5. 离散余弦变换:对MSC进行离散余弦变换(DCT),得到MFCC。 `python_speech_features`的`mfcc`函数实现了上述MFCC的计算过程,可以将语音信号转换成一组MFCC特征向量。使用方法如下: ``` python from python_speech_features import mfcc import scipy.io.wavfile as wav (rate, signal) = wav.read("test.wav") mfcc_feat = mfcc(signal, rate) ``` 其,`rate`是采样率,`signal`是语音信号的numpy数组。`mfcc`函数的返回值`mfcc_feat`是一个二维numpy数组,每行是一个MFCC特征向量。可以将`mfcc_feat`作为语音信号的特征向量,用于后续的语音识别任务。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值