语音评估

最新推荐文章于 2023-04-24 15:17:37 发布

望长安于日下

最新推荐文章于 2023-04-24 15:17:37 发布

阅读量310

点赞数

分类专栏：哦吼

本文链接：https://blog.csdn.net/qq_26884501/article/details/103430183

版权

哦吼专栏收录该内容

4 篇文章 1 订阅

订阅专栏

## 批量处理
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import wave
import matplotlib.pyplot as plt
import numpy as np
import os
import librosa
import math
# Load the reference recording that every other file is compared against.
# The samples are decoded as 16-bit PCM regardless of the sample width the
# header reports (sampwidth1 is read but not checked).
with wave.open("C:/Users/daicong/Desktop/主程序/评估/nsec/sounds/ref.wav", 'rb') as f1:
    params1 = f1.getparams()
    nchannels1, sampwidth1, framerate1, nframes1 = params1[:4]
    strData1 = f1.readframes(nframes1)  # raw PCM bytes
waveData1 = np.frombuffer(strData1, dtype=np.int16)  # bytes -> int16 samples
# Peak-normalise to [-1, 1].  The peak is taken on a float copy because
# abs() of the int16 value -32768 overflows back to -32768 in int16.
ref = waveData1 / np.max(np.abs(waveData1.astype(np.float64)))

def comp_SNR(origianl_waveform, target_waveform):
    """Return the overall signal-to-noise ratio in dB.

    The "signal" is the energy of ``origianl_waveform`` and the "noise" is the
    energy of the sample-wise difference between the two waveforms.

    MATLAB equivalent:
    overall_snr = 10*log10(sum(clean.^2) / sum((clean - processed).^2))
    """
    signal_energy = np.sum(origianl_waveform ** 2)
    residual = origianl_waveform - target_waveform
    noise_energy = np.sum(residual ** 2)
    return 10 * np.log10(signal_energy / noise_energy)

def comp_PSNR(target, masked):
    """Return the peak signal-to-noise ratio in dB.

    The peak is the largest (signed) sample of ``target``; the noise level is
    the root-mean-square of ``target - masked``.
    """
    error = target - masked
    rms_error = np.sqrt(np.mean(error ** 2))
    peak = np.max(target)  # signed maximum, not max(|target|)
    return 20 * np.log10(peak / rms_error)

def comp_LSD(target, masked):
    """Return the log-spectral distance between two waveforms.

    Both signals are transformed with a 2048-point STFT, converted to log10
    power, and compared: the squared difference is averaged along axis 0,
    square-rooted, and the result is averaged over the remaining axis.
    (Per librosa's documented layout axis 0 should be frequency and axis 1
    frames -- confirm against the installed librosa version.)
    """
    reference_stft, degraded_stft = (
        librosa.core.stft(signal, n_fft=2048) for signal in (target, masked)
    )
    reference_log_power = np.log10(np.abs(reference_stft) ** 2)
    degraded_log_power = np.log10(np.abs(degraded_stft) ** 2)
    squared_gap = (reference_log_power - degraded_log_power) ** 2
    distance_per_column = np.sqrt(np.mean(squared_gap, axis=0))
    return np.mean(distance_per_column)

#def comp_SegSNR(target, masked, windowsize, shift):#设置窗户大小和窗户移动的大小SegSNR(target, masked, 20, 10)
#    #一般的，人的语音信号在20-40ms之间，可选20ms，窗口重叠为1/2或者1/4为宜，即10ms或者5ms
#    # 每帧语音中有重叠部分，除了重叠部分都是帧移，overlap=windowsize-shift
#    #因为总帧数为len(target)。 len(target) - windowsize表示最后一帧没有重叠，要减去一个，剩下的是重叠的部分，
#    #除去overlap就是有多少个窗口的数量，+1表示加上减去的那个帧
#    #num_frame = (len(target) - windowsize) // shift + 1  # 计算帧的数量，这里shift建议选择overlap
#    num_frames = (len(target) - windowsize) // shift# number of frames
#    for i in range(num_frames):
#        window     = 0.5*(1 - np.cos(2*math.pi*(1:winlength)/(winlength+1)))#hanning window
#        
#    SegSNR = np.zeros(num_frame)#占位符
#    # 计算每一帧（每一个窗户中）的信噪比
#    for i in range(num_frame):
#        noise_frame_energy = np.sum(target[i * shift, i * shift+windowsize] ** 2)  # 每一帧噪声的功率
#        speech_frame_energy = np.sum(masked[i * shift, i * shift+windowsize] ** 2)  # 每一帧信号的功率
#        SegSNR[i] = np.log10(speech_frame_energy / noise_frame_energy)
#
#
#
#    return 10 * np.mean(SegSNR)


# Per-file metric tables, keyed by file name.
SNR_result = {}
LSD_result = {}
PSNR_result = {}
SegSNR_result = {}  # stays empty while the segmental-SNR metric is disabled
wavefiles = os.listdir("C:/Users/daicong/Desktop/主程序/评估/nsec/sounds")
for i in wavefiles:
    # The context manager closes each file; previously every handle leaked.
    with wave.open("C:/Users/daicong/Desktop/主程序/评估/nsec/sounds/%s" % i, 'rb') as wavedata:
        strData = wavedata.readframes(wavedata.getnframes())
    waveData = np.frombuffer(strData, dtype=np.int16)
    # Peak-normalise to [-1, 1].  The peak is taken on a float copy because
    # abs() of the int16 value -32768 overflows back to -32768 in int16.
    waveData = waveData / np.max(np.abs(waveData.astype(np.float64)))

    # Compare this file against the reference signal `ref`.
    SNR = comp_SNR(ref, waveData)
    LSD = comp_LSD(ref, waveData)
    PSNR = comp_PSNR(ref, waveData)
    #SegSNR = comp_SegSNR(ref,waveData,20,10)

    SNR_result[i] = SNR
    LSD_result[i] = LSD
    PSNR_result[i] = PSNR
    # SegSNR_result[i] = SegSNR

import numpy as np
import wave
import os
import librosa
#import math
# Folder that holds the recordings to analyse.
filepath = "C:/Users/daicong/Documents/SNR/data/"
# Names of every entry in that folder.
dirname = os.listdir(filepath)
print(dirname)


#将文件转化成数据
# Convert a file into sample data
def wavread(i, directory="C:/Users/daicong/Documents/SNR/data/"):
    """Read the ``i``-th file of ``directory`` as a peak-normalised waveform.

    Parameters
    ----------
    i : int
        Index into ``os.listdir(directory)`` (listing order, not sorted).
    directory : str, optional
        Folder containing the WAV files.  Defaults to the script's data folder.

    Returns
    -------
    numpy.ndarray
        float64 samples scaled so the largest magnitude is 1.  Samples are
        decoded as 16-bit PCM; multi-channel data is left interleaved.  An
        all-zero recording is returned unchanged instead of as NaNs.
    """
    names = os.listdir(directory)
    # The context manager closes the file even if reading raises.
    with wave.open(os.path.join(directory, names[i]), 'rb') as f:
        raw = f.readframes(f.getnframes())
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # Converting to float before abs() avoids the int16 overflow of -32768.
    samples = np.frombuffer(raw, dtype=np.int16).astype(np.float64)
    peak = np.max(np.abs(samples))
    if peak == 0:
        return samples
    return samples / peak

#计算信噪比SNR
def numpy_SNR(origianl_waveform, target_waveform):
    """Return the signal-to-noise ratio in dB.

    Energy of ``origianl_waveform`` divided by the energy of the sample-wise
    difference between the two waveforms, on a 10*log10 scale.
    """
    difference = origianl_waveform - target_waveform
    energy_ratio = np.sum(origianl_waveform ** 2) / np.sum(difference ** 2)
    return 10 * np.log10(energy_ratio)
def wav_snr(ref_wav, in_wav):
    """Return the SNR in dB of ``in_wav`` against ``ref_wav``.

    Lengths may differ by at most 10 samples; the shorter signal is then
    zero-padded at the end so both have the same length.

    Returns -1 (after printing a message) when the lengths differ by more
    than 10 samples or when the two signals are identical, since the SNR is
    undefined for a zero-energy difference.
    """
    length_gap = ref_wav.shape[0] - in_wav.shape[0]
    # The original test was inverted: it rejected equal-length inputs and
    # tried to pad grossly mismatched ones.
    if abs(length_gap) > 10:
        print("错误：参考wav与输入wav的长度明显不同")
        return -1
    if length_gap > 0:
        in_wav = np.pad(in_wav, (0, length_gap), 'constant')
    elif length_gap < 0:
        # np.pad rejects negative widths, so pad the reference instead.
        ref_wav = np.pad(ref_wav, (0, -length_gap), 'constant')

    # Compute the SNR from squared Euclidean norms.
    norm_diff = np.square(np.linalg.norm(in_wav - ref_wav))
    if norm_diff == 0:
        print("错误：参考wav与输入wav相同")
        return -1

    ref_norm = np.square(np.linalg.norm(ref_wav))
    return 10 * np.log10(ref_norm / norm_diff)

#计算峰值信噪比PSNR
def psnr(ref_wav, in_wav):
    """Return the peak signal-to-noise ratio in dB.

    Uses the largest (signed) sample of ``ref_wav`` as the peak and the
    mean squared difference between the two signals as the noise power.
    """
    mean_squared_error = np.mean((ref_wav - in_wav) ** 2)
    peak_value = np.max(ref_wav)  # signed maximum, not max(|ref_wav|)
    peak_to_rms = peak_value / np.sqrt(mean_squared_error)
    return 20 * np.log10(peak_to_rms)

#对数谱距离(Log Spectral Distance)
def numpy_LSD(origianl_waveform, target_waveform):
    """Return the log-spectral distance (LSD) between two waveforms.

    The shorter waveform is zero-padded at the end so both have the same
    length.  Each is then transformed with a 2048-point STFT and converted to
    log10 power; the squared difference is averaged along axis 0,
    square-rooted, and averaged over the remaining axis.

    Also prints the shape and type of the (possibly padded) original.
    """
    pad_width = origianl_waveform.shape[0] - target_waveform.shape[0]
    if pad_width > 0:
        target_waveform = np.pad(target_waveform, (0, pad_width), 'constant')
    elif pad_width < 0:
        # np.pad rejects negative widths; previously a longer target raised
        # ValueError, so pad the original instead.
        origianl_waveform = np.pad(origianl_waveform, (0, -pad_width), 'constant')

    print("数据形状为", origianl_waveform.shape)
    print("数据类型为", type(origianl_waveform))

    original_spectrogram = librosa.core.stft(origianl_waveform, n_fft=2048)
    target_spectrogram = librosa.core.stft(target_waveform, n_fft=2048)

    # NOTE(review): an exactly-zero bin gives log10(0) = -inf, which makes the
    # result inf/NaN -- confirm whether a floor should be applied.
    original_log = np.log10(np.abs(original_spectrogram) ** 2)
    target_log = np.log10(np.abs(target_spectrogram) ** 2)
    original_target_squared = (original_log - target_log) ** 2
    target_lsd = np.mean(np.sqrt(np.mean(original_target_squared, axis=0)))

    return target_lsd

#打印文件夹下的语音信息
# Print the header parameters of every file in the data folder.
filepath = "C:/Users/daicong/Documents/SNR/data/"
dirname = os.listdir(filepath)
for name in dirname:
    # The context manager closes each file; previously every handle leaked.
    with wave.open(filepath + name, 'rb') as f:
        params = f.getparams()
    print(name)
    print(params)