语音数据清洗

重采样


import os
import librosa
import soundfile as sf

# 定义重采样函数
def resample_audio(input_folder, output_folder, target_sr):
    """Resample every ``.wav`` file found under *input_folder* to *target_sr*.

    The input tree is walked recursively, but all results are written flat
    into *output_folder* under their original file names, so two inputs with
    the same name in different sub-folders overwrite each other.

    Args:
        input_folder: Root directory that is searched recursively for
            files whose name ends in ``.wav``.
        output_folder: Directory that receives the resampled files. It is
            created if it does not exist yet.
        target_sr: Sample rate, in Hz, of the written files.

    Note:
        ``librosa.load`` is used with its default ``mono=True``, so
        multi-channel input is down-mixed to a single channel.
    """
    # sf.write does not create missing directories, so make sure the
    # destination exists before any file is processed.
    os.makedirs(output_folder, exist_ok=True)

    for subfolder, _dirs, files in os.walk(input_folder):
        for file in files:
            if not file.endswith('.wav'):
                continue

            file_path = os.path.join(subfolder, file)
            print("file_path:", file_path)

            # sr=None keeps the file's native sample rate. The librosa
            # default (sr=22050) would silently resample on load, and the
            # audio would then be resampled a second time below.
            audio, sr = librosa.load(file_path, sr=None)

            # Resample once, and only when the rate actually differs.
            if sr != target_sr:
                audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

            output_file_path = os.path.join(output_folder, file)
            print("output_file_path:", output_file_path)

            sf.write(output_file_path, audio, target_sr)


# Input folder path (searched recursively for .wav files)
input_folder = '/home/dgh/Documents/project/PythonProject/clvp/datasets/data'

# Output folder path (receives the resampled files)
output_folder = '/home/dgh/Documents/project/PythonProject/clvp/datasets/resample'

# Target sample rate in Hz
target_sr = 16000

# Run the resampling step
resample_audio(input_folder, output_folder, target_sr)

响度归一化

import os
import soundfile as sf
from pydub import AudioSegment

# 定义响度归一化的函数
def normalize_loudness(input_folder, output_folder, target_loudness):
    """Bring every ``.wav`` file in *input_folder* to a common loudness level.

    Each file's current level is measured and the gain needed to reach
    *target_loudness* is applied, so quiet files are raised and loud files
    are lowered. Only the top level of *input_folder* is scanned.

    Args:
        input_folder: Directory containing the ``.wav`` files to process.
        output_folder: Directory that receives the normalized files under
            their original names. It is created if it does not exist yet.
        target_loudness: Desired level in dB. It is compared against
            pydub's ``AudioSegment.dBFS`` (RMS level relative to full
            scale); pydub offers no LUFS measurement, so this is an
            approximation of a LUFS target, not a true LUFS normalization.

    Note:
        A large positive gain can drive samples into clipping. Completely
        silent files have no finite level and are copied without gain.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith('.wav'):
            continue

        file_path = os.path.join(input_folder, filename)
        segment = AudioSegment.from_wav(file_path)

        # Gain is the distance from the measured level to the target.
        # A silent segment reports -inf dBFS, which would yield an
        # infinite gain, so it is left untouched.
        current_loudness = segment.dBFS
        if current_loudness != float('-inf'):
            segment = segment.apply_gain(target_loudness - current_loudness)

        # Save the normalized audio. pydub's own exporter keeps the channel
        # count, sample width and frame rate of the source intact; export()
        # returns the open output file object, which is closed right away.
        output_file_path = os.path.join(output_folder, filename)
        segment.export(output_file_path, format='wav').close()

# Input folder path
# NOTE(review): this path differs from the output folder used by the
# resampling step earlier in this file ('.../clvp/datasets/resample') —
# confirm that it points at the resampled files.
input_folder = '/home/dgh/Documents/project/PythonProject/datasets/resampled'

# Output folder path
output_folder = '/home/dgh/Documents/project/PythonProject/normalize_loudness/'

# Target loudness (stated in LUFS)
# NOTE(review): pydub has no LUFS meter — confirm the unit against how
# normalize_loudness uses this value.
target_loudness = -23

# Run the loudness normalization step
normalize_loudness(input_folder, output_folder, target_loudness)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值