# Resampling
import os
import librosa
import soundfile as sf
# Define the resampling function
def resample_audio(input_folder, output_folder, target_sr, *, preserve_structure=False):
    """Resample every ``.wav`` file found under ``input_folder``.

    The input tree is walked recursively with ``os.walk``. Each file is
    loaded with ``librosa.load`` (which downmixes to mono by default),
    resampled to ``target_sr`` and written out with ``soundfile.write``.

    Args:
        input_folder: Root directory that is scanned recursively.
        output_folder: Directory the resampled files are written to. It is
            created when it does not exist yet.
        target_sr: Sample rate (in Hz) of the written files.
        preserve_structure: When ``False`` (the default) every file is
            written directly into ``output_folder``, so files that share a
            name in different sub-folders overwrite each other. When
            ``True`` the sub-folder layout of ``input_folder`` is mirrored
            below ``output_folder``.
    """
    os.makedirs(output_folder, exist_ok=True)

    for subfolder, _dirs, files in os.walk(input_folder):
        for file in files:
            # Accept ".wav" in any letter case (".WAV", ".Wav", ...).
            if not file.lower().endswith('.wav'):
                continue

            file_path = os.path.join(subfolder, file)
            print("file_path:", file_path)

            # sr=None keeps the file's native sample rate. Without it
            # librosa first converts everything to its 22050 Hz default, so
            # the audio would be resampled twice and lose bandwidth before
            # the real conversion below.
            audio, sr = librosa.load(file_path, sr=None)
            audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

            if preserve_structure:
                # relpath yields a path without a leading separator, so the
                # join below cannot discard output_folder.
                relative_dir = os.path.relpath(subfolder, input_folder)
                target_dir = os.path.normpath(os.path.join(output_folder, relative_dir))
                os.makedirs(target_dir, exist_ok=True)
            else:
                target_dir = output_folder

            output_file_path = os.path.join(target_dir, file)
            print("output_file_path:", output_file_path)

            sf.write(output_file_path, audio_resampled, target_sr)
# Settings for the resampling run.
target_sr = 16000  # target sample rate handed to resample_audio
output_folder = '/home/dgh/Documents/project/PythonProject/clvp/datasets/resample'  # destination folder
input_folder = '/home/dgh/Documents/project/PythonProject/clvp/datasets/data'  # folder that is walked for .wav files

# Run the resampling over the configured folders.
resample_audio(
    input_folder=input_folder,
    output_folder=output_folder,
    target_sr=target_sr,
)
# Loudness normalization
import os
import soundfile as sf
from pydub import AudioSegment
# Define the loudness normalization function
def normalize_loudness(input_folder, output_folder, target_loudness):
    """Bring every ``.wav`` file directly inside ``input_folder`` to one level.

    Only the top level of ``input_folder`` is scanned (no recursion). Each
    file is loaded with pydub, shifted by the gain that moves its measured
    level onto ``target_loudness`` and written to ``output_folder`` under
    the same file name.

    Args:
        input_folder: Directory containing the ``.wav`` files to process.
        output_folder: Directory the normalized files are written to. It is
            created when it does not exist yet.
        target_loudness: Desired level in dB. pydub measures an RMS level
            relative to full scale (``AudioSegment.dBFS``), not integrated
            LUFS, so the value is interpreted on that scale.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Accept ".wav" in any letter case (".WAV", ".Wav", ...).
        if not filename.lower().endswith('.wav'):
            continue

        file_path = os.path.join(input_folder, filename)
        segment = AudioSegment.from_wav(file_path)

        # The gain must be the distance between the current level and the
        # target; a fixed gain would amplify every file by the same amount
        # and normalize nothing.
        current_level = segment.dBFS
        if current_level != float('-inf'):
            # A digitally silent file reports -inf dBFS and has no finite
            # gain that reaches the target, so it is written unchanged.
            segment = segment.apply_gain(target_loudness - current_level)

        output_file_path = os.path.join(output_folder, filename)
        # pydub's exporter keeps channel count, sample width and frame rate
        # intact. export() hands back the open file object, so close it
        # explicitly to avoid leaking one handle per file.
        segment.export(output_file_path, format='wav').close()
# Settings for the loudness normalization run.
# NOTE(review): labelled as LUFS by the original author, but
# normalize_loudness is built on pydub gain changes - confirm the intended
# scale.
target_loudness = -23
output_folder = '/home/dgh/Documents/project/PythonProject/normalize_loudness/'  # destination folder
# NOTE(review): the resampling step in this file writes to
# '.../PythonProject/clvp/datasets/resample', which is not this folder -
# confirm that this is the intended input.
input_folder = '/home/dgh/Documents/project/PythonProject/datasets/resampled'

# Run the normalization over the configured folders.
normalize_loudness(
    input_folder=input_folder,
    output_folder=output_folder,
    target_loudness=target_loudness,
)