# -*- coding: utf-8 -*-
import multiprocessing as mp
from pydub import AudioSegment
import os
import pdb
def _load_done_names(done_dirs):
    """Return the set of directory names that must be skipped."""
    names = {'ignore'}
    for done_dir in done_dirs:
        # Dropping the last four characters strips a ".wav"-length extension
        # from each marker file name, as the original bookkeeping did.
        names.update(entry[:-4] for entry in os.listdir(done_dir))
    return names


def _collect_wav_paths(root_dir, subsubdir):
    """Return the sorted full paths of the ``.wav`` files for *subsubdir*."""
    audio_dir = os.path.join(root_dir, subsubdir, 'data', 'audio')
    try:
        candidates = [os.path.join(audio_dir, name) for name in os.listdir(audio_dir)]
    except OSError:
        # No usable ``data/audio`` folder: fall back to every file found
        # anywhere below the directory.
        candidates = [
            os.path.join(parent, name)
            for parent, _dirs, names in os.walk(os.path.join(root_dir, subsubdir))
            for name in names
        ]
    # os.listdir/os.walk order is arbitrary; sorting makes the merge order
    # reproducible between runs.
    return sorted(p for p in candidates if p.endswith('.wav'))


def _merge_chunk(wav_paths, target_rate):
    """Load *wav_paths* in order and return them concatenated as one segment."""
    merged = None
    for wav_path in wav_paths:
        sound = AudioSegment.from_file(wav_path)
        if sound.frame_rate != target_rate:
            sound = sound.set_frame_rate(target_rate)
        # Merge as we go instead of keeping every decoded file in a list.
        merged = sound if merged is None else merged.append(sound, crossfade=0)
    return merged


def process_directory(root_dir, output_dir, subsubdir, *,
                      done_dirs=('path_done', 'path_done2'),
                      chunk_size=1000, target_rate=48000):
    """Concatenate the ``.wav`` files of one directory into merged wav files.

    Files are taken from ``<root_dir>/<subsubdir>/data/audio``; when that
    folder cannot be listed, every file below ``<root_dir>/<subsubdir>`` is
    considered instead. The files are merged in sorted order, ``chunk_size``
    files per output. The first output is ``<output_dir>/<subsubdir>.wav``,
    later ones are ``<output_dir>/<subsubdir><part>.wav``.

    Args:
        root_dir: Directory that contains ``subsubdir``.
        output_dir: Directory that receives the merged files; created when
            there is something to write.
        subsubdir: Name of the directory to process.
        done_dirs: Directories whose entry names (minus their last four
            characters) mark directories to skip. ``'ignore'`` is always
            skipped.
        chunk_size: Maximum number of input files per merged output.
        target_rate: Frame rate every input is converted to before merging.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If one of ``done_dirs`` cannot be listed.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    if subsubdir in _load_done_names(done_dirs):
        print(subsubdir, 'done')
        return
    print(subsubdir, 'wait')

    wav_paths = _collect_wav_paths(root_dir, subsubdir)
    if not wav_paths:
        return
    print(len(wav_paths))

    os.makedirs(output_dir, exist_ok=True)
    # Step through the list so the final, shorter chunk is exported too
    # (floor division by the chunk size used to drop it).
    for part, start in enumerate(range(0, len(wav_paths), chunk_size)):
        merged_sound = _merge_chunk(wav_paths[start:start + chunk_size], target_rate)
        suffix = '' if part == 0 else str(part)
        output_path = os.path.join(output_dir, subsubdir + suffix + ".wav")
        merged_sound.export(output_path, format="wav")
        print("Processed: ", output_path)
        # Release the merged audio before the next chunk is decoded.
        del merged_sound
if __name__ == '__main__':
    # Script entry point: merge the audio of every directory under root_dir.
    root_dir = "/path/wavs/raw_wavs/"  # root directory
    output_dir = "path_out"  # output directory
    print("开始处理。")
    # List the root once so the printed count and the loop see the same entries.
    subsubdirs = os.listdir(root_dir)
    print(len(subsubdirs))
    # Directories are processed one after another in this process. No worker
    # pool is created, so there is nothing to close or join afterwards.
    for subsubdir in subsubdirs:
        process_directory(root_dir, output_dir, subsubdir)
    print("处理完成。")
# Audio data concatenation
# (Page footer captured with the snippet: "latest recommended article published 2024-07-15 16:08:00")