音频数据拼接

# -*- coding: utf-8 -*-
import multiprocessing as mp
from pydub import AudioSegment
import os
import pdb
# Maximum number of source files concatenated into one output file.
_FILES_PER_OUTPUT = 1000
# Every clip is resampled to this frame rate (Hz) before concatenation.
_TARGET_FRAME_RATE = 48000


def _list_wav_paths(root_dir, subsubdir):
    """Return the full paths of the ``.wav`` files found for ``subsubdir``.

    The files are first looked up in ``<root_dir>/<subsubdir>/data/audio/``.
    If that directory cannot be listed, the whole ``<root_dir>/<subsubdir>``
    tree is walked instead.
    """
    audio_dir = os.path.join(root_dir, subsubdir + '/data/audio/')
    try:
        candidates = [os.path.join(audio_dir, name) for name in os.listdir(audio_dir)]
    except OSError:
        # Expected layout is missing (or not a directory): fall back to
        # collecting every file below the sub-directory.
        candidates = [
            os.path.join(subdir, name)
            for subdir, _dirs, files in os.walk(os.path.join(root_dir, subsubdir))
            for name in files
        ]
    return [candidate for candidate in candidates if candidate.endswith(".wav")]


def _merge_audio(paths):
    """Load every file in ``paths`` and concatenate them, in order, at 48 kHz."""
    merged = None
    for audio_path in paths:
        sound = AudioSegment.from_file(audio_path)
        if sound.frame_rate != _TARGET_FRAME_RATE:
            sound = sound.set_frame_rate(_TARGET_FRAME_RATE)
        # crossfade=0 gives plain back-to-back concatenation.
        merged = sound if merged is None else merged.append(sound, crossfade=0)
    return merged


def process_directory(root_dir, output_dir, subsubdir):
    """Concatenate the WAV files of one sub-directory into merged WAV files.

    Sub-directories whose name (after dropping the last four characters of
    each entry, i.e. an extension such as ``.wav``) already appears in
    ``path_done`` or ``path_done2``, or that are named ``ignore``, are skipped.

    The remaining WAV files are merged in groups of at most 1000 files.
    The first group is written to ``<output_dir>/<subsubdir>.wav`` and group
    ``k`` (k >= 1) to ``<output_dir>/<subsubdir><k>.wav``.

    Args:
        root_dir: Directory containing the per-recording sub-directories.
        output_dir: Directory receiving the merged files; created if missing.
        subsubdir: Name of the sub-directory of ``root_dir`` to process.

    Raises:
        OSError: If ``path_done`` or ``path_done2`` cannot be listed.
    """
    done = {name_done[:-4] for name_done in os.listdir('path_done')}
    done.update(name_done[:-4] for name_done in os.listdir('path_done2'))
    done.add('ignore')

    if subsubdir in done:
        print(subsubdir,'done')
        return

    print(subsubdir,'wait')
    wav_paths = _list_wav_paths(root_dir, subsubdir)
    if not wav_paths:
        return

    print(len(wav_paths))
    os.makedirs(output_dir, exist_ok=True)

    # Step through the list in fixed-size windows so the final, shorter group
    # (or the only group, when there are fewer than 1000 files) is included.
    for part, start in enumerate(range(0, len(wav_paths), _FILES_PER_OUTPUT)):
        merged_sound = _merge_audio(wav_paths[start:start + _FILES_PER_OUTPUT])

        suffix = "" if part == 0 else str(part)
        output_path = os.path.join(output_dir, subsubdir + suffix + ".wav")
        merged_sound.export(output_path, format="wav")
        print("Processed: ",output_path)
        # Release the merged audio before loading the next group.
        del merged_sound
if __name__ == '__main__':
    # Script entry point: merge the audio of every sub-directory of root_dir.

    root_dir = "/path/wavs/raw_wavs/"   # root directory holding the raw recordings
    output_dir = "path_out"  # directory receiving the merged files
    print("开始处理。")

    subsubdirs = os.listdir(root_dir)
    print(len(subsubdirs))

    # Sub-directories are processed sequentially. No multiprocessing pool is
    # created here, so there is nothing to close or join afterwards.
    for subsubdir in subsubdirs:
        process_directory(root_dir, output_dir, subsubdir)

    print("处理完成。")

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值