import fnmatch
from pathlib import Path
from scipy.io import wavfile
import webrtcvad
import struct
from scipy.io.wavfile import write
import os
import numpy as np
import matplotlib.pyplot as plt
# Batch processing of all files starts here.
def find_all_files(files_path):
    """Recursively collect the paths of all ``.wav`` files under a directory.

    Files whose base name matches ``._*.wav`` are skipped (presumably the
    hidden side-car files some operating systems create next to real files).

    Args:
        files_path: Directory to search (``str`` or path-like).

    Returns:
        list[str]: Paths of the matching files as strings, in the order
        ``Path.rglob`` yields them.
    """
    return [
        str(wav_path)  # callers expect plain strings, not Path objects
        for wav_path in Path(files_path).rglob('*.wav')
        # Test the base name only.  ``Path.name`` is correct for every path
        # separator, whereas splitting the string on a backslash only
        # isolates the base name on Windows.
        if not fnmatch.fnmatch(wav_path.name, '._*.wav')
    ]
# Default destination root for the trimmed recordings; this is the location
# the function always wrote to before ``output_root`` became configurable.
_DEFAULT_OUTPUT_ROOT = 'F:\\文件\\研究生组\\情感数据集\\已处理\\IEMOCAP_full_release'


def all_void_void(single_file_path, flag_session, output_root=None):
    """Remove non-speech audio from one WAV file and save the speech part.

    The recording is cut into 30 ms frames, each frame is classified by a
    WebRTC voice-activity detector in its most aggressive mode (3), and the
    frames classified as speech are concatenated and written to::

        <output_root>/<flag_session>/sentences/wav/<parent dir name>/<file name>

    The destination directory is created when missing.  Progress and
    failures are reported with ``print``; a failure while classifying or
    writing does not raise, so a batch run continues with the next file.

    Args:
        single_file_path: Path of the source WAV file (``str`` or path-like).
            It must hold mono 16-bit PCM audio.
        flag_session: Session directory name used in the output path
            (for example ``"Session5"``).
        output_root: Root directory for the output.  Defaults to the
            original hard-coded location.

    Returns:
        None.

    Raises:
        ValueError: If the audio is not one-dimensional ``int16`` data,
            which is the only layout the detector is fed correctly.
    """
    single_file_path = os.fspath(single_file_path)
    if output_root is None:
        output_root = _DEFAULT_OUTPUT_ROOT

    print(single_file_path)
    sample_rate, samples = wavfile.read(single_file_path)
    # os.path handles the platform's separators instead of assuming "\\".
    file_name = os.path.basename(single_file_path)
    file_package = os.path.basename(os.path.dirname(single_file_path))
    print(file_name)

    if samples.ndim != 1 or samples.dtype != np.int16:
        raise ValueError(
            'expected mono 16-bit PCM audio, got shape %s and dtype %s: %s'
            % (samples.shape, samples.dtype, single_file_path))

    vad = webrtcvad.Vad()
    vad.set_mode(3)

    # Native-endian int16 bytes: the same buffer struct.pack("%dh") built,
    # without unpacking every sample into a separate call argument.
    raw_samples = samples.tobytes()
    window_duration = 0.03
    # The +0.3 presumably guards against the float product landing just
    # below the intended integer before truncation.
    samples_per_window = int(window_duration * sample_rate + 0.3)
    bytes_per_sample = 2

    voiced_frames = []
    try:
        # Only complete frames are classified.  Per the py-webrtcvad
        # documentation the detector accepts 10/20/30 ms frames only, so the
        # shorter trailing remainder is skipped explicitly instead of being
        # used to trigger an exception that ends the loop.
        last_start = len(samples) - samples_per_window
        for start in range(0, last_start + 1, samples_per_window):
            stop = start + samples_per_window
            frame = raw_samples[start * bytes_per_sample:stop * bytes_per_sample]
            if vad.is_speech(frame, sample_rate=sample_rate):
                voiced_frames.append(samples[start:stop])
    except Exception as exc:
        # Best effort: report the problem (e.g. a sample rate the detector
        # rejects) and keep whatever frames were classified so far.
        print('产生异常了第一次', exc)

    if not voiced_frames:
        # np.concatenate cannot join an empty list; nothing to write.
        print('产生异常了第二次', 'no speech frames detected')
        return

    speech_samples = np.concatenate(voiced_frames)
    target_dir = os.path.join(
        output_root, flag_session, 'sentences', 'wav', file_package)
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(target_dir, exist_ok=True)
        wavfile.write(
            os.path.join(target_dir, file_name), sample_rate, speech_samples)
    except OSError as exc:
        print('产生异常了第二次', exc)
# Sessions to process.  An earlier, commented-out variant listed all five:
# ["Session1", "Session2", "Session3", "Session4", "Session5"].
Session_list = ["Session5"]

# Run the batch job only when executed as a script, so that importing this
# module for its functions does not start processing files.
if __name__ == "__main__":
    for session_name in Session_list:
        # Backslashes are escaped explicitly; the resulting path string is
        # the same one the unescaped literal produced, without relying on
        # invalid escape sequences such as "\s" and "\w".
        source_dir = ('F:\\文件\\情感数据集\\未处理\\IEMOCAP_full_release\\'
                      + session_name + '\\sentences\\wav')
        for wav_path in find_all_files(source_dir):
            all_void_void(wav_path, session_name)
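# NOTE: stray non-code text that followed the script in its original source
# (not part of the program): 06-21 / 489 / 05-10 / 6119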