代码已经调通,下载链接见文末;如需讲解,可以私信我,真实有效!
1、数据集制作
采集三种不同声音,将其保存为A.wav,B.wav,C.wav;使用Matlab代码将其混合成ABC.wav,以此类推做成自己所需要的数据集。
2、模型设计
网络模型结构仍基于全卷积时域音频分离网络(Conv-TasNet),并将其改为三声源分离。
3、模型训练
| 描述 | 值 |
| --- | --- |
| 训练轮数 | 50 |
| 每个批量大小 | 4 |
| 分割块长度 | 4 |
| 初始学习率 | 0.0001 |
| 优化器 | adam |
| 激活函数 | sigmoid |
4、模型推理
import os
import torch
import sys
sys.path.append('./options')
from AudioReader import AudioReader, write_wav, read_wav
import argparse
from torch.nn.parallel import data_parallel
from Conv_TasNet import ConvTasNet
from utils import get_logger
# from option import parse
from option import parse
import tqdm
import time
class Separation():
    """Split one mixture recording into per-source tracks with a Conv-TasNet.

    The constructor loads the mixture, builds the network from the ``net_conf``
    section of the parsed YAML options and restores the weights stored under
    ``"model_state_dict"`` in the checkpoint. ``inference`` then runs the
    network once and writes one wav file per estimated source.
    """

    def __init__(self, mix_path, yaml_path, model, gpuid):
        """Load the mixture and the trained network.

        Args:
            mix_path: path of the mixture wav, handed to ``read_wav``.
            yaml_path: path of the YAML options file, handed to ``parse``.
            model: path of the checkpoint; it must contain the keys
                ``"model_state_dict"`` and ``"epoch"``.
            gpuid: iterable of GPU ids. It is stored as a tuple but does not
                select the device: inference is pinned to the CPU.
        """
        super(Separation, self).__init__()
        self.mix = read_wav(mix_path)
        # NOTE(review): `is_tain` (sic) is presumably the keyword name that
        # option.parse really declares -- confirm before "fixing" the spelling.
        opt = parse(yaml_path, is_tain=False)
        net = ConvTasNet(**opt['net_conf'])
        # NOTE(security): torch.load unpickles the file, so only load
        # checkpoints that come from a trusted source.
        dicts = torch.load(model, map_location='cpu')
        net.load_state_dict(dicts["model_state_dict"])
        self.logger = get_logger(__name__)
        self.logger.info('Load checkpoint from {}, epoch {: d}'.format(model, dicts["epoch"]))
        self.device = torch.device("cpu")
        self.net = net.to(self.device)
        # Inference must not run in training mode: layers such as dropout or
        # batch normalisation behave differently until eval() is called.
        self.net.eval()
        self.gpuid = tuple(gpuid)

    @staticmethod
    def _rescale(signal, peak):
        """Scale ``signal`` so that its largest absolute sample equals ``peak``.

        An empty or all-zero signal is returned unchanged, because dividing by
        its zero peak would turn every sample into NaN.
        """
        if signal.numel() == 0:
            return signal
        signal_peak = torch.max(torch.abs(signal))
        if signal_peak == 0:
            return signal
        return signal * peak / signal_peak

    def inference(self, file_path, sample_rate=44100):
        """Separate the loaded mixture and write ``spk<N>.wav`` files.

        Args:
            file_path: output directory; it is created when missing and
                receives ``spk1.wav``, ``spk2.wav``, ... (one per estimate).
            sample_rate: value passed to ``write_wav`` as the sample rate.
                Defaults to 44100, the rate that used to be hard-coded.
        """
        # Create the output directory itself. Creating `<file_path>/spk<N>`
        # would leave an empty directory next to every `spk<N>.wav` file.
        os.makedirs(file_path, exist_ok=True)
        with torch.no_grad():
            egs = self.mix.to(self.device)
            # Infinity norm == largest absolute sample of the mixture.
            norm = torch.norm(egs, float('inf'))
            ests = self.net(egs)
            # .cpu() is a no-op on the CPU device, so a single code path
            # serves any gpuid value.
            spks = [torch.squeeze(s.detach().cpu()) for s in ests]
        for index, s in enumerate(spks, start=1):
            # Trim the estimate to the mixture length.
            # assumes the mixture is 1-D (samples,) -- TODO confirm read_wav
            s = s[:egs.shape[0]]
            # Give every estimate the same peak level as the mixture.
            s = self._rescale(s, norm)
            filename = os.path.join(file_path, 'spk' + str(index) + '.wav')
            write_wav(filename, s, sample_rate)
        # NOTE(review): len(self.mix) is the first dimension of the loaded
        # tensor, which is probably a sample count rather than a number of
        # utterances -- message kept unchanged, confirm the intent.
        self.logger.info("Compute over {:d} utterances".format(len(self.mix)))
def main():
    """Parse the command line, separate one mixture and print the elapsed time.

    Options (all optional): ``-mix_scp`` mixture wav path, ``-yaml`` options
    file, ``-model`` checkpoint file, ``-gpuid`` comma-separated GPU ids and
    ``-save_path`` output directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-mix_scp', type=str, default='mix.wav', help='Path to mix scp file.')
    parser.add_argument(
        '-yaml', type=str, default='./options/train/train.yml', help='Path to yaml file.')
    parser.add_argument(
        '-model', type=str, default='./Conv-TasNet-with-skip/best.pt', help="Path to model file.")
    parser.add_argument(
        '-gpuid', type=str, default='0', help='Enter GPU id number')
    parser.add_argument(
        '-save_path', type=str, default='./non-pit-2', help='save result path')
    args = parser.parse_args()
    # Skip blank fields so that `-gpuid ""` or a trailing comma yields fewer
    # ids instead of int('') raising ValueError.
    gpuid = [int(i) for i in args.gpuid.split(',') if i.strip()]
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # a system clock adjustment during the run.
    start = time.perf_counter()
    separation = Separation(args.mix_scp, args.yaml, args.model, gpuid)
    separation.inference(args.save_path)
    end = time.perf_counter()
    st = str(end-start)
    # The printed text is Chinese for "time needed for separation".
    print("分离所需时间"+st+"s")
# Script entry point: run the separation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
# Example invocation:
#python Separation_wav.py -mix_scp ./ceshi/0.wav
代码链接:https://download.csdn.net/download/m0_60103585/89241414
3002

被折叠的 条评论
为什么被折叠?



