RNNoise降噪训练

最新推荐文章于 2024-09-11 10:06:41 发布

韩搏

最新推荐文章于 2024-09-11 10:06:41 发布

阅读量3.7w

点赞数 1

分类专栏：其它　文章标签：tensorflow 深度学习 神经网络

本文链接：https://blog.csdn.net/hanbo622/article/details/120779545

版权

其它专栏收录该内容

23 篇文章 0 订阅

订阅专栏

1.下载源码：点击下载rnnoise代码，或者去github下载
2.编译源码

sudo apt-get install autoconf automake libtool
./autogen.sh
./configure
make

3.训练

pip依赖

pip install numpy h5py
pip install grpcio==1.36.1
pip install keras==2.2.4 tensorflow==1.12.0  #版本必须对应 tensorflow-gpu==1.12.0
pip install protobuf==3.8.0

cd src ; ./compile.sh
./denoise_training signal.raw noise.raw 10000 > training.f32
cd training ; ./bin2hdf5.py ../src/training.f32 10000 87 training.h5
./rnn_train.py
./dump_rnn.py weights.hdf5 ../src/rnn_data.c rnn_data.h orig

4.降噪

./rnnoise_demo noise.raw out.raw

附：

tensorflow: Your CPU supports instructions that this TensorFlow binary was not compiled to use: FMA
遇到了这个问题，意思是你的 CPU 支持AVX2 FMA（加速CPU计算），但安装的 TensorFlow 版本不支持


如果是初学者 或者 没有太大计算速度的需求，在开头加上这两行忽略这个提示即可

	import os
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
PS:
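	os.environ["TF_CPP_MIN_LOG_LEVEL"] = '0' # 默认，显示所有信息
	os.environ["TF_CPP_MIN_LOG_LEVEL"] = '1' # 屏蔽 INFO，只显示 Warning 和 Error
	os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' # 屏蔽 INFO 和 Warning，只显示 Error
	os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' # 屏蔽 INFO、Warning 和 Error，只显示 Fatal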

音频文件合并处理脚本

#coding: utf-8

import os 
import os.path 
import binascii

# Name of the output file that receives the merged audio data.
out_file_name = '1.raw'

# File-name suffixes that the merge must skip.
ignore_filename = [".raw", ".py"]


def ignore_check_file(file):
    """Return True when *file* must be skipped by the merge.

    A file is skipped when its name ends with one of the suffixes listed in
    ``ignore_filename``.

    Args:
        file: Bare file name (no directory part), e.g. ``"speech.wav"``.

    Returns:
        bool: True if the name ends with an ignored suffix, False otherwise.
    """
    # Match on the suffix only.  A substring test such as ``find(...) > 0``
    # misses a name that *is* the suffix (".raw", found at index 0) and
    # wrongly skips names that merely contain it (e.g. "my.python.wav").
    return file.endswith(tuple(ignore_filename))

# text -> binary -> hex -> hex text
def str_to_hexStr(string):
    """Return the hexadecimal representation of *string*.

    Args:
        string: Binary data (``bytes`` or any other buffer object; a plain
            ``str`` on Python 2).  Text is accepted as well and is encoded
            as UTF-8 before conversion.

    Returns:
        Text holding two hex digits per input byte.
    """
    # ``type(u'')`` is the text type on both Python 2 (unicode) and
    # Python 3 (str); hexlify itself only understands binary data.
    if isinstance(string, type(u'')):
        string = string.encode('utf-8')
    return binascii.hexlify(string).decode('utf-8')
# hex text -> hex bytes -> binary -> text
def hexStr_to_str(hex_str):
    """Decode *hex_str* (a string of hex digits) back into text.

    The digits are turned into raw bytes, and those bytes are decoded as
    UTF-8.
    """
    raw_bytes = binascii.unhexlify(hex_str.encode('utf-8'))
    return raw_bytes.decode('utf-8')
def print_wav_head(head_msg):
    """Print the fields of a canonical 44-byte RIFF/WAVE header.

    The layout assumed is the classic PCM one: the RIFF chunk descriptor
    (12 bytes), a ``fmt `` chunk with a 16-byte body (24 bytes including its
    header) and the ``data`` chunk header (8 bytes).  Every multi-byte
    integer is little-endian and is printed as a decoded number.

    Args:
        head_msg: The leading bytes of a WAV file.  Only the first 44 bytes
            are interpreted; the ``HEAD`` line dumps everything passed in.

    Returns:
        None.  The report goes to standard output.  When fewer than 44 bytes
        are supplied a one-line notice is printed instead of the report.
    """
    import struct  # local import keeps this function self-contained

    head_len = 44
    if len(head_msg) < head_len:
        print("WAV head too short: %d bytes (need %d)"
              % (len(head_msg), head_len))
        return

    def as_text(tag):
        # Chunk ids are normally ASCII; escape any other byte so that
        # printing can never fail on an exotic console encoding.
        escaped = tag.decode('latin-1').encode('ascii', 'backslashreplace')
        return escaped.decode('ascii')

    (riff, file_len, wave, fmt, fmt_size, format_tag, channels,
     samples_per_sec, bytes_per_sec, block_align, bits_per_sample,
     data, audio_data_len) = struct.unpack_from('<4sI4s4sIHHIIHH4sI',
                                                head_msg)

    report = [
        "-----WAV Head info-----",
        "HEAD: %s" % binascii.hexlify(head_msg).decode('ascii'),
        # ckID, 4 bytes: the "RIFF" marker, upper case.
        "RIFF(4): %s" % as_text(riff),
        # ckSize, 4 bytes: file length excluding the "RIFF" marker and this
        # size field, i.e. total file length - 8.
        "Len(4): %d" % file_len,
        # fccType, 4 bytes: the "WAVE" type marker, upper case.
        "WAV(4): %s" % as_text(wave),
        # ckID, 4 bytes: start of the "fmt " chunk (lower case, trailing
        # space) that describes the audio format.
        "FMT(4): %s" % as_text(fmt),
        # ckSize, 4 bytes: size of the format data (usually 0x00000010).
        "FILT(4): 0x%08x" % fmt_size,
        # FormatTag, 2 bytes: audio encoding, 1 means PCM.
        "FTAG(2): %d" % format_tag,
        # Channels, 2 bytes: 1 for mono, 2 for stereo.
        "CHAN(2): %d" % channels,
        # SamplesPerSec, 4 bytes: sample rate, e.g. 44100.
        "SAMP(4): %d" % samples_per_sec,
        # BytesPerSec, 4 bytes: data rate in bytes,
        # sampleRate * (bitsPerSample / 8) * channels.
        "PERSEC(4): %d" % bytes_per_sec,
        # BlockAlign, 2 bytes: size of one sample frame in bytes,
        # bitsPerSample * channels / 8 (4 for 16-bit stereo).
        "ALIGN(2): %d" % block_align,
        # BitsPerSample, 2 bytes: sample precision per channel, e.g. 16.
        "PERS(2): %d" % bits_per_sample,
        # ckID, 4 bytes: the "data" marker (lower case) that starts the
        # chunk holding the audio samples.
        "DATA(4): %s" % as_text(data),
        # ckSize, 4 bytes: length of the audio data,
        # file length including the header - 44.
        "AUDIOLEN(4): %d" % audio_data_len,
        "-----------------------",
    ]
    # A single-argument print call behaves the same on Python 2 and 3.
    print("\n".join(report))
	
def _locate_pcm_payload(content):
    """Return ``(start, end)`` offsets of the audio payload in *content*.

    Returns None when no ``data`` chunk can be located.
    """
    import struct  # local import keeps this helper self-contained

    if content[0:4] == b'RIFF' and content[8:12] == b'WAVE':
        # Proper RIFF container: walk the chunk list.  Every chunk starts
        # with an 8-byte header (4-byte id + 4-byte little-endian size).
        offset = 12
        while offset + 8 <= len(content):
            chunk_id, chunk_size = struct.unpack_from('<4sI', content, offset)
            body = offset + 8
            if chunk_id == b'data':
                # Clamp: some writers store a size larger than the file.
                return body, min(body + chunk_size, len(content))
            # Chunks are word-aligned: an odd-sized body has one pad byte.
            offset = body + chunk_size + (chunk_size & 1)

    # No usable chunk list: fall back to searching for the last "data"
    # marker and skip its 8-byte chunk header.
    marker = content.rfind(b'data')
    if marker > 0:
        return marker + 8, len(content)
    return None


def read_pcm_file():
    """Merge the audio payload of every file below the current directory.

    Walks ``"."`` recursively, skips the files rejected by
    ``ignore_check_file`` (and the output file itself), and appends the
    payload of each remaining file's ``data`` chunk to ``out_file_name``.
    For ``.wav`` files the 44-byte header is printed with
    ``print_wav_head`` first.  Files without a ``data`` chunk are counted
    but contribute nothing to the output.

    Returns:
        None.  Progress is reported on standard output; "PCM idx" is the
        offset of the first payload byte within the source file.
    """
    file_count = 0
    out_path = os.path.abspath(out_file_name)
    with open(out_file_name, 'wb') as wf:
        # root: directory being visited, dirs: its sub-directories,
        # files: the plain files it contains.
        for root, dirs, files in os.walk("."):
            for file in files:
                # os.path.join picks the right separator on every platform.
                src_file = os.path.join(root, file)
                if ignore_check_file(file):
                    continue
                if os.path.abspath(src_file) == out_path:
                    # Never feed the file being written back into itself.
                    continue
                file_count += 1
                print("Process File Name: %s" % src_file)
                with open(src_file, 'rb') as rf:
                    content = rf.read()
                # Only dump the header when a complete one is available.
                if file.lower().endswith('.wav') and len(content) >= 44:
                    print_wav_head(content[:44])
                # Search the whole file, header included: in a canonical
                # WAV the "data" marker sits inside the first 44 bytes.
                span = _locate_pcm_payload(content)
                if span is not None:
                    start, end = span
                    print("Read File Len: %d PCM idx: %d"
                          % (len(content), start))
                    wf.write(content[start:end])
    print("Process File Count: %d" % file_count)
							
# Run the merge only when this file is executed as a script.
if __name__ == "__main__":
    read_pcm_file()