制造回声
import soundfile as sf
import librosa
import numpy as np
# Produce a single echo
def one_echo(y, time_delay):
    """Mix one delayed copy of ``y`` back into ``y``.

    Args:
        y: NumPy array of samples (the slicing below assumes it is 1-D).
        time_delay: Echo delay in samples; must be a non-negative integer.

    Returns:
        A float array of ``y.size`` samples equal to
        ``0.9 * y + 0.1 * delayed``, where ``delayed`` is ``y`` shifted
        right by ``time_delay`` samples with zeros in front.

    Raises:
        ValueError: If ``time_delay`` is negative.
    """
    if time_delay < 0:
        raise ValueError("time_delay must be non-negative")
    # np.zeros already provides the leading silence of the delayed copy.
    y_echo = np.zeros(y.size)
    # When the delay is at least as long as the signal, the echo falls
    # completely outside the buffer and only the attenuated original remains.
    if time_delay < y.size:
        y_echo[time_delay:] = y[:y.size - time_delay]
    # Original and echo are mixed with sample weights 0.9 : 0.1.
    return y * 0.9 + y_echo * 0.1
# Load the audio data and set up the echo parameters.
wav_input = "in.wav"
y, sample_rate = librosa.load(wav_input, sr=None)

distance_delay = 50  # the echo is produced by a 50 m distance
# Delay in samples; 340 is presumably the speed of sound in m/s.
time_delay = int(sample_rate * distance_delay / 340)

# Apply the single-echo step several times in a row.
echo_times = int(5)
for i in range(echo_times):
    y = one_echo(y, time_delay)

# Write the processed signal, scaled to 16-bit integers.
path_output = "out.wav"
pcm16 = (y * 32767).astype(np.int16)
sf.write(path_output, pcm16, sample_rate)
单通道音频制作双声道立体音
import soundfile as sf
import librosa
import numpy as np
# Build a two-channel stereo signal from a single source: the left and right
# channels differ in loudness and delay to give a surround-like impression.
def make_stereo(path_wav_input):
    """Convert an audio file into a panned stereo file ``out_stereo.wav``.

    The left gain follows ``0.5 + 0.5 * cos(...)`` over the clip and the
    right gain is its complement. Each channel additionally reads the source
    a few samples ahead, in proportion to the other channel's gain.

    Args:
        path_wav_input: Path handed to ``librosa.load(..., sr=None)``. The
            indexing below assumes the loaded signal is 1-D (mono).

    Returns:
        Float array of shape ``(n_samples, 2)``; column 0 is the left
        channel and column 1 the right. The same data scaled by 32767 and
        cast to int16 is written to ``out_stereo.wav``.
    """
    y, sample_rate = librosa.load(path_wav_input, sr=None)
    n_samples = y.size
    y_stereo = np.zeros((n_samples, 2))

    if n_samples:
        # Number of left/right sweep periods over the clip; a larger value
        # makes the sound swap sides faster.
        n_swap = 2
        t_circle = n_samples / n_swap  # period length in samples

        # Stereo delay. Where the delay is switched off (near the end of the
        # clip) the waveform is not perfectly smooth, and a long delay makes
        # a pitch stretch audible (Doppler-like effect).
        # NOTE(review): the original comment mentioned 50 m, the value is 10.
        distance_delay = 10
        time_delay = int(sample_rate * distance_delay / 340)

        idx = np.arange(n_samples)
        # Loudness: cosine mapped into the 0..1 range, periodic in t_circle.
        l_ratio = 0.5 + 0.5 * np.cos(2 * np.pi * idx / t_circle)
        r_ratio = 1 - l_ratio

        # The delay is only applied while idx + time_delay stays inside the
        # signal; the remaining samples use no delay.
        can_delay = idx < n_samples - 1 - time_delay
        l_delay = np.where(can_delay, (r_ratio * time_delay).astype(int), 0)
        r_delay = np.where(can_delay, (l_ratio * time_delay).astype(int), 0)

        # Every frame is filled, including the last one (a loop over
        # range(n_samples - 1) would leave it silent).
        y_stereo[:, 0] = l_ratio * y[idx + l_delay]
        y_stereo[:, 1] = r_ratio * y[idx + r_delay]

    path_output = "out_stereo.wav"
    sf.write(path_output, (y_stereo * 32767).astype(np.int16), sample_rate)
    return y_stereo
# Run the stereo conversion on "in.wav"; the function itself writes the output file.
make_stereo("in.wav")
绘制波形的mel谱
#coding:utf-8
import numpy as np
import librosa
import librosa.filters
import scipy.signal as signal
#绘图相关
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot_alignment(alignment, path, info=None):
    """Draw ``alignment`` as a heat map and save it to ``path`` as a PNG.

    Args:
        alignment: Image-like data handed to ``Axes.imshow``.
        path: Destination passed to ``savefig``.
        info: Optional text appended below the x-axis label.
    """
    fig, ax = plt.subplots()
    try:
        im = ax.imshow(
            alignment,
            aspect='auto',
            origin='lower',
            interpolation='none')
        fig.colorbar(im, ax=ax)
        xlabel = 'phone list'
        if info is not None:
            xlabel += '\n\n' + info
        ax.set_xlabel(xlabel)
        ax.set_ylabel('mel-spectrum')
        fig.tight_layout()
        fig.savefig(path, format='png')
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
# Acoustic settings
class hparams:
    """Constants read by the spectrogram helpers of this script."""

    # Sizes of the mel filter bank and of the linear spectrum.
    num_mels = 80
    num_freq = 1025

    # Sampling rate and frame geometry (the *_ms values are divided by 1000
    # and multiplied by the sample rate where they are used).
    sample_rate = 16000
    frame_length_ms = 50
    frame_shift_ms = 12.5

    # Pre-emphasis filter coefficient.
    preemphasis = 0.97

    # Decibel levels.
    min_level_db = -100
    ref_level_db = 20
def load_wav(path):
    """Load ``path`` at ``hparams.sample_rate`` and return only the samples."""
    loaded = librosa.core.load(path, sr=hparams.sample_rate)
    samples = loaded[0]  # the second item of the result is not used here
    return samples
def melspectrogram(y):
    """Return the normalized mel spectrogram of waveform ``y``.

    Pipeline: pre-emphasis -> STFT -> magnitude -> mel projection -> dB ->
    normalization.
    """
    emphasized = _preemphasis(y)
    magnitudes = np.abs(_stft(emphasized))
    mel_db = _amp_to_db(_linear_to_mel(magnitudes))
    return _normalize(mel_db)
def _stft(y):
    """Run ``librosa.stft`` on ``y`` with sizes derived from ``hparams``."""
    stft_args = dict(
        n_fft=(hparams.num_freq - 1) * 2,
        # Frame shift and frame length converted from milliseconds to samples.
        hop_length=int(hparams.frame_shift_ms / 1000 * hparams.sample_rate),
        win_length=int(hparams.frame_length_ms / 1000 * hparams.sample_rate),
    )
    return librosa.stft(y=y, **stft_args)
def _preemphasis(x):
    """Filter ``x`` with numerator ``[1, -hparams.preemphasis]`` and denominator ``[1]``."""
    numerator = [1, -hparams.preemphasis]
    denominator = [1]
    return signal.lfilter(numerator, denominator, x)
def _amp_to_db(x):
    """Convert amplitudes to decibels (``20 * log10``), flooring the input at 1e-5."""
    floored = np.maximum(1e-5, x)
    return 20 * np.log10(floored)
# Mel filter bank, built on first use and then reused.
_mel_basis = None


def _linear_to_mel(spectrogram):
    """Multiply the cached mel basis with ``spectrogram`` (``np.dot``)."""
    global _mel_basis
    basis = _mel_basis
    if basis is None:
        basis = _mel_basis = _build_mel_basis()
    return np.dot(basis, spectrogram)
def _build_mel_basis():
    """Build the mel filter bank from the sizes in ``hparams``.

    The sample rate and FFT size are passed by keyword: ``librosa.filters.mel``
    rejects positional arguments in releases where they are keyword-only,
    while the keyword form is accepted either way.
    """
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(
        sr=hparams.sample_rate, n_fft=n_fft, n_mels=hparams.num_mels)
def _normalize(S):
    """Map ``S`` from ``[min_level_db, 0]`` onto ``[0, 1]`` and clip to that range."""
    floor_db = hparams.min_level_db
    scaled = (S - floor_db) / -floor_db
    return np.clip(scaled, 0, 1)
# Compute the mel spectrogram of test.wav and display it as an image.
filename = "test.wav"
wave = load_wav(filename)
D = _stft(_preemphasis(wave))
S = _amp_to_db(_linear_to_mel(np.abs(D)))
mel_spectrogram = _normalize(S).astype(np.float32)
fig, ax = plt.subplots()
# The first axis is reversed before plotting, presumably so that the lowest
# mel band ends up at the bottom of the image.
hotmap = ax.imshow(mel_spectrogram[::-1])
plt.colorbar(mappable=hotmap)
plt.show()
# Heading that was fused onto the plt.show() line in the original notes:
# "Waveform generation and fft+ifft:" (no code for that section is present).
任意维度的卷积的实现:
import os
import sys
import re
import audio
import numpy as np
# Compute how many elements each dimension level spans. For the 3-D shape
# (2, 3, 4): 2*3*4 = 24 elements in total, 3*4 = 12 per layer, 4 per row and
# 1 per column entry, so the result is [24, 12, 4, 1]. The list is used to
# turn a flat element index into per-dimension indices (layer, row, column).
def my_nda_multiply(input):
    """Return the suffix products of ``input``, largest first, ending in 1.

    Args:
        input: 1-D NumPy array of dimension sizes.

    Returns:
        List of ``input.size + 1`` values; element ``k`` is the product of
        ``input[k:]`` and the last element is 1.
    """
    counts = [1]
    # Walk the dimensions from the innermost to the outermost one.
    for dim in input[::-1]:
        counts.append(counts[-1] * dim)
    return counts[::-1]
# Convolution over arrays with any number of dimensions
def my_conv(input, filter, stride_shape):
    """Slide ``filter`` over ``input`` and return the windowed dot products.

    No padding and no kernel flip are applied: each output element is the
    dot product of the flattened filter with the flattened input window.

    Args:
        input: N-dimensional NumPy array.
        filter: NumPy array with the same number of dimensions as ``input``.
        stride_shape: One step size per dimension (array-like).

    Returns:
        Array whose length along axis ``i`` is
        ``ceil((1 + input.shape[i] - filter.shape[i]) / stride_shape[i])``
        (clamped at 0 when the filter is larger than the input), or ``None``
        after printing "dim not match!" when the dimension counts disagree.
    """
    # Validate the dimensions.
    f_shape = filter.shape
    s_shape = np.atleast_1d(stride_shape)
    if input.ndim != filter.ndim or input.ndim != s_shape.size:
        print("dim not match!")
        return

    # Shape of the output array.
    o_shape = np.zeros(input.ndim, dtype=int)
    for i in range(o_shape.size):
        o_shape[i] = int(np.ceil((1 + input.shape[i] - f_shape[i]) / s_shape[i]))
    # A filter larger than the input yields an empty axis, not a negative one.
    o_shape = np.maximum(o_shape, 0)
    print(o_shape)

    flat_filter = filter.reshape(filter.size)
    output_list = []
    # np.ndindex enumerates the output positions with the last axis varying
    # fastest, which matches the reshape applied to output_list below.
    for out_index in np.ndindex(*o_shape):
        # Window of the input that belongs to this output position.
        index_range_list = [
            slice(k * s_shape[axis], k * s_shape[axis] + f_shape[axis], 1)
            for axis, k in enumerate(out_index)
        ]
        print("index_range_list is ", index_range_list)
        # NumPy needs a tuple here; a list of slices is not a valid index.
        aslice = input[tuple(index_range_list)]
        a_element = np.dot(flat_filter, aslice.reshape(aslice.size))
        output_list.append(a_element)

    # Reshape the collected values into the target array.
    output = np.array(output_list)
    output = output.reshape(o_shape)

    # Print the input and the result.
    print("input="+"\n", input, "\n"+"i"*50)
    print("output="+"\n", output, "\n"+"o"*50)
    return output
# Demo runs of the multi-dimensional convolution.
# NOTE(review): the module-level names `input` and `filter` shadow builtins.

# Two-dimensional case.
d = 2
if d == 2:
    input = np.arange(12, dtype=float).reshape(3, 4)
    filter = np.array([[0, 1], [0, 0]], dtype=float)
    s_shape = np.array([1, 1])
    output = my_conv(input, filter, s_shape)

# Three-dimensional case: the filter is all zeros except its first element.
d = 3
if d == 3:
    input = np.arange(24, dtype=float).reshape(2, 3, 4)
    filter = np.zeros((2, 2, 2), dtype=float)
    filter[0, 0, 0] = 1
    s_shape = np.array([1, 1, 1])
    output = my_conv(input, filter, s_shape)
下载文件
import requests

# Download one image and store it on disk.
r = requests.get('http://i.pegpic.com/pic/028326/koqn0d5tgankoqn0d5tgan.jpg', timeout=30)
# Stop on an HTTP error instead of saving the error page as a .jpg.
r.raise_for_status()
filename = 'D://a.jpg'
# The with-block closes the file even if the write fails; r.content is the
# public accessor for the response body.
with open(filename, 'wb') as target:
    target.write(r.content)
文件查找:
import glob

# List all .jpg images one directory level below the given folder.
print(glob.glob(r"E:/Picture/*/*.jpg"))

# List all .py files in the parent directory (relative path).
print(glob.glob(r'../*.py'))
目录文件处理
#coding: utf-8
import os;
import re;
# Entry point
def main(fpath):
    """Walk ``fpath`` recursively and hand every file to ``process_a_file``.

    Args:
        fpath: Root directory to scan.
    """
    # Visit every file below fpath.
    for root, dirs, files in os.walk(fpath):
        for filename in files:
            # os.path.join uses the platform separator instead of a
            # hard-coded backslash.
            full_file = os.path.join(root, filename)
            print(full_file)
            process_a_file(full_file)
# Process a single file
def process_a_file(afile):
    """Append selected lines of ``afile`` to ``out.txt``.

    A header ``NEW DOC <afile>`` is written first. Lines that start with a
    double quote act as switches: a line starting with ``"toBI"`` turns
    copying on, any other quote-led line turns it off. While copying is on,
    every line that does not start with a quote is written, right-stripped,
    followed by a newline.

    Args:
        afile: Path of the text file to read.
    """
    print(afile)
    # Both handles are closed even if reading or writing fails part-way.
    with open(afile, 'r') as ofile, open("out.txt", 'a') as wfile:
        str_head = "NEW DOC %s \n" % afile
        wfile.write(str_head)
        do_write = 0
        for eachline in ofile:
            aline = eachline.rstrip()
            # NOTE(review): the pattern '"*"' matches one or more leading
            # quotes; confirm that this is the intended test.
            if re.match('"*"', aline) is not None:
                if re.match('"toBI"', aline) is not None:
                    do_write = 1
                else:
                    do_write = 0
            elif do_write == 1:
                wfile.write(aline)
                wfile.write("\n")
# Run the entry point. The raw string keeps the backslash literal without
# relying on '\T' being an unrecognized escape sequence.
main(r'D:\Temp')
文件统计:
#coding: utf-8
import os;
import re;
import sys;
# Frequency tables. The entries below only illustrate the key/value layout.
dic_word = {'test': 1}
dic_phone = {'HH': 1}
dic_b_phone = {'HH HA': 1}
dic_t_phone = {'HH HA HA': 1}
# Sentence lengths: e.g. 10 sentences that are 8 words long.
dic_sen_len = {8: 10}
# Prosodic-phrase lengths at word level: e.g. 5 phrases that are 2 words long.
dic_rhythm_word_len = {2: 5}
# Prosodic-phrase lengths at phone level: e.g. 5 phrases that are 2 phones long.
dic_rhythm_phone_len = {2: 5}
# Entry point
def main(fpath):
    """Collect the statistics for ``fpath`` and write the report files.

    The file is read as alternating lines: odd-numbered lines (counting
    from 1) are word lines, whose first 7 characters are skipped, and
    even-numbered lines are phone lines.

    Args:
        fpath: Path of the input text file.
    """
    # Start from empty tables.
    for table in (dic_word, dic_phone, dic_b_phone, dic_t_phone,
                  dic_sen_len, dic_rhythm_word_len, dic_rhythm_phone_len):
        table.clear()

    # Process the file; the with-block closes it when done.
    with open(fpath, 'r') as ofile:
        for order, aline in enumerate(ofile, 1):
            if order % 2 == 1:  # sentence (word) line
                process_word_line(aline[7:])
            else:  # phone line
                process_phone_line(aline)

    # Write the results.
    analysis_out()
def process_word_line(aline):  # handle one word line
    """Update the phrase-length, sentence-length and word tables from one line."""
    lowered = aline.lower()
    # Prosodic phrases at word level are counted on the lower-cased line,
    # before the markers are cleaned away.
    rhythm_word_calc(lowered)
    # Clean slashes and similar symbols, split on spaces, drop empty pieces.
    tokens = []
    for piece in clean_aline(lowered).split(' '):
        if piece != "":
            tokens.append(piece)
    # Sentence length in words.
    dic_add(dic_sen_len, len(tokens))
    # Word frequencies.
    for token in tokens:
        dic_add(dic_word, token)
def process_phone_line(aline):  # handle one phone line
    """Update the phrase-length, phone, bi-phone and tri-phone tables from one line."""
    # Prosodic phrases at phone level are counted on the uncleaned line.
    rhythm_phone_calc(aline)
    # Clean slashes and similar symbols, split on spaces, drop empty pieces.
    phones = []
    for piece in clean_aline(aline).split(' '):
        if piece != "":
            phones.append(piece)
    # Single phones.
    for phone in phones:
        dic_add(dic_phone, phone)
    # Adjacent pairs.
    for first, second in zip(phones, phones[1:]):
        dic_add(dic_b_phone, "%s-%s" % (first, second))
    # Adjacent triples.
    for first, second, third in zip(phones, phones[1:], phones[2:]):
        dic_add(dic_t_phone, "%s-%s-%s" % (first, second, third))
def rhythm_word_calc(aline):  # a word containing / or % closes a prosodic phrase
    """Count prosodic-phrase lengths in words into ``dic_rhythm_word_len``."""
    words = [w for w in clean_aline_word_rhythm(aline).split(' ') if w]
    pre_pos = -1  # index of the previous phrase-closing word
    for i, word in enumerate(words):
        if '/' in word or '%' in word:
            dic_add(dic_rhythm_word_len, i - pre_pos)
            pre_pos = i
def rhythm_phone_calc(aline):  # a phone containing / or . closes a prosodic phrase
    """Count prosodic-phrase lengths in phones into ``dic_rhythm_phone_len``."""
    phones = [p for p in clean_aline_phone_rhythm(aline).split(' ') if p]
    pre_pos = -1  # index of the previous phrase-closing phone
    for i, phone in enumerate(phones):
        if '/' in phone or '.' in phone:
            dic_add(dic_rhythm_phone_len, i - pre_pos)
            pre_pos = i
# Output the results
def analysis_out():
    """Write every statistics table to its own log file."""
    reports = (
        (dic_word, 'word.log'),
        (dic_phone, 'phone.log'),
        (dic_sen_len, 'sen_len.log'),
        (dic_b_phone, 'dic_bi_phone.log'),
        (dic_t_phone, 'dic_tri_phone.log'),
        (dic_rhythm_word_len, 'dic_rhythm_word_len.log'),
        (dic_rhythm_phone_len, 'dic_rhythm_phone_len.log'),
    )
    for table, log_name in reports:
        output_a_dic(table, log_name)
def output_a_dic(a_dic, filename):
    """Write ``a_dic`` to ``filename``, one ``key count`` line per entry.

    Entries are sorted by value, largest first. Each line has the form
    ``'%s %d \\n'``.

    Args:
        a_dic: Mapping from key to integer count.
        filename: Path of the file to (over)write.
    """
    # items() is available on every Python version, unlike iteritems().
    a_list = sorted(a_dic.items(), key=lambda item: item[1], reverse=True)
    # The with-block closes the file even if a write fails.
    with open(filename, 'w') as wfile:
        for key, count in a_list:
            wfile.write('%s %d \n' % (key, count))
# Helper functions
def clean_aline(aline):
    """Strip the markup symbols from a line and trim surrounding whitespace.

    A slash followed by whitespace becomes one space; a period, comma,
    exclamation mark or double quote followed by whitespace is removed
    together with that whitespace; every ``%`` becomes a space.
    """
    substitutions = (
        (r"\/\s", " "),
        (r"\.\s", ""),
        (r"\,\s", ""),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline, flags=re.IGNORECASE)
    return aline.replace("%", " ").rstrip().lstrip()
def clean_aline_word_rhythm(aline):
    """Clean a word line while keeping the ``/`` and ``%`` phrase markers.

    A period, exclamation mark or double quote followed by whitespace is
    removed together with that whitespace; a comma followed by whitespace
    becomes one space; remaining double quotes are dropped; the result is
    trimmed.
    """
    substitutions = (
        (r"\.\s", ""),
        (r"\,\s", " "),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline, flags=re.IGNORECASE)
    return aline.replace('"', '').rstrip().lstrip()
def clean_aline_phone_rhythm(aline):
    """Clean a phone line while keeping the ``/`` and ``.`` phrase markers.

    A comma followed by whitespace becomes one space; an exclamation mark or
    double quote followed by whitespace is removed; a space in front of ``/``
    or ``.`` is dropped so the marker attaches to the preceding phone; the
    result is trimmed and a final ``.`` is appended.
    """
    substitutions = (
        (r"\,\s", " "),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline, flags=re.IGNORECASE)
    attached = aline.replace(" /", "/").replace(" .", ".")
    return attached.rstrip().lstrip() + '.'
def dic_add(adic, akey):
    """Increase the count stored under ``akey`` in ``adic`` by one.

    A key that is not present yet starts at 1. ``dict.get`` is used instead
    of ``has_key``, which only exists on Python 2 dictionaries.

    Args:
        adic: Mapping from key to integer count; modified in place.
        akey: Key to count.
    """
    adic[akey] = adic.get(akey, 0) + 1
# Command-line dispatch: with one argument it is used as the input path,
# with no argument the built-in default path is used.
n_args = len(sys.argv)
if n_args == 2:
    print(sys.argv[1])
    main(str(sys.argv[1]))
elif n_args == 1:
    # Default input file.
    main('C:\\Users\\huangzhiqiang\\PycharmProjects\\untitled\\ef4_6k.txt')
else:
    print("parameters error\n")