python发声-python3-声音处理

最新推荐文章于 2023-12-15 16:00:00 发布

weixin_37988176

最新推荐文章于 2023-12-15 16:00:00 发布

阅读量543

点赞数

先来说下二进制读写文件，这需要struct库

# Binary file read/write demo using the struct module.
import struct

a = 1
b = -1

# "h" packs a signed 16-bit integer into 2 bytes (native byte order), so the
# file ends up holding exactly 4 bytes: a followed by b.
with open("bbb.bin", "wb") as f:
    f.write(struct.pack("h", a))  # pack a and write it
    f.write(struct.pack("h", b))

# Read the two 2-byte records back; the `with` block closes the file even if
# a read fails.
with open("bbb.bin", "rb") as f:
    b1 = f.read(2)
    b2 = f.read(2)

# struct.unpack always returns a tuple, so a and b become (1,) and (-1,).
a = struct.unpack("h", b1)
b = struct.unpack("h", b2)

print(a, b)

然后是通过python创造一段声音

# Generate a 5 second, 800 Hz sine tone and store it as a mono 16-bit WAV.
import math
import struct
import wave

framerate = 8000  # samples per second
duration = 5      # length of the tone in seconds
freq = 800        # tone frequency in Hz
v = 0.5           # volume, as a fraction of full scale

# Index samples with an integer instead of accumulating a float time step:
# repeated `t += dt` drifts, which makes the number of samples unpredictable.
n_samples = duration * framerate
samples = []
for k in range(n_samples):
    t = k / framerate
    # Sine wave scaled to the signed 16-bit range. The value must be an int
    # before packing: struct refuses to pack a float with the "h" format.
    s = int(math.sin(t * math.pi * 2 * freq) * v * 32768)
    # Clamp so that a volume of 1.0 cannot overflow at +32768.
    samples.append(max(-32768, min(32767, s)))

with wave.open("v0.wav", "w") as f:
    f.setframerate(framerate)  # sample rate
    f.setnchannels(1)          # number of channels
    f.setsampwidth(2)          # bytes per sample
    # WAV sample data is little-endian, hence the explicit "<"; writing all
    # frames in one call avoids tens of thousands of tiny writes.
    f.writeframes(struct.pack("<%dh" % n_samples, *samples))

运用这个原理可以写一个简单的声音文件，比如说两只老虎

# Render the tune "Two Tigers" as a mono 16-bit WAV file.
import math
import struct
import wave

# Opened before wv() is defined because wv uses it as its default target.
ff = wave.open("v1.wav", "w")
ff.setframerate(8000)
ff.setnchannels(1)
ff.setsampwidth(2)


def wv(t=0, f=0, v=0.5, wf=ff, sr=8000):
    """Append a sine tone (or a rest) to an open wave file.

    t:  duration to write, in seconds
    f:  tone frequency in Hz; 0 writes silence
    v:  volume, as a fraction of full scale
    wf: a writable wave file object
    sr: sample rate in samples per second

    Exactly round(t * sr) samples are written. Nothing is written when that
    count is zero or negative.
    """
    n_samples = int(round(t * sr))
    if n_samples <= 0:
        return
    if f == 0:
        # sin(0) is always 0, so a rest is just zero-valued 16-bit samples.
        wf.writeframes(b"\x00\x00" * n_samples)
        return
    omega = math.pi * 2 * f
    scale = v * 32768
    # Sample, apply the volume, map to the signed 16-bit range and clamp so
    # that full volume cannot overflow at +32768.
    samples = [
        max(-32768, min(32767, int(math.sin(k / sr * omega) * scale)))
        for k in range(n_samples)
    ]
    # Pack as little-endian 16-bit integers and write the whole note at once.
    wf.writeframes(struct.pack("<%dh" % n_samples, *samples))


# Note name -> frequency in Hz ("0" is a rest).
note = {"1": 262, "2": 294, "3": 330, "4": 349, "5": 392, "6": 440, "7": 494, "6-": 220, "0": 0}

# Melody as note names, with the matching lengths in quarter-second units.
n = ["1", "2", "3", "1", "1", "2", "3", "1", "0", "3", "4", "5", "0", "3", "4", "5", "0", "5", "6", "5", "4", "3", "1", "0", "5", "6", "5", "4", "3", "1", "0", "2", "6-", "1", "0", "2", "6-", "1"]
tm = [2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1.5, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2]

try:
    for name, beats in zip(n, tm):
        wv(beats / 4.0, note[name])
finally:
    # Closing finalises the WAV header even if a note lookup fails.
    ff.close()

#河大校歌

importwaveimportmathimportstruct

ff=wave.open("henu.wav","w")

ff.setframerate(8000)

ff.setnchannels(1)

ff.setsampwidth(2)

ff.getsampwidthdef wv(t=0,f=0,v=0.5,wf=ff,sr=8000):'''t:写入时长

f:声音频率

v：音量

wf：一个可以写入的音频文件

sr：采样率'''tt=0

dt=1.0/srwhile tt<=t:

s=math.sin(tt*math.pi*2*f)*v*math.pow(math.e,-2.5*tt)*32768#采样，调节音量，映射到[-2^15,2^15)

s=int(s)

fd=struct.pack("h",s)#转换成8bit二进制数据

wf.writeframes(fd)#写入音频文件

tt+=dt#时间流逝

note={"-":0,"0":0,"1---":49,"1#---":52,"2b---":52,"2---":55,"2#---":58,"3b---":58,"3---":62,"4---":65,"4#---":69,"5b---":69,"5---":73,"5#---":78,"6b---":78,"6---":82,"6#---":87,"7b---":87,"7---":92,"1--":98,"1#--":104,"2b--":104,"2--":110,"2#--":117,"3b--":117,"3--":123,"4--":131,"4#--":139,"5b--":139,"5--":147,"5#--":156,"6b--":156,"6--":165,"6#--":175,"7b--":175,"7--":185,"1-":196,"1#-":208,"2b-":208,"2-":220,"2#-":233,"3b-":233,"3-":247,"4-":262,"4#-":277,"5b-":277,"5-":294,"5#-":311,"6b-":311,"6-":330,"6#-":349,"7b-":349,"7-":370,"1":392,"1#":415,"2b":415,"2":440,"2#":466,"3b":466,"3":494,"4":523,"4#":554,"5b":554,"5":587,"5#":622,"6b":622,"6":659,"6#":698,"7b":698,"7":740,"1+":784,"1#+":831,"2b+":831,"2+":880,"2#+":932,"3b+":932,"3+":988,"4+":1047,"4#+":1109,"5b+":1109,"5+":1175,"5#+":1245,"6b+":1245,"6+":1319,"6#+":1397,"7b+":1397,"7+":1480,"1++":1568,"1#++":1661,"2b++":1661,"2++":1760,"2#++":1865,"3b++":1865,"3++":1976,"4++":2093,"4#++":2217,"5b++":2217,"5++":2349,"5#++":2489,"6b++":2489}

n=["5-","1","1","1","7-","1","3","3","3","2","5","5","6","5","4","3","2","-","3","2","1","7-","6-","7-","1","2","3","4","6","5","4","3","2","3","1","5","-","2","3","4","4","6","6","5","4","3","4","2","3","5","2","3","2","7-","5-","1","3","5","6","5","-","6","4","2","-","2","3","4","2","3","-","6-","7-","1","-"]

tm=[2,2,2,1,1,2,2,2,1,1,3,1,2,2,2,2,2,2,3,1,1,1,2,3,1,1,1,2,2,2,2,2,2,1,1,2,2,3,1,2,2,3,1,1,1,2,2,2,2,2,3,1,1,1,2,1,1,1,1,2,2,2,2,2,2,1,1,1,1,2,2,3,1,3,4]for i inrange(len(n)):

wv(tm[i]/2.0,note[n[i]])

ff.close()

接下来是比较高级的内容：把声音文件的波形分段显示出来

# Animate the waveform of henu.wav, one chunk of frames at a time.
import struct
import wave

import matplotlib.pyplot as plt

fig = plt.figure()
# 1x1 grid, first cell. (An argument of 349 would mean: split the canvas into
# 3 rows and 4 columns and draw in the 9th cell, counting left to right and
# top to bottom.)
ax = fig.add_subplot(1, 1, 1)
# Interactive mode: drawing calls do not block the script.
plt.ion()

read_size = 200   # frames drawn per update
pass_f = 0        # leading chunks (of read_size frames each) to skip
draw_channel = 0  # index of the channel to draw
stay_time = 1.0   # seconds each chunk stays on screen

with wave.open("henu.wav", "r") as wav_file:
    frames = wav_file.getnframes()      # total number of frames
    channels = wav_file.getnchannels()  # number of channels
    if wav_file.getsampwidth() != 2:
        # The "h" unpack format below only decodes 16-bit samples.
        raise ValueError("only 16-bit WAV files are supported")

    draw_channel = draw_channel % channels
    x_data = [x / read_size for x in range(read_size)]

    i = 0
    # NOTE(review): the loop condition was truncated to `while i` in the
    # published listing; `i < frames` is the reconstruction that matches the
    # surrounding variables - confirm against the original script.
    while i < frames:
        fs = wav_file.readframes(read_size)
        i += read_size
        if pass_f > 0:
            pass_f -= 1
            continue

        # Size the format from the bytes actually returned: the last chunk is
        # usually shorter than read_size frames.
        n_values = len(fs) // 2
        if n_values == 0:
            break
        # f_data interleaves the samples of all channels.
        f_data = struct.unpack("<%dh" % n_values, fs[:n_values * 2])
        y_data = [sample / 32768.0 for sample in f_data[draw_channel::channels]]

        lines = ax.plot(x_data[:len(y_data)], y_data, 'g-', lw=1)
        plt.pause(stay_time)
        # Remove the line through the artist itself; current Matplotlib no
        # longer allows mutating ax.lines directly.
        lines[0].remove()

这个例子用机器学习的方法（用梯度下降训练一组正弦函数的系数）来拟合图像中的一条曲线

# Train a small model to fit a curve drawn in an image.
# Model: y = a1*sin(b1*x) + a2*sin(b2*x) + ..., where the b values start at
# 2*pi, 4*pi, ... and both the a and b values are trained.
#
# NOTE(review): this uses the TensorFlow 1.x graph API (placeholder, Session,
# tf.train). Under TensorFlow 2.x these names are expected to live in
# tf.compat.v1 - confirm against the installed version.
import math
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image

pix2 = math.pi * 2

# The original author notes the image shape as (249, 277, 3): height, width,
# RGB channels, with the vertical midpoint at row 125.
img = np.array(Image.open("./p2.bmp"), dtype=np.int32)
y_mid = 125.0

# Collect every dark pixel (channel sum below 200) as an (x, y) point, with x
# scaled by the image width and y measured upwards from the midpoint row.
data = []
a = img[:, :, 0] + img[:, :, 1] + img[:, :, 2]
img_h, img_w, _ = np.shape(img)
for j in range(img_w):
    for i in range(img_h):
        if a[i, j] < 200:
            data.append([j * 1.0 / img_w, (y_mid - i) * 2.0 / img_w])

data_len = len(data)
if data_len == 0:
    raise ValueError("no dark pixels found in ./p2.bmp; nothing to fit")

# Plot the extracted points.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter([p[0] for p in data], [p[1] for p in data], linewidths=1)
plt.ion()

# ---------------------------------------------------------------- model
w_size = 8
batch_size = 64

x = tf.placeholder(dtype=tf.float32, shape=[None, 1])
y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
# Amplitudes of the sine terms.
w = tf.Variable(tf.truncated_normal(shape=[w_size, 1], mean=0.0, stddev=2 / w_size, dtype=tf.float32))
# Angular frequencies, initialised to 2*pi*(k+1) and trainable.
c = tf.Variable(tf.constant([[(k + 1) * pix2 for k in range(w_size)]], tf.float32))

res = tf.matmul(tf.sin(tf.matmul(x, c)), w)
loss = tf.reduce_mean(tf.abs(res - y))
train_step = tf.train.AdamOptimizer(0.02).minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(1, 10000):
    # randrange covers every index; randint(1, data_len - 1) never picked
    # data[0] and failed outright when only one point existed.
    rnddata = [random.randrange(data_len) for _ in range(batch_size)]
    x_d = [[data[p][0]] for p in rnddata]
    y_d = [[data[p][1]] for p in rnddata]
    ls, st = sess.run([loss, train_step], feed_dict={x: x_d, y: y_d})
    print(i, ":", ls)

    if i % 100 == 0:
        # Every 100 steps, draw the current fit over the scatter plot.
        x_data = [[j / 500.0] for j in range(500)]
        y_data, ww, cc = sess.run([res, w, c], feed_dict={x: x_data})
        print(ww)
        print(cc)
        lines = ax.plot([j[0] for j in x_data], [j[0] for j in y_data], 'r-', lw=1)
        plt.pause(1)
        # Remove the line through the artist itself; current Matplotlib no
        # longer allows mutating ax.lines directly.
        lines[0].remove()

sess.close()

最后是输入一段文字，然后让程序把它读出来；前提是你要有汉字与拼音的对照表，以及每个拼音音节对应的发音文件（wav）

# Text-to-speech by concatenating pre-recorded pinyin syllable clips.
#
# The file reading and writing runs only when this file is executed as a
# script, so the helpers below can be imported without the data directories.
import wave

t2pinyin = {}  # Chinese character -> pinyin letters + tone digit, e.g. "da4"

# Parallel tables: tone-marked vowel -> plain letter and tone number.
yunmu = "āáǎàōóǒòēéěèīíǐìūúǔùǖǘǚǜ"
yindiao = "123412341234123412341234"
rp = "aaaaooooeeeeiiiiuuuuvvvv"

s = "我在学拍森"


def to_numbered_pinyin(syllable):
    """Convert one tone-marked syllable to plain letters plus a tone digit.

    The first tone-marked vowel found (in table order) is replaced by its
    plain letter and the tone number is appended. A syllable without any
    tone mark is treated as neutral tone and gets "0".
    """
    for mark, plain, tone in zip(yunmu, rp, yindiao):
        if mark in syllable:
            return syllable.replace(mark, plain) + tone
    return syllable + "0"


def parse_pinyin_pair(chars_line, pinyin_line):
    """Map each character of chars_line to its numbered pinyin.

    Assumes the syllables in pinyin_line are whitespace-separated and line up
    one-to-one with the characters - TODO confirm against the data files.
    Extra items on either side are ignored.
    """
    # Drop spaces and the trailing newline so only the characters remain.
    chars = "".join(chars_line.split())
    # Split BEFORE removing whitespace: stripping the spaces first leaves a
    # single unsplittable string.
    syllables = pinyin_line.replace("ü", "v").split()
    return {ch: to_numbered_pinyin(sy) for ch, sy in zip(chars, syllables)}


def load_pinyin_table(paths):
    """Build a character -> numbered pinyin dict from the given text files.

    In each file the first line holds the characters, and the first later
    line longer than 10 characters is taken to be the pinyin line.

    Raises ValueError if a file ends before a pinyin line is found.
    """
    table = {}
    for path in paths:
        with open(path, "r", encoding="utf-8") as fh:
            chars_line = fh.readline()
            pinyin_line = ""
            while len(pinyin_line) <= 10:
                pinyin_line = fh.readline()
                if not pinyin_line:
                    # readline() returns "" forever at end of file; without
                    # this check the loop would never terminate.
                    raise ValueError("no pinyin line found in " + path)
        table.update(parse_pinyin_pair(chars_line, pinyin_line))
    return table


def synthesize(text, table, out_path="py.wav", voice_dir="./pinyin/"):
    """Concatenate the syllable clip of every known character into out_path.

    Characters missing from table are skipped. The output is declared as
    16000 Hz, 2 channels, 16-bit.
    NOTE(review): clip frames are copied verbatim, so the clips presumably
    use the same format - verify against the recordings.
    """
    with wave.open(out_path, "w") as out:
        out.setframerate(16000)  # sample rate
        out.setnchannels(2)      # number of channels
        out.setsampwidth(2)      # bytes per sample
        for ch in text:
            if ch not in table:
                continue
            with wave.open(voice_dir + table[ch] + ".wav", "r") as clip:
                out.writeframes(clip.readframes(clip.getnframes()))


def main():
    """Load the pinyin table, print a sample lookup and speak the text s."""
    # NOTE(review): the original comment says there are ten files, but
    # range(1, 10) opens h1.txt .. h9.txt - confirm which one is intended.
    paths = ["./pinyin1/h" + str(i) + ".txt" for i in range(1, 10)]
    t2pinyin.update(load_pinyin_table(paths))
    print(t2pinyin["大"], t2pinyin["家"], t2pinyin["好"])
    synthesize(s, t2pinyin)


if __name__ == "__main__":
    main()

weixin_37988176

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
python发声-python3-声音处理

先来说下二进制读写文件，这需要struct库#二进制文件读写importstructa=1b=-1# print(struct.pack("h",b))# print(struct.pack("i",b))f=open("bbb.bin","wb")f.write(struct.pack("h",a...
复制链接

扫一扫

python发声-python3-声音处理

“相关推荐”对你有帮助么？