目标:
1.学习使用百度AI开放平台进行语音识别与语音合成
百度AI开放平台提供两种开发方式:REST API 和 SDK;本文使用 REST API + Python 进行开发
文档地址:https://ai.baidu.com/docs#/ASR-Guide/top
2.将学到的内容:JSON数据,HTTP API调用,base64文件编码与解码
3.生成exe格式可执行文件
pyinstaller -F xxx.py 参考文档 https://blog.csdn.net/qq_35203425/article/details/78568141
4.python-Tkinter图形界面开发
问题点:
1.界面上的控件不会随窗口大小的变化而自动缩放
2.未添加异常捕获
界面:
代码:
import base64
import json
import os
from tkinter import *
from tkinter import messagebox
from tkinter import ttk
from tkinter.filedialog import askopenfilename,askdirectory
from urllib.parse import urlencode,quote_plus

import requests
class Audio():
    """Tkinter demo for Baidu AI speech recognition and speech synthesis.

    The window has two panels: the left one uploads an audio file to the
    recognition endpoint and prints the transcript; the right one sends
    typed text to the synthesis endpoint and saves the reply as
    ``audio.mp3`` in a user-chosen directory.

    Args:
        api_key: Baidu AI API key. Falls back to the ``BAIDU_API_KEY``
            environment variable, then to the built-in demo key.
        secret_key: Baidu AI secret key. Falls back to the
            ``BAIDU_SECRET_KEY`` environment variable, then to the
            built-in demo secret.
    """

    TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
    # Seconds to wait for any single HTTP call; without a timeout a stalled
    # connection would freeze the GUI forever (callbacks run on the Tk thread).
    REQUEST_TIMEOUT = 15

    def __init__(self, api_key=None, secret_key=None):
        self.myWindow = Tk()
        self.path = StringVar()           # path of the audio file to recognize
        self.path1 = StringVar()          # directory where audio.mp3 is saved
        self.audio_formats = StringVar()  # audio format sent to the recognizer
        self.name = ''                    # last file chosen in selectPath
        self.name1 = ''                   # last directory chosen in savepath
        # SECURITY NOTE(review): credentials are embedded in the source as a
        # fallback so the demo keeps working unchanged. Prefer passing them
        # in or setting the environment variables, and rotate these keys.
        self.API_Key = api_key or os.environ.get(
            'BAIDU_API_KEY', 'AN6TAou6hNd5UEMxBGxUFXGC')
        self.Secret_Key = secret_key or os.environ.get(
            'BAIDU_SECRET_KEY', 'rTb6zkjVWEnuXoEVyv6cSaiblzNfxP6l')
        # Built from the attributes above so the key is defined in one place.
        self.url_1 = self._token_url(self.API_Key, self.Secret_Key)
        # NOTE(review): both endpoints use plain HTTP, so the access token
        # travels unencrypted - confirm whether the HTTPS variants can be used.
        self.url_2 = 'http://vop.baidu.com/server_api'
        self.url_3 = 'http://tsn.baidu.com/text2audio'

    @staticmethod
    def _token_url(api_key, secret_key):
        """Return the OAuth client-credentials URL for the given key pair."""
        query = urlencode([
            ('grant_type', 'client_credentials'),
            ('client_id', api_key),
            ('client_secret', secret_key),
        ])
        return '{}?{}'.format(Audio.TOKEN_URL, query)

    @staticmethod
    def _format_recognition_reply(reply):
        """Turn the recognition service's JSON reply into display text.

        Args:
            reply: Decoded JSON object returned by the recognition endpoint.

        Returns:
            The transcript (candidates joined by newlines) when ``err_msg``
            is ``'success.'``; otherwise the error message followed by the
            error number.
        """
        if reply.get('err_msg') == 'success.':
            result = reply.get('result', [])
            if isinstance(result, (list, tuple)):
                return '\n'.join(str(item) for item in result)
            return str(result)
        # err_no is formatted, not concatenated: adding it to the message
        # with "+" raises TypeError when it is not a string.
        return '{} (err_no={})'.format(reply.get('err_msg'), reply.get('err_no'))

    def _fetch_token(self):
        """Request a fresh access token from the OAuth endpoint.

        Returns:
            The ``access_token`` string.

        Raises:
            requests.RequestException: The HTTP request failed.
            ValueError: The reply was not JSON or carried no token.
        """
        response = requests.get(url=self.url_1, timeout=self.REQUEST_TIMEOUT)
        payload = response.json()
        token = payload.get('access_token')
        if not token:
            raise ValueError(
                payload.get('error_description') or '未获取到access_token')
        return token

    def _show_recognition_text(self, message):
        """Append one line to the recognition result box."""
        # The trailing newline keeps consecutive results from running together.
        self.text.insert(END, message + '\n')

    def selectPath(self):
        """Ask for the audio file to recognize and show its path."""
        chosen = askopenfilename()
        if chosen:  # a cancelled dialog keeps the previous selection
            self.name = chosen
            self.path.set(chosen)

    def savepath(self):
        """Ask for the directory that will receive the synthesized audio."""
        chosen = askdirectory()
        if chosen:  # a cancelled dialog keeps the previous selection
            self.name1 = chosen
            self.path1.set(chosen)

    def audio_recognition(self):
        """Upload the selected audio file and display the transcript.

        Any file, network or protocol failure is reported in the result box
        instead of propagating out of the Tk callback.
        """
        audio_path = self.path.get()
        audio_formats = self.audio_formats.get()
        if not audio_path or not audio_formats:
            self._show_recognition_text('未选定识别文件')
            return
        try:
            # Read the file first so an unreadable path costs no network call.
            with open(audio_path, 'rb') as f:
                audio = f.read()
            self.token = self._fetch_token()
            params = {
                "format": audio_formats,
                "rate": 16000,
                "channel": '1',
                "token": self.token,
                "cuid": 'D8-F2-CA-06-B4-59',
                'len': len(audio),  # byte length of the raw, unencoded audio
                "speech": base64.b64encode(audio).decode('utf-8')}
            post_data = json.dumps(params, sort_keys=False)
            reply = requests.post(
                self.url_2,
                data=post_data.encode('utf-8'),
                headers={'Content-Type': 'application/json'},
                timeout=self.REQUEST_TIMEOUT,
            ).json()
        except (OSError, ValueError, requests.RequestException) as err:
            self._show_recognition_text('识别失败: {}'.format(err))
            return
        self._show_recognition_text(self._format_recognition_reply(reply))

    def audi_composite(self):
        """Synthesize the typed text and save it as ``audio.mp3``.

        The save directory and the text are validated before any network
        call. Failures are shown in a message box.
        """
        save_dir = self.path1.get()
        if not save_dir:
            messagebox.showwarning('语音合成', '未选定保存路径')
            return
        # Read the whole box, minus the newline Tk always appends at the end.
        string = self.text1.get('1.0', 'end-1c').strip()
        if not string:
            messagebox.showwarning('语音合成', '未输入合成文本')
            return
        try:
            token = self._fetch_token()
            # tex is URL-encoded here and again by urlencode() below. This
            # double encoding is kept on purpose.
            # NOTE(review): believed to be what the service recommends for
            # POST requests - confirm against the TTS documentation.
            tex = quote_plus(string)
            params = {'tok': token, 'tex': tex, 'per': 1, 'spd': 5, 'pit': 5,
                      'vol': 5, 'aue': 3,
                      'cuid': 'D8-F2-CA-06-B4-59',
                      'lan': 'zh', 'ctp': 1}
            audio = requests.post(
                self.url_3,
                data=urlencode(params).encode('utf-8'),
                timeout=self.REQUEST_TIMEOUT,
            )
            # NOTE(review): assumes a successful reply has an "audio/..."
            # Content-Type and an error reply is JSON text - confirm. An
            # error body must not be written out as audio.mp3.
            content_type = audio.headers.get('Content-Type', '')
            if not content_type.startswith('audio'):
                raise ValueError(audio.text)
            path = os.path.join(save_dir, 'audio.mp3')
            with open(path, 'wb') as f:
                f.write(audio.content)
        except (OSError, ValueError, requests.RequestException) as err:
            messagebox.showerror('语音合成', '合成失败: {}'.format(err))
            return
        messagebox.showinfo('语音合成', '已保存: {}'.format(path))

    def interface(self):
        """Build the two-panel window and run the Tk event loop (blocks)."""
        self.myWindow.title('语音演示程序')
        # Geometry string is width x height + x offset + y offset.
        self.myWindow.geometry("800x400+350+200")

        # One labelled frame per feature: recognition left, synthesis right.
        frm_l = LabelFrame(self.myWindow, text='语言识别', width=380, height=380)
        frm_l.grid(row=0, column=0, ipadx=20, ipady=10, padx=10, pady=10)
        frm_r = LabelFrame(self.myWindow, text='语言合成', width=380, height=380)
        frm_r.grid(row=0, column=1, padx=10, pady=10)

        # --- Recognition panel ---
        Entry(frm_l, textvariable=self.path, width=35).grid(
            row=0, column=0, rowspan=2, columnspan=4, sticky='EW', pady=5, padx=5)
        Button(frm_l, text='选择文件', command=self.selectPath, width=10, height=1).grid(
            row=0, column=4, sticky='EW', pady=5, padx=5)
        Label(frm_l, text="音频格式").grid(row=2, column=0, sticky='EW', pady=5, padx=5)
        com = ttk.Combobox(frm_l, textvariable=self.audio_formats,
                           values=['PCM', 'WAV', 'AMR'], width=10, height=1)
        com.current(0)  # preselect the first format
        com.grid(row=2, column=1, sticky='EW', pady=5, padx=5)
        Button(frm_l, text='开始识别', command=self.audio_recognition, width=10, height=1).grid(
            row=2, column=4, sticky='EW', pady=5, padx=5)
        Label(frm_l, text='识别结果').grid(row=4, column=0, sticky='EW', pady=5, padx=5)
        self.text = Text(frm_l, width=40, height=15)
        self.text.grid(row=5, column=0, rowspan=2, columnspan=5, sticky='EW', pady=5, padx=5)

        # --- Synthesis panel ---
        self.text1 = Text(frm_r, width=50, height=19)
        self.text1.grid(row=0, column=0, rowspan=2, columnspan=4, sticky='EW', pady=5, padx=5)
        Entry(frm_r, textvariable=self.path1).grid(
            row=2, column=0, rowspan=1, columnspan=3, sticky='EW', pady=5, padx=5)
        Button(frm_r, text='保存路径', command=self.savepath, width=10, height=1).grid(
            row=2, column=3, sticky='EW', pady=5, padx=5)
        Button(frm_r, text='开始合成', command=self.audi_composite, width=10, height=1).grid(
            row=3, column=3, sticky='EW', pady=5, padx=5)

        # Start the main window's message loop.
        self.myWindow.mainloop()
if __name__ == '__main__':
    # Create the application and hand control to its Tk event loop.
    app = Audio()
    app.interface()