一、nao录制语音
主要利用的是naoqi中的ALProxy类
# -*- coding: UTF-8 -*-
import argparse
from naoqi import ALProxy
import time
# Module-level placeholders, all initialised to None.  The functions in this
# listing bind same-named *local* variables (none of them declares `global`),
# so these globals are never updated by them; `record` is additionally
# rebound by the `def record(...)` that appears later in this listing.
tts = audio = record = aup = None
def recordAudio(robot_IP, robot_PORT=9559, duration=5,
                record_path='/home/nao/record.wav'):
    """Record a clip from the robot's microphones into a WAV file on the robot.

    Args:
        robot_IP: IP address (or hostname) of the robot.
        robot_PORT: NAOqi port, 9559 by default.
        duration: Length of the recording in seconds (5 by default).
        record_path: Destination path of the WAV file on the robot's own
            filesystem.

    Returns:
        None.  The recording is left at ``record_path`` on the robot.
    """
    # ----------> Connect to robot <----------
    # Only the recorder service is needed for this operation.
    recorder = ALProxy("ALAudioRecorder", robot_IP, robot_PORT)

    # ----------> recording <----------
    print('start recording...')
    # 16 kHz WAV; the channel tuple enables only the third of four channels
    # (presumably the front microphone -- confirm against the NAOqi docs).
    recorder.startMicrophonesRecording(record_path, 'wav', 16000, (0, 0, 1, 0))
    try:
        time.sleep(duration)
    finally:
        # Always stop, even if the wait is interrupted (e.g. Ctrl-C), so the
        # recorder is not left running on the robot.
        recorder.stopMicrophonesRecording()
    print('record over')
def getLanguage(robot_IP, robot_PORT=9559):
    """Print and return the language reported by the ALTextToSpeech service.

    Args:
        robot_IP: IP address (or hostname) of the robot.
        robot_PORT: NAOqi port, 9559 by default.

    Returns:
        Whatever ``ALTextToSpeech.getLanguage()`` returns for this robot.
    """
    speech_proxy = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    current_language = speech_proxy.getLanguage()
    print(current_language)
    return current_language
def record(IP, robot_PORT=9559):
    """Entry point for recording: report the robot's language, then record.

    Args:
        IP: IP address (or hostname) of the robot.
        robot_PORT: NAOqi port, 9559 by default.

    Returns:
        None.
    """
    # No command-line parsing here: the address comes from the caller, and
    # parsing sys.argv inside a library function would make the host script
    # exit on any argument this function does not know about.
    getLanguage(IP, robot_PORT)
    recordAudio(IP, robot_PORT)
def say(text, IP, robot_PORT=9559):
    """Make the robot speak ``text`` with its TTS language set to Chinese.

    Args:
        text: The sentence to speak.
        IP: IP address (or hostname) of the robot.
        robot_PORT: NAOqi port, 9559 by default.

    Returns:
        None.
    """
    tts = ALProxy("ALTextToSpeech", IP, robot_PORT)
    tts.setLanguage("Chinese")
    tts.say(text)
二、nao语音文件下载
文件的下载我们用的是python的ftplib库(FTP协议),这种方式在与nao的对话过程中没有实时性,我们也在寻求更好的解决方案,也希望大神提供帮助。如果想要详细了解python的FTP协议,请自行百度,会有很多的示例代码。下面的download函数、upload函数对于我们的实验已经够用。
# -*- coding:UTF-8 -*-
from ftplib import FTP
def download(IP, user, passward, path, localFilePath = 'localRecord.wav', naoqiFile = 'record.wav'):
    """Download a file from the robot over FTP and save it locally.

    Args:
        IP: Address of the FTP server (the robot).
        user: FTP user name.
        passward: FTP password.
        path: Remote directory holding the file.  When empty or None the
            server's default login directory is used.
        localFilePath: Where to write the downloaded bytes on this machine.
        naoqiFile: Name of the remote file to retrieve.

    Returns:
        None.

    Raises:
        ftplib errors / IOError: propagated unchanged; the FTP connection is
        closed before they reach the caller.
    """
    from ftplib import all_errors  # local import: the file only imports FTP

    bufsize = 1024
    ftp = FTP()
    ftp.connect(IP)
    try:
        ftp.login(user, passward)
        if path:
            ftp.cwd(path)
        with open(localFilePath, 'wb') as f:
            ftp.retrbinary('RETR ' + naoqiFile, f.write, bufsize)
    finally:
        # Say goodbye politely; if the link is already broken just drop the
        # socket so the original error (if any) is not masked.
        try:
            ftp.quit()
        except all_errors:
            ftp.close()
def upload(IP, user, passward, path, localFilePath, naoqiFile):
    """Upload a local file to the robot over FTP.

    Args:
        IP: Address of the FTP server (the robot).
        user: FTP user name.
        passward: FTP password.
        path: Remote directory to store the file in.  When empty or None the
            server's default login directory is used.
        localFilePath: Path of the local file to send.
        naoqiFile: Name to give the file on the remote side.

    Returns:
        None.

    Raises:
        ftplib errors / IOError: propagated unchanged; the FTP connection is
        closed before they reach the caller.
    """
    from ftplib import all_errors  # local import: the file only imports FTP

    bufsize = 1024
    ftp = FTP()
    ftp.connect(IP)
    try:
        ftp.login(user, passward)
        if path:
            ftp.cwd(path)
        # Open read-only: the local file is the source and must not be
        # truncated.  STOR sends it to the server.
        with open(localFilePath, 'rb') as f:
            ftp.storbinary('STOR ' + naoqiFile, f, bufsize)
    finally:
        # Say goodbye politely; if the link is already broken just drop the
        # socket so the original error (if any) is not masked.
        try:
            ftp.quit()
        except all_errors:
            ftp.close()
三、nao调用百度API
这是根据naoqi之家的学习得来的。
http://naoqi.net/
naoqi之家https://blog.csdn.net/weixin_43509791
以下代码文件包含:
- 语音识别
- 文本翻译
百度都提供了文档教程:
语音识别https://ai.baidu.com/docs#/UNIT-v2-guide/top
翻译http://api.fanyi.baidu.com/api/trans/product/apidoc
代码的ID部分我都删除了,可以去百度注册获取。
# -*- coding: UTF-8 -*-
from aip import AipSpeech
import httplib
import hashlib#解密加密的
import urllib
import random
import json
#import langid # 识别语言类型
def translate(text, fromLang='en', toLang='zh'):
    """Translate ``text`` with the Baidu Translate HTTP API.

    Args:
        text: Text to translate.  A unicode string is encoded to UTF-8
            before it is signed and sent.
        fromLang: Source language code ('en' by default).
        toLang: Target language code ('zh' by default).

    Returns:
        The 'dst' field of the first entry in the response's 'trans_result',
        or None when the request fails or the response has no
        'trans_result'.  Failures are printed, not raised.
    """
    appid = ''       # fill in your own Baidu Translate app id
    secretKey = ''   # fill in your own Baidu Translate secret key

    # Sign and percent-encode the UTF-8 bytes of the query: passing a
    # non-ASCII unicode object to md5 or urllib.quote fails under Python 2.
    query = text if isinstance(text, bytes) else text.encode('utf-8')
    salt = str(random.randint(32768, 65536))
    sign = hashlib.md5(appid + query + salt + secretKey).hexdigest()

    myurl = ('/api/trans/vip/translate'
             + '?appid=' + appid
             + '&q=' + urllib.quote(query)
             + '&from=' + fromLang
             + '&to=' + toLang
             + '&salt=' + salt
             + '&sign=' + sign)

    # Constructing the connection object does not open a socket yet, so it
    # is safe to do outside the try block.
    httpClient = httplib.HTTPConnection('api.fanyi.baidu.com')
    try:
        httpClient.request('GET', myurl)
        # getresponse() returns an HTTPResponse object
        response_text = httpClient.getresponse().read()
        response_json = json.loads(response_text)
        if 'trans_result' not in response_json:
            # The service answered but did not translate; show what it said.
            print("error %s" % (response_json,))
            return None
        re_text = response_json['trans_result'][0]['dst']
        print(re_text)
        return re_text
    except Exception as e:
        # Best-effort by design: report the problem and hand back None.
        print("error %s" % (e,))
        return None
    finally:
        httpClient.close()
def voiceToText(localPath = 'localRecord.wav', languageType = 'English'):
    """Send a local 16 kHz WAV file to Baidu speech recognition.

    Args:
        localPath: Path of the WAV file to recognise.
        languageType: One of 'Chinese', 'ChinesePure' or 'English'; selects
            the ``dev_pid`` passed to the service.  The robot's
            ``getLanguage()`` value can be used to choose it.

    Returns:
        The first entry of the response's 'result' list.  It can be passed
        on to the translation function.

    Raises:
        KeyError: if ``languageType`` is not supported, or if the service
            response contains no 'result' (recognition failed).
        IOError: if ``localPath`` cannot be read.
    """
    languageTypeDict = {'Chinese':1536, 'ChinesePure':1537, 'English':1737}
    # Validate first so a bad argument fails before any file or network I/O.
    if languageType not in languageTypeDict:
        raise KeyError('unsupported languageType %r; expected one of %s'
                       % (languageType, sorted(languageTypeDict)))
    dev_pid = languageTypeDict[languageType]

    # Your APPID / API key / secret key
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    with open(localPath, 'rb') as fp:
        speech = fp.read()

    # client.asr(speech, format, rate, options)
    #   speech: audio data (pcm / wav / amr)
    #   format: format of the audio data, as above
    #   rate:   fixed at 16000
    #   options['dev_pid']: language model to use
    result = client.asr(speech, 'wav', 16000, {'dev_pid': dev_pid,})
    print(result)
    if 'result' not in result:
        raise KeyError('speech recognition returned no result: %r' % (result,))
    text = result["result"][0]
    print(text)
    return text
四、nao语音对话
nao的语音对话我们使用的是图灵机器人,其实它也可以做翻译功能,但是有两个缺点,一是翻译不准确,二是每天只有一次对话机会,实名认证之后有100次。下面代码的ID我也删除了,请自行在图灵机器人注册。我的实名认证还没有成功。
这个图灵也有官方文档,网上也有大牛写的教程。
但是,如果不希望再使用图灵机器人的,可以看一下百度UNIT,这个是企业注册,我用我们学校也注册成功了,它的文档教程很详细,但是没有教如何链接外部函数,实现我们所需的功能,可以深入研究一下。
# -*- coding: UTF-8 -*-
import json
import urllib2
import sys
# Python 2-only workaround: reloading `sys` makes `setdefaultencoding`
# available again, and the call switches the interpreter-wide default
# encoding for implicit str/unicode conversions to UTF-8.  This is a
# process-wide side effect of importing this module.
reload(sys)
sys.setdefaultencoding('utf-8')
def dialogue(text):
    """Send ``text`` to the Turing chatbot HTTP API and return its text reply.

    Args:
        text: The user's utterance.

    Returns:
        The 'text' value of the first entry in the response's 'results' that
        carries one.

    Raises:
        KeyError: if the response has no 'results' or none of them carries
            a 'text' value.
        urllib2.URLError: if the HTTP request fails.
    """
    api_url = "http://openapi.tuling123.com/openapi/api/v2"
    req = {
        "perception": {
            "inputText": {
                "text": text,
            },
            "selfInfo": {
                "location": {
                    "city": "shanghai",
                    "province": "shanghai",
                    "street": "wenhualu",
                },
            },
        },
        "userInfo": {
            "apiKey": "",   # fill in your own Turing API key
            "userId": "",   # fill in your own user id
        },
    }
    # Serialise the request dict to JSON and encode it as UTF-8 bytes.
    payload = json.dumps(req).encode('utf8')
    http_post = urllib2.Request(api_url, data=payload,
                                headers={'content-type': 'application/json'})
    response = urllib2.urlopen(http_post)
    try:
        response_str = response.read().decode('utf8')
    finally:
        # Release the HTTP connection even if reading/decoding fails.
        response.close()

    response_dic = json.loads(response_str)
    print('response_dic: %s' % (response_dic,))
    print('response_str: %s' % (response_str,))

    # Take the first entry that actually has text instead of assuming it is
    # always the first one in the list.
    results_text = None
    for entry in response_dic['results']:
        values = entry.get('values', {})
        if 'text' in values:
            results_text = values['text']
            break
    if results_text is None:
        raise KeyError('no text entry in response: %r' % (response_dic,))

    print('Turing的回答:')
    print('text:' + results_text)
    return results_text
#a = raw_input("I:")
#dialogue(a)
五、识别语言类型
这个用的python的langid库,用法就那一个函数。详解见baidu
# -*- coding: UTF-8 -*-
import langid
#s1 = '你们好啊'
#s2 = 'We are pleased to introduce today a new technology'
def getLanguageType(text):
    """Classify ``text`` with langid, print the classification and return it.

    Args:
        text: The string whose language should be identified.

    Returns:
        Whatever ``langid.classify(text)`` returns.
    """
    classification = langid.classify(text)
    print(classification)
    return classification
#re1 = langid.classify(s1)
#re2 = langid.classify(s2)
#print re1, re2
六、main主函数
主函数对于每个功能只能运行一遍,这些都需要完善。可以写个循环,对每个输入的语音识别成字符串,对字符串进行判断,然后跳转。这只是个建议。估计naoqi里面也有类似的。
可以将主函数和上面的每部分各写成一个.py文件,因为我就是这么做的。主函数前面的导包的各个包名就是.py文件的文件名。
# -*- coding: UTF-8 -*-
import baiduAPI
import FTPdeal
import audio
import languageType
import dialogue
def main():
    """Run one record -> download -> recognise -> chat -> speak cycle."""
    IP = "192.168.43.135"
    user, passward = 'nao', 'nao'
    path = None

    # Record on the robot, then fetch the recording to this machine.
    audio.record(IP)
    FTPdeal.download(IP, user, passward, path)

    # Speech to text.
    recognised = baiduAPI.voiceToText(languageType = 'Chinese')

    # Disabled alternative: dispatch on the recognised keyword.
    # if recognised == '翻译' or recognised == 'translate':
    #     baiduAPI.translate(recognised)
    # elif recognised == '对话':
    #     dialogue.dialogue(recognised)

    # Disabled alternative: translate the utterance and speak the result.
    # text = baiduAPI.translate(recognised)
    # print type(text), text
    # text = str(text)
    # audio.say(text, IP)

    # Chat: send the utterance to the dialogue service and speak the reply.
    reply = dialogue.dialogue(str(recognised))
    print('%s %s' % (type(reply), reply))
    audio.say(str(reply), IP)


if __name__ == "__main__":
    main()