一、利用nao录制语音
nao录制音频保存在本地
# -*- coding: UTF-8 -*-
#利用的naoqi中的ALProxy包
import argparse
from naoqi import ALProxy
import time
# Module-level placeholders; every function below binds its own local
# proxies, so these globals are never actually read.
# NOTE(review): the name `record` is rebound by `def record(IP)` further
# down this file — confirm nothing relies on this None placeholder.
tts = audio = record = aup = None
def recordAudio(robot_IP, robot_PORT=9559, duration=4, record_path='/home/nao/record.wav'):
    """Record `duration` seconds of audio on the robot itself.

    The recording is stored on the robot's filesystem at `record_path`
    (WAV, 16 kHz) and must be fetched separately (e.g. via FTP).

    :param robot_IP: robot network address
    :param robot_PORT: NAOqi port (default 9559)
    :param duration: recording length in seconds (default 4, as before)
    :param record_path: destination file on the robot (default unchanged)
    """
    # ----------> Connect to robot <----------
    record = ALProxy("ALAudioRecorder", robot_IP, robot_PORT)
    # ----------> recording <----------
    print('start recording...')
    # (0, 0, 1, 0) enables exactly one microphone channel — presumably the
    # front mic; TODO confirm against the ALAudioRecorder channel order.
    record.startMicrophonesRecording(record_path, 'wav', 16000, (0, 0, 1, 0))
    time.sleep(duration)
    record.stopMicrophonesRecording()
    print('record over')
def getLanguage(robot_IP, robot_PORT=9559):
    """Query the robot's current TTS language, print it, and return it."""
    speech_proxy = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    current_language = speech_proxy.getLanguage()
    print(current_language)
    return current_language
def record(IP):
    """One recording cycle: log the robot's TTS language, then capture audio.

    The original version also built an argparse parser and called
    parse_args(), but never used the result — that dead code parsed the
    global sys.argv and could abort the program when imported into a
    script run with unrelated arguments, so it has been removed.
    """
    getLanguage(IP)
    recordAudio(IP)
def say(text, IP):
    """Speak `text` aloud through the robot's Chinese TTS voice."""
    speech = ALProxy("ALTextToSpeech", IP, 9559)
    speech.setLanguage("Chinese")
    speech.say(text)
二、nao语音文件下载
nao语音文件下载
# -*- coding:UTF-8 -*-
# 文件的下载我们用的是python的FTP协议,它在与NAO的对话过程中缺乏实时性,我们也在寻求更好的解决方案,
from ftplib import FTP
def download(IP, user, passward, path, localFilePath='localRecord.wav', naoqiFile='record.wav'):
    """Fetch a recording from the robot over FTP.

    :param IP: robot address
    :param user: FTP user (typically "nao")
    :param passward: FTP password (parameter name kept for caller compatibility)
    :param path: optional remote directory; previously accepted but ignored —
        now, when truthy, the session changes into it before the transfer
        (callers passing None behave exactly as before)
    :param localFilePath: where to write the file locally
    :param naoqiFile: remote file name to retrieve
    """
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, passward)
    try:
        if path:
            ftp.cwd(path)
        bufsize = 1024
        with open(localFilePath, 'wb') as f:
            ftp.retrbinary('RETR ' + naoqiFile, f.write, bufsize)
    finally:
        # Always close the control connection, even if the transfer fails.
        ftp.quit()
def upload(IP, user, passward, path, localFilePath, naoqiFile):
    """Send a local file to the robot over FTP.

    BUG FIX: the original body was a copy of download() — it issued RETR
    and opened the local file for writing, so calling upload() actually
    downloaded and clobbered the local file. It now opens the local file
    for reading and stores it remotely with STOR.

    :param path: optional remote directory to change into (ignored when falsy)
    """
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, passward)
    try:
        if path:
            ftp.cwd(path)
        bufsize = 1024
        with open(localFilePath, 'rb') as f:
            ftp.storbinary('STOR ' + naoqiFile, f, bufsize)
    finally:
        ftp.quit()
三、nao调用百度语音识别api
这个百度里有详细的文档,demo教程
# coding=utf-8
#我在这里使用的是百度的语音识别
import sys
import json
import base64
import time
import re
# True when running under Python 3; the urllib module layout and the best
# available timer both differ between Python 2 and 3.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode
    if sys.platform == "win32":
        # On Windows/Python 2, time.clock is the highest-resolution timer.
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time
class DemoError(Exception):
    """Raised when the Baidu token/ASR exchange fails (bad keys, scope, or empty audio)."""
    pass
class VoiceToText:
    """Minimal client for the Baidu short-speech recognition REST API.

    Usage: construct with the application's API key/secret and the audio
    file path, then call drive() to get the recognized text. Works under
    both Python 2 and 3 via the module-level compat shims (IS_PY3, urlopen,
    Request, URLError, urlencode).
    """

    def __init__(self, API_KEY, SECRET_KEY, AUDIO_FILE):
        self.API_KEY = API_KEY
        self.SECRET_KEY = SECRET_KEY
        # File to recognize; the API supports pcm/wav/amr only
        # (the "pro" tier additionally supports m4a).
        self.AUDIO_FILE = AUDIO_FILE
        # Format is derived from the last three characters of the file name.
        self.FORMAT = self.AUDIO_FILE[-3:]
        # Arbitrary device identifier required by the API.
        self.CUID = '123456PYTHON'
        # Sample rate in Hz — fixed value required by the API.
        self.RATE = 16000
        # 1537 = Mandarin with the input-method model; see the Baidu docs
        # for other language/model PIDs (e.g. 80001 for the "pro" tier,
        # which also uses ASR_URL 'http://vop.baidu.com/pro_api' and
        # SCOPE 'brain_enhanced_asr').
        self.DEV_PID = 1537
        self.ASR_URL = 'http://vop.baidu.com/server_api'
        # This scope must be enabled for the application; very old apps may
        # lack it — set SCOPE to False to skip the check.
        self.SCOPE = 'audio_voice_assistant_get'
        self.TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'

    def fetch_token(self):
        """Exchange API_KEY/SECRET_KEY for an OAuth access token.

        :returns: the access token string
        :raises DemoError: when the response lacks a token, or the required
            scope is not granted to the application
        """
        params = {'grant_type': 'client_credentials',
                  'client_id': self.API_KEY,
                  'client_secret': self.SECRET_KEY}
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(self.TOKEN_URL, post_data)
        try:
            f = urlopen(req)
            result_str = f.read()
        except URLError as err:
            # The HTTP error body still carries a JSON description of the
            # failure, which is parsed below like a normal response.
            result_str = err.read()
        if IS_PY3:
            result_str = result_str.decode()
        result = json.loads(result_str)
        if 'access_token' in result and 'scope' in result:
            # SCOPE may be False to deliberately skip the scope check.
            if self.SCOPE and self.SCOPE not in result['scope'].split(' '):
                raise DemoError('scope is not correct')
            return result['access_token']
        raise DemoError(
            'MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')

    def drive(self):
        """Recognize AUDIO_FILE and return the transcript.

        Also writes the transcript to "result.txt" in the working directory.

        :returns: the recognized text ("" when the API returns no result)
        :raises DemoError: when the audio file is empty
        """
        token = self.fetch_token()
        with open(self.AUDIO_FILE, 'rb') as speech_file:
            speech_data = speech_file.read()
        length = len(speech_data)
        if length == 0:
            raise DemoError('file %s length read 0 bytes' % self.AUDIO_FILE)
        # The API expects the audio base64-encoded inside the JSON body.
        speech = base64.b64encode(speech_data)
        if IS_PY3:
            speech = str(speech, 'utf-8')
        params = {'dev_pid': self.DEV_PID,
                  'format': self.FORMAT,
                  'rate': self.RATE,
                  'token': token,
                  'cuid': self.CUID,
                  'channel': 1,
                  'speech': speech,
                  'len': length
                  }
        post_data = json.dumps(params, sort_keys=False)
        req = Request(self.ASR_URL, post_data.encode('utf-8'))
        req.add_header('Content-Type', 'application/json')
        try:
            f = urlopen(req)
            result_str = f.read()
        except URLError as err:
            result_str = err.read()
        if IS_PY3:
            result_str = str(result_str, 'utf-8')
        # Extract the transcript with a regex instead of json.loads so that
        # an error response simply yields an empty string instead of raising.
        result = "".join(re.findall(r'''"result":\["(.*)"\]''', result_str))
        with open("result.txt", "w") as of:
            of.write(result)
        return result
if __name__ == '__main__':
    # NOTE(review): real credentials are hard-coded here; move them to
    # environment variables or a config file before publishing this script.
    API_KEY = 'ffoFEhUX1cRGFW8Thl9Viatx'
    SECRET_KEY = 'TI4CUb2q32UpE7uGgg0ITP42apbeTCRQ'
    # Recording previously downloaded from the robot.
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'
    STT = VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)
    # Parenthesized print works identically under Python 2 and 3 here.
    print(STT.drive())
四、main主函数
可以将主函数和上面的每部分各写成一个.py文件
主函数开头导入的各个包名就是对应.py文件的文件名。
我已经把我的机器人ip,apikey等去除,可以去百度、讯飞、图灵注册获取
# -*- coding: UTF-8 -*-
# import baiduAPI
import FTPdeal
import audio
import SpeechToText
import dialogue
import sys
import time
from naoqi import ALProxy
# Python 2 hack: re-expose sys.setdefaultencoding (removed at interpreter
# startup) so mixed str/unicode Chinese text works without explicit decoding.
reload(sys)
sys.setdefaultencoding('utf-8')
def _answer_once(IP, user, passward, path, STT, apiKey, userId, led):
    """Run one listen -> recognize -> answer -> speak cycle, driving the LEDs."""
    # Green chest LED + lit ear LEDs signal that the robot is listening;
    # they are reset at the top of the main loop.
    led.off("ChestLedsBlue")
    led.on("ChestLedsGreen")
    led.on("RightEarLeds")
    led.on("LeftEarLeds")
    audio.record(IP)  # record a few seconds of speech on the robot
    FTPdeal.download(IP, user, passward, path)  # fetch the recording locally
    stringText = str(STT.drive())  # speech -> text via Baidu ASR
    # Match against the dialogue knowledge base to get an answer.
    text = str(dialogue.dialogue(stringText, apiKey, userId))
    audio.say(text, IP)


def main():
    """Event loop: the head tactile sensors trigger Q&A rounds.

    Front sensor: one answer; middle sensor: three consecutive answers;
    rear sensor: end the conversation.
    """
    IP = "**"  # robot IP address
    user = '**'  # FTP user, default "nao"
    passward = '**'  # FTP password, default "nao"
    port = 9559
    path = None
    # Dialogue (knowledge-base) API credentials — register to obtain.
    apiKey = "******"
    userId = "**********"
    # Baidu speech-recognition credentials — register to obtain.
    API_KEY = '*********'
    SECRET_KEY = '*************************'
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'  # local copy of the recording
    STT = SpeechToText.VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)
    memory = ALProxy("ALMemory", IP, port)
    led = ALProxy("ALLeds", IP, port)
    while True:
        Front = memory.getData("FrontTactilTouched")    # front head sensor
        Front1 = memory.getData("RearTactilTouched")    # rear head sensor
        Front2 = memory.getData("MiddleTactilTouched")  # middle head sensor
        # Idle state: blue chest LED on, green off, ear LEDs off.
        led.off("ChestLedsGreen")
        led.on("ChestLedsBlue")
        led.off("RightEarLeds")
        led.off("LeftEarLeds")
        if Front == 1:
            # Front sensor: a single question/answer round.
            _answer_once(IP, user, passward, path, STT, apiKey, userId, led)
        elif Front2 == 1:
            # Middle sensor: three consecutive rounds — raise the range
            # bound for more.
            for i in range(3):
                _answer_once(IP, user, passward, path, STT, apiKey, userId, led)
                time.sleep(1)
        elif Front1 == 1:
            # Rear sensor: leave the conversation loop.
            break
    audio.say("再见,再见", IP)


if __name__ == "__main__":
    main()