An Intelligent Voice Dialogue Robot for University Freshman Orientation, Based on the NAO Robot Platform

1. Recording Audio with NAO

NAO records the audio and saves it on the robot's local file system (/home/nao/record.wav).

# -*- coding: UTF-8 -*-
# audio.py -- uses the ALProxy class from the naoqi SDK

from naoqi import ALProxy
import time

def recordAudio(robot_IP, robot_PORT=9559):
    # ----------> Connect to robot <----------
    tts = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    audio = ALProxy("ALAudioDevice", robot_IP, robot_PORT)
    record = ALProxy("ALAudioRecorder", robot_IP, robot_PORT)
    aup = ALProxy("ALAudioPlayer", robot_IP, robot_PORT)
    # ----------> Recording <----------
    print 'start recording...'
    record_path = '/home/nao/record.wav'
    # Start recording: wav format, 16 kHz sample rate; the (0, 0, 1, 0)
    # tuple enables only the front microphone channel
    record.startMicrophonesRecording(record_path, 'wav', 16000, (0, 0, 1, 0))
    time.sleep(4)  # recording length, in seconds
    record.stopMicrophonesRecording()  # stop recording
    print 'record over'

    # Optional: play the recording back on the robot for debugging
    # fileID = aup.playFile(record_path, 0.7, 0)

def getLanguage(robot_IP, robot_PORT=9559):
    tts = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    languageType = tts.getLanguage()
    print languageType
    return languageType

def record(IP):  # entry point used by main.py: one recording pass
    getLanguage(IP)
    recordAudio(IP)

def say(text, IP):
    robot_IP = IP
    robot_PORT = 9559
    tts = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    tts.setLanguage("Chinese")
    tts.say(text)
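
As a quick check that recording and TTS work, the two entry points can be exercised directly. This is a minimal sketch, assuming the functions above live in audio.py (as main.py below expects) and that the robot answers at 192.168.1.103 (the default address I used during development):

# -*- coding: UTF-8 -*-
# Quick test of the audio module (assumed robot address, adjust to yours)
import audio

ROBOT_IP = "192.168.1.103"

audio.say("你好", ROBOT_IP)   # the robot speaks ("hello")
audio.record(ROBOT_IP)        # records 4 seconds to /home/nao/record.wav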

2. Downloading NAO's Audio File

Download the recorded audio file from the robot to the local machine.

# -*- coding:UTF-8 -*-
# FTPdeal.py -- we transfer the file with Python's ftplib (FTP). This step is
# not real-time and adds latency to each dialogue turn; we are still looking
# for a better solution.

from ftplib import FTP


def download(IP, user, password, path, localFilePath='localRecord.wav', naoqiFile='record.wav'):
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, password)
    bufsize = 1024
    if path:
        ftp.cwd(path)  # change to the remote directory if one is given
    # ftp.nlst(path) lists the files in a remote directory
    with open(localFilePath, 'wb') as f:
        ftp.retrbinary('RETR ' + naoqiFile, f.write, bufsize)
    ftp.quit()


def upload(IP, user, password, path, localFilePath, naoqiFile):
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, password)
    bufsize = 1024
    if path:
        ftp.cwd(path)
    # STOR sends the local file to the robot
    with open(localFilePath, 'rb') as f:
        ftp.storbinary('STOR ' + naoqiFile, f, bufsize)
    ftp.quit()
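
One lower-latency alternative worth trying is fetching the file over the robot's SSH service with paramiko instead of FTP. The sketch below is only an assumption-laden example, not part of the original setup: it assumes SSH is enabled on the robot (it is by default on NAO) and that paramiko is installed on the client machine.

# -*- coding: UTF-8 -*-
# Sketch: fetch the recording over SFTP with paramiko (assumed alternative)
import paramiko

def download_sftp(IP, user, password,
                  remoteFile='/home/nao/record.wav',
                  localFilePath='localRecord.wav'):
    transport = paramiko.Transport((IP, 22))  # NAO runs an SSH server on port 22
    transport.connect(username=user, password=password)
    sftp = paramiko.SFTPClient.from_transport(transport)
    sftp.get(remoteFile, localFilePath)  # copy robot -> local machine
    sftp.close()
    transport.close()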

3. Calling the Baidu Speech Recognition API

Baidu provides detailed documentation and a demo tutorial for this API; the code below follows their demo.

# coding=utf-8
# SpeechToText.py -- speech recognition via the Baidu ASR REST API

import sys
import json
import base64
import time
import re

IS_PY3 = sys.version_info.major == 3

if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode

    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode

    if sys.platform == "win32":
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time


class DemoError(Exception):
    pass


class VoiceToText:
    def __init__(self, API_KEY, SECRET_KEY, AUDIO_FILE):
        self.API_KEY = API_KEY
        self.SECRET_KEY = SECRET_KEY

        # File to recognize; only pcm/wav/amr are supported
        # (the Pro version additionally accepts m4a)
        self.AUDIO_FILE = AUDIO_FILE
        # File format, taken from the file extension
        self.FORMAT = self.AUDIO_FILE[-3:]

        self.CUID = '123456PYTHON'
        # Sample rate: fixed at 16000 Hz
        self.RATE = 16000

        # Standard version

        self.DEV_PID = 1537  # 1537 = Mandarin, input-method model; see the docs for other PIDs/languages
        self.ASR_URL = 'http://vop.baidu.com/server_api'
        self.SCOPE = 'audio_voice_assistant_get'  # this scope grants ASR; enable it on the web console if missing (very old apps may lack it)
        self.TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'

        # For the self-trained model platform, uncomment and fill in the values
        # shown after your model goes live ("pid: 8001, modelid: 1234"
        # means dev_pid=8001, lm_id=1234):
        # DEV_PID = 8001
        # LM_ID = 1234

        # Pro ("extreme speed") version: fill in your own app key/secret and
        # enable the Pro version on the web console (it may be billed):
        # DEV_PID = 80001
        # ASR_URL = 'http://vop.baidu.com/pro_api'
        # SCOPE = 'brain_enhanced_asr'  # this scope grants the Pro ASR capability

        # Set SCOPE = False to skip the scope check (very old apps may need this)
        # SCOPE = False

    def fetch_token(self):
        """Exchange API_KEY/SECRET_KEY for an OAuth access token."""
        params = {'grant_type': 'client_credentials',
                  'client_id': self.API_KEY,
                  'client_secret': self.SECRET_KEY}
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(self.TOKEN_URL, post_data)
        try:
            f = urlopen(req)
            result_str = f.read()
        except URLError as err:
            # On an HTTP error the body still contains a JSON description
            result_str = err.read()
        if IS_PY3:
            result_str = result_str.decode()

        result = json.loads(result_str)
        if 'access_token' in result.keys() and 'scope' in result.keys():
            if self.SCOPE and (self.SCOPE not in result['scope'].split(' ')):  # SCOPE = False skips this check
                raise DemoError('scope is not correct')
            return result['access_token']
        else:
            raise DemoError(
                'MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')

    def drive(self):
        """Send the audio file to the ASR endpoint and return the transcript."""
        token = self.fetch_token()
        with open(self.AUDIO_FILE, 'rb') as speech_file:
            speech_data = speech_file.read()

        length = len(speech_data)
        if length == 0:
            raise DemoError('file %s length read 0 bytes' % self.AUDIO_FILE)
        # The audio payload travels base64-encoded inside the JSON body
        speech = base64.b64encode(speech_data)
        if IS_PY3:
            speech = str(speech, 'utf-8')
        params = {'dev_pid': self.DEV_PID,
                  'format': self.FORMAT,
                  'rate': self.RATE,
                  'token': token,
                  'cuid': self.CUID,
                  'channel': 1,
                  'speech': speech,
                  'len': length
                  }
        post_data = json.dumps(params, sort_keys=False)
        req = Request(self.ASR_URL, post_data.encode('utf-8'))
        req.add_header('Content-Type', 'application/json')
        try:
            begin = timer()
            f = urlopen(req)
            result_str = f.read()
            print("Request time cost %f" % (timer() - begin))
        except URLError as err:
            # On an HTTP error the body still contains a JSON description
            result_str = err.read()

        if IS_PY3:
            result_str = str(result_str, 'utf-8')
        # On success the transcript is in the "result" list of the JSON
        # response; on failure that key is absent and we return ''
        try:
            result = json.loads(result_str).get('result', [''])[0]
        except (ValueError, IndexError):
            result = ''
        if not IS_PY3:
            result = result.encode('utf-8')
        with open("result.txt", "w") as of:
            of.write(result)
        return result

if __name__ == '__main__':
    # Fill in the API key and secret key of your own Baidu app
    API_KEY = '******'
    SECRET_KEY = '******'
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'

    STT = VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)
    print(STT.drive())
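
For reference, a successful recognition response from this endpoint is a small JSON object whose "result" list carries the transcript, which is what the parsing in drive() relies on. The sample below is an assumed illustration of that shape (all field values are made up):

{"corpus_no": "...", "err_msg": "success.", "err_no": 0, "result": ["你好。"], "sn": "..."}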

4. The main Function

The main function and each part above can be written as separate .py files; the module names imported at the top of the main function are simply those file names (audio.py, FTPdeal.py, SpeechToText.py, dialogue.py).
I have removed my robot IP, API keys, and so on; you can register with Baidu, iFLYTEK, or Turing Robot to obtain your own.
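
The dialogue module itself is not listed in this post. Below is a minimal sketch of what dialogue.py could look like against the Turing Robot v2 web API; the endpoint URL, request fields, and response layout are assumptions based on Turing's public documentation, so verify them against your own account before use.

# -*- coding: UTF-8 -*-
# dialogue.py -- minimal sketch of the knowledge-base / chat module
# (assumed Turing Robot v2 API; check endpoint and fields in their docs)
import json
import urllib2

TURING_URL = 'http://openapi.turingapi.com/openapi/api/v2'  # assumed endpoint

def dialogue(text, apiKey, userId):
    payload = {
        "reqType": 0,  # 0 = text request
        "perception": {"inputText": {"text": text}},
        "userInfo": {"apiKey": apiKey, "userId": userId},
    }
    req = urllib2.Request(TURING_URL, json.dumps(payload),
                          {'Content-Type': 'application/json'})
    resp = json.loads(urllib2.urlopen(req).read())
    try:
        # first text answer returned by the knowledge base
        return resp['results'][0]['values']['text']
    except (KeyError, IndexError):
        return "抱歉,我没有听懂"  # fallback: "sorry, I didn't understand"

And main.py itself: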

# -*- coding: UTF-8 -*-
# import baiduAPI
import FTPdeal
import audio
import SpeechToText
import dialogue
import sys
import time
from naoqi import ALProxy

# Python 2 only: make implicit str/unicode conversions default to UTF-8
reload(sys)
sys.setdefaultencoding('utf-8')


def main():
    IP = "**"  # robot IP address
    user = '**'  # default user: nao
    password = '**'  # default password: nao
    port = 9559
    path = None

    # Chat (knowledge base) configuration:
    # fill in the API key of the chatbot account you registered
    apiKey = "******"
    userId = "**********"

    # Speech recognition configuration:
    # fill in the API key/secret of your own Baidu app
    API_KEY = '*********'
    SECRET_KEY = '*************************'
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'  # where the downloaded recording is saved
    STT = SpeechToText.VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)

    memory = ALProxy("ALMemory", IP, port)
    led = ALProxy("ALLeds", IP, port)
    # The three tactile sensors on the head act as the program's controls
    Front = 0   # front head sensor
    Front1 = 0  # rear head sensor
    Front2 = 0  # middle head sensor
    while True:
        Front = memory.getData("FrontTactilTouched")
        Front1 = memory.getData("RearTactilTouched")
        Front2 = memory.getData("MiddleTactilTouched")
        led.off("ChestLedsGreen")  # green chest LED off
        led.on("ChestLedsBlue")    # blue chest LED on: idle
        led.off("RightEarLeds")    # right ear
        led.off("LeftEarLeds")     # left ear
        if Front == 1:
            # Touching the front sensor runs one question-answer round.
            # The ear LEDs are lit while the robot is listening and go
            # dark when it stops listening.
            led.off("ChestLedsBlue")   # chest LED
            led.on("ChestLedsGreen")   # chest LED
            led.on("RightEarLeds")     # right ear
            led.on("LeftEarLeds")      # left ear
            audio.record(IP)  # record on the robot
            FTPdeal.download(IP, user, password, path)  # fetch the recording

            stringText = STT.drive()  # speech to text

            stringText = str(stringText)
            text = dialogue.dialogue(stringText, apiKey, userId)  # query the knowledge base for an answer
            text = str(text)
            audio.say(text, IP)  # speak the answer
        elif Front2 == 1:
            # Touching the middle sensor runs three question-answer rounds
            # in a row; raise the loop count for more consecutive rounds.
            for i in range(3):
                led.off("ChestLedsBlue")   # chest LED
                led.on("ChestLedsGreen")   # chest LED
                led.on("RightEarLeds")     # right ear
                led.on("LeftEarLeds")      # left ear
                # Ear LEDs are lit while listening, dark otherwise
                audio.record(IP)  # record on the robot
                FTPdeal.download(IP, user, password, path)  # fetch the recording

                stringText = STT.drive()  # speech to text

                stringText = str(stringText)
                text = dialogue.dialogue(stringText, apiKey, userId)  # query the knowledge base for an answer
                text = str(text)
                audio.say(text, IP)  # speak the answer
                time.sleep(1)

        elif Front1 == 1:
            # Tapping the rear sensor ends the dialogue program
            break

    audio.say("再见,再见", IP)


if __name__ == "__main__":
    main()
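
All of the code above targets the Python 2.7 NAOqi SDK (pynaoqi). Assuming that SDK is on your PYTHONPATH and the five .py files sit in one directory, running the whole pipeline should be as simple as `python main.py` from that directory.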


Appendix: Java code encountered while learning the NAO robot (the ALTextToSpeechProxy class from the jnaoqi SDK):

package com.aldebaran.proxy;

import com.aldebaran.proxy.Variant;
import com.aldebaran.proxy.ALProxy;

public class ALTextToSpeechProxy extends ALProxy {

    static {
        System.loadLibrary("jnaoqi");
    }

    public ALProxy proxy;

    // Default constructor.
    public ALTextToSpeechProxy(String ip, int port) {
        super("ALTextToSpeech", ip, port);
    }

    // Disables the notifications put in ALMemory during synthesis
    // (TextStarted, TextDone, CurrentBookMark, CurrentWord, ...).
    public void disableNotifications() {
        Variant result = call("disableNotifications"); // no return value
    }

    // Enables the notifications put in ALMemory during synthesis.
    public void enableNotifications() {
        Variant result = call("enableNotifications"); // no return value
    }

    // Exits and unregisters the module.
    public void exit() {
        Variant result = call("exit"); // no return value
    }

    // Returns the languages installed on the system.
    public String[] getAvailableLanguages() {
        Variant result = call("getAvailableLanguages");
        return (String[]) result.toStringArray();
    }

    // Returns the available voices (a list of voice IDs).
    public String[] getAvailableVoices() {
        Variant result = call("getAvailableVoices");
        return (String[]) result.toStringArray();
    }

    // Gets the name of the parent broker.
    public String getBrokerName() {
        Variant result = call("getBrokerName");
        return result.toString();
    }

    // Returns the language currently used by the text-to-speech engine.
    public String getLanguage() {
        Variant result = call("getLanguage");
        return result.toString();
    }

    // Returns the encoding that should be used with the specified language.
    public String getLanguageEncoding(String pLanguage) {
        Variant vpLanguage = new Variant(pLanguage);
        Variant result = call("getLanguageEncoding", vpLanguage);
        return result.toString();
    }

    // Retrieves a method's description.
    public Variant getMethodHelp(String methodName) {
        Variant vmethodName = new Variant(methodName);
        Variant result = call("getMethodHelp", vmethodName);
        return result;
    }

    // Retrieves the module's method list.
    public String[] getMethodList() {
        Variant result = call("getMethodList");
        return (String[]) result.toStringArray();
    }

    // Retrieves the module's description.
    public Variant getModuleHelp() {
        Variant result = call("getModuleHelp");
        return result;
    }

    // Returns the value of one of the voice parameters: "pitchShift",
    // "doubleVoice", "doubleVoiceLevel" or "doubleVoiceTimeShift".
    public float getParameter(String pParameterName) {
        Variant vpParameterName = new Variant(pParameterName);
        Variant result = call("getParameter", vpParameterName);
        return result.toFloat();
    }

    // Gets the method usage string, which summarises how to use the method.
    public String getUsage(String name) {
        Variant vname = new Variant(name);
        Variant result = call("getUsage", vname);
        return result.toString();
    }

    // Returns the voice currently used by the text-to-speech engine.
    public String getVoice() {
        Variant result = call("getVoice");
        return result.toString();
    }

    // Fetches the current text-to-speech volume (integer between 0 and 100).
    public float getVolume() {
        Variant result = call("getVolume");
        return result.toFloat();
    }

    // Returns true if the method with the given 'post' ID is currently running.
    public Boolean isRunning(int id) {
        Variant vid = new Variant(id);
        Variant result = call("isRunning", vid);
        return result.toBoolean();
    }

    // Loads a set of voice parameters defined in an XML file in the
    // preferences folder; the file name must begin with ALTextToSpeech_Voice_.
    public void loadVoicePreference(String pPreferenceName) {
        Variant vpPreferenceName = new Variant(pPreferenceName);
        Variant result = call("loadVoicePreference", vpPreferenceName); // no return value
    }

    // Just a ping; always returns true.
    public Boolean ping() {
        Variant result = call("ping");
        return result.toBoolean();
    }

    // Says the given text (UTF-8) through both loudspeakers; logs an error
    // if the string is empty.
    public void say(String stringToSay) {
        Variant vstringToSay = new Variant(stringToSay);
        Variant result = call("say", vstringToSay); // no return value
    }

    // Synthesizes the given text (UTF-8) into the specified RAW file
    // (22050 Hz, S16_LE, 2 channels).
    public void sayToFile(String pStringToSay, String pFileName) {
        Variant vpStringToSay = new Variant(pStringToSay);
        Variant vpFileName = new Variant(pFileName);
        Variant result = call("sayToFile", vpStringToSay, vpFileName); // no return value
    }

    // Synthesizes the text into a file, plays the file, then deletes it.
    // Useful for short syntheses when little CPU is available; do not use it
    // for low-latency synthesis or long strings.
    public void sayToFileAndPlay(String pStringToSay) {
        Variant vpStringToSay = new Variant(pStringToSay);
        Variant result = call("sayToFileAndPlay", vpStringToSay); // no return value
    }

    // Changes the language (and therefore the voice) used by the engine.
    // Must be one of the languages returned by getAvailableLanguages; to make
    // the change persist across reboots, use the robot's web settings page.
    public void setLanguage(String pLanguage) {
        Variant vpLanguage = new Variant(pLanguage);
        Variant result = call("setLanguage", vpLanguage); // no return value
    }

    // Changes a voice parameter:
    //   pitchShift: ratio of new to old fundamental frequency
    //     (range 1.0-4, or 0 to disable the effect)
    //   doubleVoice: second-voice frequency ratio (1.0-4, 0 disables)
    //   doubleVoiceLevel: level of the double voice (0-4, 1.0 = main voice)
    //   doubleVoiceTimeShift: delay between double and main voice (0-0.5)
    // If the value is not available, the parameter remains unchanged.
    public void setParameter(String pEffectName, float pEffectValue) {
        Variant vpEffectName = new Variant(pEffectName);
        Variant vpEffectValue = new Variant(pEffectValue);
        Variant result = call("setParameter", vpEffectName, vpEffectValue); // no return value
    }

    // Changes the voice. The ID must be one of the installed voices (see
    // getAvailableVoices); by default only Kenny22Enhanced (English) and
    // Julie22Enhanced (French) are available. An unknown voice is ignored
    // without throwing an exception.
    public void setVoice(String pVoiceID) {
        Variant vpVoiceID = new Variant(pVoiceID);
        Variant result = call("setVoice", vpVoiceID); // no return value
    }

    // Sets the text-to-speech output volume (between 0.0 and 1.0).
    public void setVolume(float volume) {
        Variant vvolume = new Variant(volume);
        Variant result = call("setVolume", vvolume); // no return value
    }

    // Stops the method started with 'post' that has the given ID.
    public void stop(int id) {
        Variant vid = new Variant(id);
        Variant result = call("stop", vid); // no return value
    }

    // Stops the current and all pending tasks immediately.
    public void stopAll() {
        Variant result = call("stopAll"); // no return value
    }

    // Returns the version of the module.
    public String version() {
        Variant result = call("version");
        return result.toString();
    }

    // Waits for the end of a long-running method called with 'post'.
    // timeoutPeriod is in ms; use 0 to wait indefinitely. Returns true if
    // the timeout period expired, false if the method returned.
    public Boolean wait(int id, int timeoutPeriod) {
        Variant vid = new Variant(id);
        Variant vtimeoutPeriod = new Variant(timeoutPeriod);
        Variant result = call("wait", vid, vtimeoutPeriod);
        return result.toBoolean();
    }
}