一、利用nao录制语音
nao录制音频保存在本地
# -*- coding: UTF-8 -*-
#利用的naoqi中的ALProxy包
import argparse
from naoqi import ALProxy
import time
# Module-level placeholders; every function below binds its own local
# proxies, so these globals are never actually read.
# NOTE(review): the name `record` is rebound by `def record(IP)` further
# down this file — confirm nothing relies on this None placeholder.
tts = audio = record = aup = None
def recordAudio(robot_IP, robot_PORT=9559, duration=4, record_path='/home/nao/record.wav'):
    """Record `duration` seconds of audio on the robot itself.

    The recording is stored on the robot's filesystem at `record_path`
    (WAV, 16 kHz) and must be fetched separately (e.g. via FTP).

    :param robot_IP: robot network address
    :param robot_PORT: NAOqi port (default 9559)
    :param duration: recording length in seconds (default 4, as before)
    :param record_path: destination file on the robot (default unchanged)
    """
    # ----------> Connect to robot <----------
    record = ALProxy("ALAudioRecorder", robot_IP, robot_PORT)
    # ----------> recording <----------
    print('start recording...')
    # (0, 0, 1, 0) enables exactly one microphone channel — presumably the
    # front mic; TODO confirm against the ALAudioRecorder channel order.
    record.startMicrophonesRecording(record_path, 'wav', 16000, (0, 0, 1, 0))
    time.sleep(duration)
    record.stopMicrophonesRecording()
    print('record over')
def getLanguage(robot_IP, robot_PORT=9559):
    """Query the robot's current TTS language, print it, and return it."""
    speech_proxy = ALProxy("ALTextToSpeech", robot_IP, robot_PORT)
    current_language = speech_proxy.getLanguage()
    print(current_language)
    return current_language
def record(IP):
    """One recording cycle: log the robot's TTS language, then capture audio.

    The original version also built an argparse parser and called
    parse_args(), but never used the result — that dead code parsed the
    global sys.argv and could abort the program when imported into a
    script run with unrelated arguments, so it has been removed.
    """
    getLanguage(IP)
    recordAudio(IP)
def say(text, IP):
    """Speak `text` aloud through the robot's Chinese TTS voice."""
    speech = ALProxy("ALTextToSpeech", IP, 9559)
    speech.setLanguage("Chinese")
    speech.say(text)
二、nao语音文件下载
nao语音文件下载
# -*- coding:UTF-8 -*-
# 文件的下载我们用的是python的FTP协议,它在与NAO的对话过程中缺乏实时性,我们也在寻求更好的解决方案,
from ftplib import FTP
def download(IP, user, passward, path, localFilePath='localRecord.wav', naoqiFile='record.wav'):
    """Fetch a recording from the robot over FTP.

    :param IP: robot address
    :param user: FTP user (typically "nao")
    :param passward: FTP password (parameter name kept for caller compatibility)
    :param path: optional remote directory; previously accepted but ignored —
        now, when truthy, the session changes into it before the transfer
        (callers passing None behave exactly as before)
    :param localFilePath: where to write the file locally
    :param naoqiFile: remote file name to retrieve
    """
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, passward)
    try:
        if path:
            ftp.cwd(path)
        bufsize = 1024
        with open(localFilePath, 'wb') as f:
            ftp.retrbinary('RETR ' + naoqiFile, f.write, bufsize)
    finally:
        # Always close the control connection, even if the transfer fails.
        ftp.quit()
def upload(IP, user, passward, path, localFilePath, naoqiFile):
    """Send a local file to the robot over FTP.

    BUG FIX: the original body was a copy of download() — it issued RETR
    and opened the local file for writing, so calling upload() actually
    downloaded and clobbered the local file. It now opens the local file
    for reading and stores it remotely with STOR.

    :param path: optional remote directory to change into (ignored when falsy)
    """
    ftp = FTP()
    ftp.connect(IP)
    ftp.login(user, passward)
    try:
        if path:
            ftp.cwd(path)
        bufsize = 1024
        with open(localFilePath, 'rb') as f:
            ftp.storbinary('STOR ' + naoqiFile, f, bufsize)
    finally:
        ftp.quit()
三、nao调用百度语音识别api
这个百度里有详细的文档,demo教程
# coding=utf-8
#我在这里使用的是百度的语音识别
import sys
import json
import base64
import time
import re
# True when running under Python 3; the urllib module layout and the best
# available timer both differ between Python 2 and 3.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode
    if sys.platform == "win32":
        # On Windows/Python 2, time.clock is the highest-resolution timer.
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time
class DemoError(Exception):
    """Raised when the Baidu token/ASR exchange fails (bad keys, scope, or empty audio)."""
    pass
class VoiceToText:
    """Minimal client for the Baidu short-speech recognition REST API.

    Usage: construct with the application's API key/secret and the audio
    file path, then call drive() to get the recognized text. Works under
    both Python 2 and 3 via the module-level compat shims (IS_PY3, urlopen,
    Request, URLError, urlencode).
    """

    def __init__(self, API_KEY, SECRET_KEY, AUDIO_FILE):
        self.API_KEY = API_KEY
        self.SECRET_KEY = SECRET_KEY
        # File to recognize; the API supports pcm/wav/amr only
        # (the "pro" tier additionally supports m4a).
        self.AUDIO_FILE = AUDIO_FILE
        # Format is derived from the last three characters of the file name.
        self.FORMAT = self.AUDIO_FILE[-3:]
        # Arbitrary device identifier required by the API.
        self.CUID = '123456PYTHON'
        # Sample rate in Hz — fixed value required by the API.
        self.RATE = 16000
        # 1537 = Mandarin with the input-method model; see the Baidu docs
        # for other language/model PIDs (e.g. 80001 for the "pro" tier,
        # which also uses ASR_URL 'http://vop.baidu.com/pro_api' and
        # SCOPE 'brain_enhanced_asr').
        self.DEV_PID = 1537
        self.ASR_URL = 'http://vop.baidu.com/server_api'
        # This scope must be enabled for the application; very old apps may
        # lack it — set SCOPE to False to skip the check.
        self.SCOPE = 'audio_voice_assistant_get'
        self.TOKEN_URL = 'http://openapi.baidu.com/oauth/2.0/token'

    def fetch_token(self):
        """Exchange API_KEY/SECRET_KEY for an OAuth access token.

        :returns: the access token string
        :raises DemoError: when the response lacks a token, or the required
            scope is not granted to the application
        """
        params = {'grant_type': 'client_credentials',
                  'client_id': self.API_KEY,
                  'client_secret': self.SECRET_KEY}
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(self.TOKEN_URL, post_data)
        try:
            f = urlopen(req)
            result_str = f.read()
        except URLError as err:
            # The HTTP error body still carries a JSON description of the
            # failure, which is parsed below like a normal response.
            result_str = err.read()
        if IS_PY3:
            result_str = result_str.decode()
        result = json.loads(result_str)
        if 'access_token' in result and 'scope' in result:
            # SCOPE may be False to deliberately skip the scope check.
            if self.SCOPE and self.SCOPE not in result['scope'].split(' '):
                raise DemoError('scope is not correct')
            return result['access_token']
        raise DemoError(
            'MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')

    def drive(self):
        """Recognize AUDIO_FILE and return the transcript.

        Also writes the transcript to "result.txt" in the working directory.

        :returns: the recognized text ("" when the API returns no result)
        :raises DemoError: when the audio file is empty
        """
        token = self.fetch_token()
        with open(self.AUDIO_FILE, 'rb') as speech_file:
            speech_data = speech_file.read()
        length = len(speech_data)
        if length == 0:
            raise DemoError('file %s length read 0 bytes' % self.AUDIO_FILE)
        # The API expects the audio base64-encoded inside the JSON body.
        speech = base64.b64encode(speech_data)
        if IS_PY3:
            speech = str(speech, 'utf-8')
        params = {'dev_pid': self.DEV_PID,
                  'format': self.FORMAT,
                  'rate': self.RATE,
                  'token': token,
                  'cuid': self.CUID,
                  'channel': 1,
                  'speech': speech,
                  'len': length
                  }
        post_data = json.dumps(params, sort_keys=False)
        req = Request(self.ASR_URL, post_data.encode('utf-8'))
        req.add_header('Content-Type', 'application/json')
        try:
            f = urlopen(req)
            result_str = f.read()
        except URLError as err:
            result_str = err.read()
        if IS_PY3:
            result_str = str(result_str, 'utf-8')
        # Extract the transcript with a regex instead of json.loads so that
        # an error response simply yields an empty string instead of raising.
        result = "".join(re.findall(r'''"result":\["(.*)"\]''', result_str))
        with open("result.txt", "w") as of:
            of.write(result)
        return result
if __name__ == '__main__':
    # NOTE(review): real credentials are hard-coded here; move them to
    # environment variables or a config file before publishing this script.
    API_KEY = 'ffoFEhUX1cRGFW8Thl9Viatx'
    SECRET_KEY = 'TI4CUb2q32UpE7uGgg0ITP42apbeTCRQ'
    # Recording previously downloaded from the robot.
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'
    STT = VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)
    # Parenthesized print works identically under Python 2 and 3 here.
    print(STT.drive())
四、main主函数
可以将主函数和上面的每部分各写成一个.py文件
主函数开头导入的各个包名就是对应.py文件的文件名。
我已经把我的机器人ip,apikey等去除,可以去百度、讯飞、图灵注册获取
# -*- coding: UTF-8 -*-
# import baiduAPI
import FTPdeal
import audio
import SpeechToText
import dialogue
import sys
import time
from naoqi import ALProxy
# Python 2 hack: re-expose sys.setdefaultencoding (removed at interpreter
# startup) so mixed str/unicode Chinese text works without explicit decoding.
reload(sys)
sys.setdefaultencoding('utf-8')
def _answer_once(IP, user, passward, path, STT, apiKey, userId, led):
    """Run one listen -> recognize -> answer -> speak cycle, driving the LEDs."""
    # Green chest LED + lit ear LEDs signal that the robot is listening;
    # they are reset at the top of the main loop.
    led.off("ChestLedsBlue")
    led.on("ChestLedsGreen")
    led.on("RightEarLeds")
    led.on("LeftEarLeds")
    audio.record(IP)  # record a few seconds of speech on the robot
    FTPdeal.download(IP, user, passward, path)  # fetch the recording locally
    stringText = str(STT.drive())  # speech -> text via Baidu ASR
    # Match against the dialogue knowledge base to get an answer.
    text = str(dialogue.dialogue(stringText, apiKey, userId))
    audio.say(text, IP)


def main():
    """Event loop: the head tactile sensors trigger Q&A rounds.

    Front sensor: one answer; middle sensor: three consecutive answers;
    rear sensor: end the conversation.
    """
    IP = "**"  # robot IP address
    user = '**'  # FTP user, default "nao"
    passward = '**'  # FTP password, default "nao"
    port = 9559
    path = None
    # Dialogue (knowledge-base) API credentials — register to obtain.
    apiKey = "******"
    userId = "**********"
    # Baidu speech-recognition credentials — register to obtain.
    API_KEY = '*********'
    SECRET_KEY = '*************************'
    AUDIO_FILE = '/home/nao/chat/localRecord.wav'  # local copy of the recording
    STT = SpeechToText.VoiceToText(API_KEY, SECRET_KEY, AUDIO_FILE)
    memory = ALProxy("ALMemory", IP, port)
    led = ALProxy("ALLeds", IP, port)
    while True:
        Front = memory.getData("FrontTactilTouched")    # front head sensor
        Front1 = memory.getData("RearTactilTouched")    # rear head sensor
        Front2 = memory.getData("MiddleTactilTouched")  # middle head sensor
        # Idle state: blue chest LED on, green off, ear LEDs off.
        led.off("ChestLedsGreen")
        led.on("ChestLedsBlue")
        led.off("RightEarLeds")
        led.off("LeftEarLeds")
        if Front == 1:
            # Front sensor: a single question/answer round.
            _answer_once(IP, user, passward, path, STT, apiKey, userId, led)
        elif Front2 == 1:
            # Middle sensor: three consecutive rounds — raise the range
            # bound for more.
            for i in range(3):
                _answer_once(IP, user, passward, path, STT, apiKey, userId, led)
                time.sleep(1)
        elif Front1 == 1:
            # Rear sensor: leave the conversation loop.
            break
    audio.say("再见,再见", IP)


if __name__ == "__main__":
    main()