实现freeswitch和文心一言语音对话_freeswitch 文心一言-CSDN博客

本文链接：https://blog.csdn.net/qq_65615440/article/details/143335733

原理：使用分机拨号，freeswitch将语音用ASR识别出来，将文字发送给文心一言，再将文心一言返回的结果使用TTS播放出来

一、配置百度MRCP

在服务器上安装百度的MRCP，可以参考这篇文章，写的很详细

https://github.com/zhlii/knowledge/blob/master/freeswitch/%E9%83%A8%E7%BD%B2%E7%99%BE%E5%BA%A6mrcpserver.md

配置完后可以测试一下，看看ASR是否可以正常运行，然后再配置TTS。这里我使用的是tts_commandline模块，可以参考一下我之前发的文章

freeswitch使用百度TTS-CSDN博客

二、申请百度智能云千帆大模型

百度智能云千帆大模型平台

到百度智能云申请大模型服务，然后创建应用生成密钥，以下是免费申请的一些服务

三、创建脚本，对接文心一言

这里我用的python脚本（记得freeswitch安装python模块），我加上了一些注释，还有一些可选的参数可以去百度智能云的API手册上查看。把代码中的client_id和client_secret替换成自己的即可，我这里使用的是ERNIE-4.0-8K模型，如果使用其他的模型可能需要更换url链接，具体的可去手册查看

#! /usr/bin/python2.7
# coding=utf-8

#实现freeswitch和文心一言语音对话

from freeswitch import *
import requests
import json
import xml.etree.ElementTree as ET

# Prompt file played to the caller by play_and_detect_speech before listening.
welcome = "/usr/local/freeswitch/sounds/zh/cn/link/ivr/8000/ivr-welcome_to_freeswitch.wav"
# Grammar name passed as the last argument of play_and_detect_speech.
grammar="baidu"
# Values substituted into the no-input-timeout / recognition-timeout
# recognizer parameters (presumably milliseconds, as in MRCP -- confirm).
no_input_timeout = "80000"
recognition_timeout = "80000"

# Obtain the OAuth access token used to authenticate against the Baidu API
def get_access_token():
    """Request an access token with the client-credentials grant.

    Replace the ``client_id`` / ``client_secret`` placeholders below with the
    API Key and Secret Key shown in your own Baidu console.

    Returns:
        The ``access_token`` value from the response, or None when the request
        fails, the response is not JSON, or the response carries no token.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    # Passed as query parameters so requests URL-encodes the values.
    params = {
        "client_id": "填自己控制台上的API Key",
        "client_secret": "填自己控制台上的Secret Key",
        "grant_type": "client_credentials",
    }
    payload = json.dumps("")
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    try:
        # A timeout keeps a stalled connection from blocking the call forever.
        response = requests.request("POST", url, params = params,
                                    headers = headers, data = payload,
                                    timeout = 10)
        token_info = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        consoleLog("ERROR", str("get access_token failed: %s" % err))
        return None

    if not isinstance(token_info, dict):
        consoleLog("ERROR", "get access_token failed: unexpected response")
        return None

    token = token_info.get("access_token")
    if token is None:
        # Surface the reason the OAuth endpoint reported, if any.
        consoleLog("ERROR", str("get access_token failed: %s %s" % (
            token_info.get("error"), token_info.get("error_description"))))
    return token

# Send the recognized sentence to the model and return its reply
def fs_ai(sentence):
    """Ask the ERNIE chat endpoint to answer ``sentence``.

    Args:
        sentence: The user's utterance as text.

    Returns:
        The value of the ``result`` key of the JSON response, or None when no
        access token is available, the request fails, the response is not
        valid JSON, or the response has no ``result`` key.
    """
    token = get_access_token()
    if not token:
        # Without this guard the URL concatenation below raises TypeError.
        consoleLog("ERROR", "result Error: no access_token")
        return None

    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=" + token
    payload = json.dumps({
        # Unique identifier of the end user
        "user_id": "2872",
        # Conversation context
        "messages": [
            {
                # Either the user or the assistant
                "role": "user",
                "content": sentence
            }
        ],
        # Randomness of the output
        "temperature": 0.95,
        # Diversity of the output text
        "top_p": 0.8,
        # Penalty applied to already generated tokens
        "penalty_score": 1,
        # Whether to enable system memory
        "enable_system_memory": False,
        # Whether to force-disable real-time search
        "disable_search": False,
        # Whether to return superscript citations
        "enable_citation": False
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }

    try:
        # (connect, read) timeout: generation can be slow, but a dead
        # connection must not hang the call indefinitely.
        response = requests.request("POST", url, headers = headers,
                                    data = payload, timeout = (5, 60))
        # Parse the JSON body into a Python dict
        response_result = json.loads(response.text)
    except (requests.exceptions.RequestException, ValueError) as err:
        consoleLog("ERROR", str("result Error: %s" % err))
        return None

    if not isinstance(response_result, dict):
        consoleLog("ERROR", "result Error")
        return None

    result = response_result.get("result")
    if result is None:
        # Include the error fields of the response to make failures diagnosable.
        consoleLog("ERROR", str("result Error: %s %s" % (
            response_result.get("error_code"),
            response_result.get("error_msg"))))
        return None

    # FreeSWITCH is handed a plain string built from the formatted message
    consoleLog("INFO", str("result is: %s" % result))
    return result

# Remove the bracketed tag the recognizer puts in front of the text
def _strip_result_tag(text):
    """Return ``text`` without a leading ``[...]`` tag, stripped of whitespace.

    The recognizer output looks like ``[2d047b7c9254416b_2_1]some words``.
    The tag is located by its closing bracket instead of a fixed length, so a
    longer or shorter tag is removed correctly and untagged text is kept.
    """
    if not text:
        return ""
    if text.startswith("["):
        end = text.find("]")
        if end != -1:
            text = text[end + 1:]
    return text.strip()


# Entry function
def handler(session, args):
    """Run the listen -> ask model -> speak loop for one call.

    Each round plays the welcome prompt while detecting speech, reads the
    recognition result from the ``detect_speech_result`` channel variable,
    sends the recognized text to ``fs_ai`` and speaks the answer through
    tts_commandline.

    Args:
        session: The FreeSWITCH session object of the call.
        args: Arguments passed to the script (unused).

    Returns:
        None. The loop ends when the session is no longer ready, when no
        recognition result is available, or when the result cannot be parsed.
    """
    detect_args = (welcome
                   + " detect:unimrcp {start-input-timers=false,no-input-timeout=" + no_input_timeout
                   + ",recognition-timeout=" + recognition_timeout + "} " + grammar)

    # Stop looping once the caller has hung up.
    while session.ready():
        session.execute("play_and_detect_speech", detect_args)
        result_xml = session.getVariable("detect_speech_result")

        if result_xml is None:
            consoleLog("ERROR","xml is None,detect error")
            return

        consoleLog("INFO", "xml is: %s" % result_xml)
        # Parse the XML to get the speech recognition result
        try:
            root = ET.fromstring(result_xml)
        except ET.ParseError as err:
            consoleLog("ERROR", str("detect_speech_result is not valid xml: %s" % err))
            return

        sentence = root.find(".//speech-to-text")
        if sentence is None:
            consoleLog("ERROR","detect_speech_result is None")
            return

        # <speech-to-text confidence="100">[2d047b7c9254416b_2_1]...</speech-to-text>
        result = _strip_result_tag(sentence.text)
        consoleLog("INFO",str("detect_speech_result is: %s" % result))
        if not result:
            # Nothing was recognized; prompt again instead of querying the model.
            continue

        result = fs_ai(result)
        if result is not None:
            # Play the answer with tts_commandline
            session.execute("speak", str("tts_commandline|4144|" + result))