flask ai 语音识别以及模拟人类说话

最新推荐文章于 2024-02-28 18:12:06 发布

BigC哥

最新推荐文章于 2024-02-28 18:12:06 发布

阅读量668

点赞数

本文链接：https://blog.csdn.net/weixin_43063753/article/details/88211199

版权

flask_语音识别以及合成，模拟人类说话

调用百度语音识别与语音合成接口，并将合成结果返回到页面上播放

flask_app.py

from aip import AipSpeech
import os
""" 你的 APPID AK SK """
# Baidu AI credentials (APPID / API key / secret key).  Each value can be
# overridden through an environment variable so that real keys do not have
# to be committed; the literals are the fallbacks used when it is unset.
APP_ID = os.environ.get('BAIDU_APP_ID', '156743074')
API_KEY = os.environ.get('BAIDU_API_KEY', '7qgG 9Lcx8mvmj')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'ky q0zNDnRzG9rtn6upz0xip')

# Module-level speech client built from the credentials above.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

def text2audio(answer):
    """Synthesize ``answer`` to speech and save it as an MP3 under ``chat/``.

    Args:
        answer: Text handed to ``client.synthesis``.

    Returns:
        Absolute path of the written ``.mp3`` file, or ``None`` when the
        synthesis call returned a dict instead of audio bytes.
    """
    from uuid import uuid4

    # Speech synthesis
    result = client.synthesis(answer, 'zh', 1, {
        'vol': 5,
        "spd": 4,
        "pit": 9,
        "per": 4
    })

    # On success the call returns the audio as binary data; on failure it
    # returns a dict (error codes), in which case there is nothing to write.
    if isinstance(result, dict):
        return None

    # Create the output directory on demand so the write cannot fail just
    # because ``chat/`` does not exist yet.
    chat_dir = os.path.abspath('chat')
    os.makedirs(chat_dir, exist_ok=True)

    res_file_name = os.path.join(chat_dir, f"{uuid4()}.mp3")
    with open(res_file_name, 'wb') as f:
        f.write(result)
    return res_file_name


def audio2text(filePath):
    """Convert an audio file under ``chat/`` to PCM and run speech recognition.

    Args:
        filePath: File name (relative to the ``chat`` directory) of the
            recording to recognize.

    Returns:
        Whatever ``client.asr`` returns for the converted audio.
    """
    import subprocess

    src_path = os.path.join(os.path.abspath('chat'), filePath)
    pcm_path = f"{src_path}.pcm"

    # Pass the arguments as a list (no shell): the file name may come from an
    # upload, so it must never be interpolated into a shell command line, and
    # paths containing spaces now work as well.
    subprocess.run([
        "ffmpeg", "-y", "-i", src_path,
        "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
        pcm_path,
    ])

    # Start speech recognition on the converted 16 kHz mono PCM data.
    with open(pcm_path, 'rb') as fp:
        res = client.asr(fp.read(), 'pcm', 16000, {
            'dev_pid': 1537,
        })

    return res

my_nlp.py

from aip import AipNlp
import os
""" 你的 APPID AK SK """
# Baidu AI credentials (APPID / API key / secret key).  Each value can be
# overridden through an environment variable so that real keys do not have
# to be committed; the literals are the fallbacks used when it is unset.
APP_ID = os.environ.get('BAIDU_APP_ID', '156740374')
API_KEY = os.environ.get('BAIDU_API_KEY', '7qgG cVss7fc9Lx8mvmj')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'kyvKcM1 n6upz0xip')

# Module-level NLP client built from the credentials above.
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

#图灵配置
import requests

# Request template for the Tuling API: only ``perception.inputText.text``
# varies per question; the remaining fields are fixed.
dic = {
    "reqType": 0,
    "perception": {
        "inputText": {
            "text": "你叫什么名字"
        }
    },
    "userInfo": {
        "apiKey": "e237357df4dd405f9b2dddd22320837a",
        "userId": "123123"
    }
}

def go_tuling(Q):
    """Send ``Q`` to the Tuling open API and return the first reply text.

    Args:
        Q: Question text placed into the request's ``inputText`` field.

    Returns:
        ``results[0]["values"]["text"]`` taken from the JSON response.
    """
    from copy import deepcopy

    # Fill in a private copy of the template: writing the question into the
    # shared module-level dict would let concurrent calls overwrite each
    # other's text before the request is sent.
    payload = deepcopy(dic)
    payload["perception"]["inputText"]["text"] = Q

    # The timeout keeps an unresponsive server from blocking the caller
    # indefinitely.
    res = requests.post(
        "http://openapi.tuling123.com/openapi/api/v2",
        json=payload,
        timeout=10,
    )
    answer = res.json().get("results")[0]["values"]["text"]
    print(answer)

    return answer



def my_nlp_func(text):
    """Answer ``text`` with a canned reply when it matches a known question.

    Each known question is compared with ``text`` through
    ``nlp_client.simnet``; the first one whose ``score`` reaches 0.58 decides
    the reply.  When none matches, the text is forwarded to ``go_tuling``.

    Args:
        text: The user's question.

    Returns:
        The canned reply, or the result of ``go_tuling(text)``.
    """
    canned_replies = (
        ("你的名字是什么", "我的名字叫小石榴"),
        ("你今年多大了", "我今年10岁了"),
    )

    for question, reply in canned_replies:
        score = nlp_client.simnet(question, text).get("score")
        # A response without "score" yields None; treat it as "no match"
        # instead of failing on the comparison.
        if score is not None and score >= 0.58:
            return reply

    return go_tuling(text)

baidu_aip.py

from aip import AipSpeech
import os
""" 你的 APPID AK SK """
# Baidu AI credentials (APPID / API key / secret key).  Each value can be
# overridden through an environment variable so that real keys do not have
# to be committed; the literals are the fallbacks used when it is unset.
APP_ID = os.environ.get('BAIDU_APP_ID', '15674374')
API_KEY = os.environ.get('BAIDU_API_KEY', '7qgGbZ Lx8mvmj')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'kyvKM1 tn6upz0xip')

# Module-level speech client built from the credentials above.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

def text2audio(answer):
    """Synthesize ``answer`` to speech and save it as an MP3 under ``chat/``.

    Args:
        answer: Text handed to ``client.synthesis``.

    Returns:
        Absolute path of the written ``.mp3`` file, or ``None`` when the
        synthesis call returned a dict instead of audio bytes.
    """
    from uuid import uuid4

    # Speech synthesis
    result = client.synthesis(answer, 'zh', 1, {
        'vol': 5,
        "spd": 4,
        "pit": 9,
        "per": 4
    })

    # On success the call returns the audio as binary data; on failure it
    # returns a dict (error codes), in which case there is nothing to write.
    if isinstance(result, dict):
        return None

    # Create the output directory on demand so the write cannot fail just
    # because ``chat/`` does not exist yet.
    chat_dir = os.path.abspath('chat')
    os.makedirs(chat_dir, exist_ok=True)

    res_file_name = os.path.join(chat_dir, f"{uuid4()}.mp3")
    with open(res_file_name, 'wb') as f:
        f.write(result)
    return res_file_name


def audio2text(filePath):
    """Convert an audio file under ``chat/`` to PCM and run speech recognition.

    Args:
        filePath: File name (relative to the ``chat`` directory) of the
            recording to recognize.

    Returns:
        Whatever ``client.asr`` returns for the converted audio.
    """
    import subprocess

    src_path = os.path.join(os.path.abspath('chat'), filePath)
    pcm_path = f"{src_path}.pcm"

    # Pass the arguments as a list (no shell): the file name may come from an
    # upload, so it must never be interpolated into a shell command line, and
    # paths containing spaces now work as well.
    subprocess.run([
        "ffmpeg", "-y", "-i", src_path,
        "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
        pcm_path,
    ])

    # Start speech recognition on the converted 16 kHz mono PCM data.
    with open(pcm_path, 'rb') as fp:
        res = client.asr(fp.read(), 'pcm', 16000, {
            'dev_pid': 1537,
        })

    return res

前端

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<audio  autoplay controls id="player"></audio>
<button onclick="start_reco()">录音</button>
<button onclick="stop_reco()">停止</button>
</body>
<script type="text/javascript" src="/static/Recorder.js"></script>
<script type="text/javascript" src="/static/jquery-3.3.1.min.js"></script>
<script type="text/javascript">
    // Recorder instance; stays null until microphone access has been granted
    // and create_stream has run.
    var reco = null;


    var audio_context = new AudioContext(); // audio context object

    // Prefer the promise-based navigator.mediaDevices API and fall back to the
    // legacy (vendor-prefixed) callback API only where it is missing.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({audio: true})
            .then(create_stream)
            .catch(function (err) {
                console.log(err)
            });
    } else {
        navigator.getUserMedia = (navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia);

        if (navigator.getUserMedia) {
            navigator.getUserMedia({audio: true}, create_stream, function (err) {
                console.log(err)
            });
        } else {
            // Without this guard the call above would throw a TypeError
            // because none of the variants exists.
            console.log("getUserMedia is not supported by this browser");
        }
    }

    /**
     * Success callback for the microphone request: wraps the granted media
     * stream in an audio source node and creates the Recorder on top of it.
     */
    function create_stream(user_media) {
        var mic_source = audio_context.createMediaStreamSource(user_media);

        reco = new Recorder(mic_source);
    }


    /**
     * Start recording (bound to the record button).
     */
    function start_reco() {
        // reco is still null until microphone access has been granted, so a
        // click before that must not try to call a method on it.
        if (!reco) {
            console.log("recorder is not ready yet");
            return;
        }
        reco.record();
    }

    /**
     * Stop recording, upload the captured WAV and play the server's reply.
     *
     * The WAV is posted as form field "Reco" to /uploader; the `file_name`
     * of the JSON response is appended to the /get_chat/ URL and set as the
     * source of the #player audio element.
     */
    function stop_reco() {
        // reco is still null until microphone access has been granted.
        if (!reco) {
            console.log("recorder is not ready yet");
            return;
        }

        reco.stop();
        reco.exportWAV(function (wav_file) {
            console.log(wav_file);
            var formdata = new FormData(); // form data {key: value}
            formdata.append("Reco", wav_file); // equivalent of <input type="file">
            $.ajax({
                url: "http://127.0.0.1:5000/uploader",
                type: 'post',
                // Hand the FormData over untouched: no query-string
                // serialization and no jQuery-supplied Content-Type header.
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    console.log(data);
                    document.getElementById("player").src = "http://127.0.0.1:5000/get_chat/" + data.file_name;
                },
                // Surface failed uploads instead of dropping them silently.
                error: function (xhr, status, err) {
                    console.log(status, err);
                }
            })
        });

        reco.clear();
    }


</script>
</html>