一 flask之百度语音识别,语音合成,tuling机器人
1 百度语音识别,语音合成
1. 注册百度云
2. 找到百度语音
3. 找到自然语音
4. 看文档写代码
2 flask创建语音合成(python)
注意:要想使用百度的自然语音,必须下载:
1. pip3 install baidu-aip
2. FFmpeg

#语音合成
from aip import AipSpeech

""" 你的 APPID AK SK """
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

#语音合成
result = client.synthesis('先帝创业未半而中道崩殂', 'zh', 1, {
    'vol': 5,
    "spd": 3,
    "pit": 7,
    "per": 4
})
print(result)

# 识别正确返回语音二进制 错误则返回dict 参照下面错误码
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)

#注意 管理应用

#""" 你的 APPID AK SK """
#APP_ID = '你的 App ID'
#API_KEY = '你的 Api Key'
#SECRET_KEY = '你的 Secret Key'
#详细请看文档
3 flask创建语音识别(python)
# --- Baidu speech-recognition demo: SDK imports, credentials, client ---
from aip import AipSpeech
import os
""" 你的 APPID AK SK """
# NOTE(review): real API credentials are published in this file -- rotate
# these keys and load them from environment variables instead.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Read file for speech recognition
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
# Recognize a local audio file with Baidu ASR
res = client.asr(get_file_content('wyn.wma'), 'pcm', 16000, {
'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
})
# NOTE(review): if recognition fails, res has no "result" key and this line
# crashes with an opaque TypeError -- consider checking res.get("err_no").
print(res.get("result")[0])
3 两者合在一起,用函数封装
# --- Combined demo: recognition + synthesis wrapped in reusable functions ---
from aip import AipSpeech
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so the caller
    tried to play a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
# Demo driver: recognize a recording, then speak the recognized text back.
text = audio2text("wyn.wma")
filename = text2audio(text)
# os.system(filename) opens the MP3 via the OS file association --
# presumably run on Windows (TODO confirm); not portable.
os.system(filename)
tuling机器人的网址:www.tuling123.com
1 注册图灵机器人
2 创建机器人
4 把图灵机器人加入
# --- Voice assistant: Baidu ASR/TTS + NLP similarity + Tuling chatbot ---
from aip import AipSpeech,AipNlp
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (simnet similarity)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # speech client (asr / synthesis)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so the caller
    tried to play a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
def to_tuling(text):
    """Send text to the Tuling V2 chat API and return the bot's reply text.

    text: the user's question.
    Returns the reply string.
    Raises RuntimeError when the response carries no "results" (e.g. bad
    apiKey or quota exhausted) instead of an opaque TypeError.
    """
    import requests
    args = {
        "reqType": 0,  # 0 = text input
        "perception": {  # input payload
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): this is the Tuling robot's secret apiKey --
            # should not be committed to source control.
            "apiKey": "9a9a026e2eb64ed6b006ad99d27f6b9e",
            "userId": "1111"  # arbitrary per-user conversation id
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # timeout so a hung network call cannot block the caller forever
    res = requests.post(url, json=args, timeout=10)
    payload = res.json()
    results = payload.get("results")
    if not results:
        raise RuntimeError(f"Tuling API returned no results: {payload}")
    return results[0].get("values", {}).get("text")
# res = nlp.simnet("你叫什么名字","你的名字是什么")
# print(res)
# Demo driver: recognize the spoken question, answer it, speak the answer.
text = audio2text("bjtq.wma")
# simnet scores semantic similarity; >= 0.68 treats the question as
# "what is your name" and short-circuits to a canned identity answer.
if nlp.simnet("你叫什么名字",text).get("score") >= 0.68 : #semantic similarity
text = "我的名字叫大王8"
else:
text = to_tuling(text)
filename = text2audio(text)
# Plays the MP3 via the OS file association (Windows-style; not portable).
os.system(filename)
5 使用百度自然语音识别与自然语音合成,加上图灵机器人自动回复.做一个应用
# app.py
# Flask front end for the voice assistant: serves the recorder page and
# exposes /ai (process one recording) and /get_audio (fetch the reply MP3).
from flask import Flask,render_template,request,jsonify,send_file
from uuid import uuid4
import baidu_ai
app = Flask(__name__)
@app.route("/")
def index():
    """Serve the voice-recorder page."""
    page = render_template("index.html")
    return page
@app.route("/ai",methods=["POST"])
def ai():
    """Voice-assistant pipeline: recorded audio in, answer-audio filename out.

    Expects a multipart upload with the recording under the "record" key.
    Returns JSON {"filename": <mp3 name>} for the front end to fetch via
    /get_audio, or a 400 JSON error when the upload is missing.
    """
    # 1. Save the uploaded recording under a collision-free name.
    audio = request.files.get("record")
    if audio is None:
        # Guard: the original crashed with AttributeError on a missing field.
        return jsonify({"error": "missing 'record' upload"}), 400
    filename = f"{uuid4()}.wav"
    audio.save(filename)
    # 2. Transcode to PCM and run Baidu speech recognition.
    q_text = baidu_ai.audio2text(filename)
    # 3. Ask the Tuling bot (or local rules) for an answer.
    a_text = baidu_ai.to_tuling(q_text)
    # 4. Synthesize the answer to an MP3 via Baidu TTS.
    a_file = baidu_ai.text2audio(a_text)
    # 5. Hand the filename to the front end, which plays it via /get_audio.
    return jsonify({"filename":a_file})
@app.route("/get_audio/<filename>")
def get_audio(filename):
    """Serve a previously generated audio file to the front end.

    The user-supplied path component is reduced to its basename so a
    crafted request (e.g. /get_audio/..%2F..%2Fetc%2Fpasswd) cannot escape
    the working directory -- the original passed the raw value to send_file.
    """
    import os
    return send_file(os.path.basename(filename))
if __name__ == '__main__':
    # Listen on all interfaces so LAN clients can reach the recorder page.
    app.run(host="0.0.0.0", port=9527, debug=True)
# baidu_ai.py
# Backend helpers: Baidu ASR/TTS clients, NLP similarity client, Tuling API.
from aip import AipSpeech,AipNlp
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (simnet similarity)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # speech client (asr / synthesis)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so /get_audio
    served a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
def to_tuling(text):
    """Send text to the Tuling V2 chat API and return the bot's reply text.

    text: the user's question.
    Returns the reply string.
    Raises RuntimeError when the response carries no "results" (e.g. bad
    apiKey or quota exhausted) instead of an opaque TypeError.
    """
    import requests
    args = {
        "reqType": 0,  # 0 = text input
        "perception": {  # input payload
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): this is the Tuling robot's secret apiKey --
            # should not be committed to source control.
            "apiKey": "9a9a026e2eb64ed6b006ad99d27f6b9e",
            "userId": "1111"  # arbitrary per-user conversation id
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # timeout so a hung network call cannot block the Flask request forever
    res = requests.post(url, json=args, timeout=10)
    payload = res.json()
    results = payload.get("results")
    if not results:
        raise RuntimeError(f"Tuling API returned no results: {payload}")
    reply = results[0].get("values", {}).get("text")
    print("图灵答案", reply)
    return reply
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<audio controls autoplay id="player"></audio>
<p>
<button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
</p>
<p>
<button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
</p>
</body>
<!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
<script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript">
// Recording state: Recorder.js instance and a shared WebAudio context.
var reco = null;
var audio_context = new AudioContext();
// NOTE(review): navigator.getUserMedia is deprecated; modern browsers use
// navigator.mediaDevices.getUserMedia -- confirm target browser support.
navigator.getUserMedia = (navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia ||
navigator.msGetUserMedia);
// Ask for microphone access; on success wire the stream into Recorder.js.
navigator.getUserMedia({audio: true}, create_stream, function (err) {
console.log(err)
});
// Wrap the granted microphone stream in a Recorder.js instance so the
// record/stop buttons have something to drive.
function create_stream(user_media) {
    var source = audio_context.createMediaStreamSource(user_media);
    reco = new Recorder(source);
}
// Start buffering microphone audio (bound to the "record" button).
function start_reco() {
reco.record();
}
// Stop recording, upload the captured audio, then clear the buffer so the
// next recording starts fresh (bound to the "send" button).
function stop_reco_audio() {
reco.stop();
send_audio();
reco.clear();
}
// Export the recording as WAV, POST it to the Flask /ai endpoint, and play
// the synthesized answer returned by the server.
function send_audio() {
    reco.exportWAV(function (wav_file) {
        var formdata = new FormData();
        formdata.append("record", wav_file);
        console.log(formdata);
        $.ajax({
            // Relative URL: this page is served by the same Flask app, so
            // it works on any host (the original hard-coded a LAN IP).
            url: "/ai",
            type: 'post',
            processData: false,
            contentType: false,
            data: formdata,
            dataType: 'json',
            success: function (data) {
                // Point the autoplaying <audio> element at the reply MP3.
                document.getElementById("player").src = "/get_audio/" + data.filename
            }
        });
    })
}
</script>
</html>