一 flask之百度语音识别,语音合成,tuling机器人
1 百度语音识别,语音合成
1. 注册百度云
2. 找到百度语音
3. 找到自然语音
4. 看文档写代码
2 flask创建语音合成(python)
注意:要想使用百度的自然语音,必须下载:
1. pip3 install baidu-aip
2. FFmpeg

#语音合成
from aip import AipSpeech

""" 你的 APPID AK SK """
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

#语音合成
result = client.synthesis('先帝创业未半而中道崩殂', 'zh', 1, {
    'vol': 5,
    "spd": 3,
    "pit": 7,
    "per": 4
})
print(result)

# 识别正确返回语音二进制 错误则返回dict 参照下面错误码
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)

#注意 管理应用

#""" 你的 APPID AK SK """
#APP_ID = '你的 App ID'
#API_KEY = '你的 Api Key'
#SECRET_KEY = '你的 Secret Key'
#详细请看文档
3 flask创建语音识别(python)
# --- Baidu speech-recognition demo: SDK imports, credentials, client ---
from aip import AipSpeech
import os
""" 你的 APPID AK SK """
# NOTE(review): real API credentials are published in this file -- rotate
# these keys and load them from environment variables instead.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Read file for speech recognition
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
# Recognize a local audio file with Baidu ASR
res = client.asr(get_file_content('wyn.wma'), 'pcm', 16000, {
'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
})
# NOTE(review): if recognition fails, res has no "result" key and this line
# crashes with an opaque TypeError -- consider checking res.get("err_no").
print(res.get("result")[0])
3 两者合在一起,用函数封装
# --- Combined demo: recognition + synthesis wrapped in reusable functions ---
from aip import AipSpeech
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so the caller
    tried to play a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
# Demo driver: recognize a recording, then speak the recognized text back.
text = audio2text("wyn.wma")
filename = text2audio(text)
# os.system(filename) opens the MP3 via the OS file association --
# presumably run on Windows (TODO confirm); not portable.
os.system(filename)
tuling机器人的网址:www.tuling123.com
1 注册图灵机器人
2 创建机器人
4 把图灵机器人加入
# --- Voice assistant: Baidu ASR/TTS + NLP similarity + Tuling chatbot ---
from aip import AipSpeech,AipNlp
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (simnet similarity)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # speech client (asr / synthesis)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so the caller
    tried to play a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
def to_tuling(text):
    """Send text to the Tuling V2 chat API and return the bot's reply text.

    text: the user's question.
    Returns the reply string.
    Raises RuntimeError when the response carries no "results" (e.g. bad
    apiKey or quota exhausted) instead of an opaque TypeError.
    """
    import requests
    args = {
        "reqType": 0,  # 0 = text input
        "perception": {  # input payload
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): this is the Tuling robot's secret apiKey --
            # should not be committed to source control.
            "apiKey": "9a9a026e2eb64ed6b006ad99d27f6b9e",
            "userId": "1111"  # arbitrary per-user conversation id
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # timeout so a hung network call cannot block the caller forever
    res = requests.post(url, json=args, timeout=10)
    payload = res.json()
    results = payload.get("results")
    if not results:
        raise RuntimeError(f"Tuling API returned no results: {payload}")
    return results[0].get("values", {}).get("text")
# res = nlp.simnet("你叫什么名字","你的名字是什么")
# print(res)
# Demo driver: recognize the spoken question, answer it, speak the answer.
text = audio2text("bjtq.wma")
# simnet scores semantic similarity; >= 0.68 treats the question as
# "what is your name" and short-circuits to a canned identity answer.
if nlp.simnet("你叫什么名字",text).get("score") >= 0.68 : #semantic similarity
text = "我的名字叫大王8"
else:
text = to_tuling(text)
filename = text2audio(text)
# Plays the MP3 via the OS file association (Windows-style; not portable).
os.system(filename)
5 使用百度自然语音识别与自然语音合成,加上图灵机器人自动回复.做一个应用
# app.py
# Flask front end for the voice assistant: serves the recorder page and
# exposes /ai (process one recording) and /get_audio (fetch the reply MP3).
from flask import Flask,render_template,request,jsonify,send_file
from uuid import uuid4
import baidu_ai
app = Flask(__name__)
@app.route("/")
def index():
    """Serve the voice-recorder page."""
    page = render_template("index.html")
    return page
@app.route("/ai",methods=["POST"])
def ai():
    """Voice-assistant pipeline: recorded audio in, answer-audio filename out.

    Expects a multipart upload with the recording under the "record" key.
    Returns JSON {"filename": <mp3 name>} for the front end to fetch via
    /get_audio, or a 400 JSON error when the upload is missing.
    """
    # 1. Save the uploaded recording under a collision-free name.
    audio = request.files.get("record")
    if audio is None:
        # Guard: the original crashed with AttributeError on a missing field.
        return jsonify({"error": "missing 'record' upload"}), 400
    filename = f"{uuid4()}.wav"
    audio.save(filename)
    # 2. Transcode to PCM and run Baidu speech recognition.
    q_text = baidu_ai.audio2text(filename)
    # 3. Ask the Tuling bot (or local rules) for an answer.
    a_text = baidu_ai.to_tuling(q_text)
    # 4. Synthesize the answer to an MP3 via Baidu TTS.
    a_file = baidu_ai.text2audio(a_text)
    # 5. Hand the filename to the front end, which plays it via /get_audio.
    return jsonify({"filename":a_file})
@app.route("/get_audio/<filename>")
def get_audio(filename):
    """Serve a previously generated audio file to the front end.

    The user-supplied path component is reduced to its basename so a
    crafted request (e.g. /get_audio/..%2F..%2Fetc%2Fpasswd) cannot escape
    the working directory -- the original passed the raw value to send_file.
    """
    import os
    return send_file(os.path.basename(filename))
if __name__ == '__main__':
    # Listen on all interfaces so LAN clients can reach the recorder page.
    app.run(host="0.0.0.0", port=9527, debug=True)
# baidu_ai.py
# Backend helpers: Baidu ASR/TTS clients, NLP similarity client, Tuling API.
from aip import AipSpeech,AipNlp
import time,os
""" 你的 APPID AK SK """
# NOTE(review): credentials committed in source -- rotate and move to env vars.
APP_ID = '15420336'
API_KEY = 'VwSGcqqwsCl282LGKnFwHDIA'
SECRET_KEY = 'h4oL6Y9yRuvmD0oSdQGQZchNcix4TF5P'
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # NLP client (simnet similarity)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # speech client (asr / synthesis)
# Read file
def get_file_content(filePath):
    """Transcode an audio file to 16 kHz mono 16-bit PCM via FFmpeg and return the bytes.

    filePath: path to a source audio file in any format FFmpeg understands.
    Returns the raw PCM bytes; open() raises FileNotFoundError if FFmpeg
    could not produce the output. FFmpeg must be on the PATH.
    """
    import subprocess
    # Argument-list form (shell=False): a filename containing spaces or shell
    # metacharacters is passed verbatim to ffmpeg -- the original os.system
    # f-string was a shell-injection vector.
    pcm_path = f"{filePath}.pcm"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filePath,
         "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
         pcm_path],
        check=False,  # mirror os.system: ignore exit code; open() below surfaces failure
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()
def audio2text(filepath):
    """Recognize speech in a local audio file via Baidu ASR and return the text.

    filepath: path to an audio file; get_file_content transcodes it to PCM.
    Returns the first recognition candidate.
    Raises RuntimeError (with Baidu's err_no/err_msg) when recognition fails,
    instead of the original opaque TypeError on res.get("result")[0].
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,  # 1536 = Mandarin model (see Baidu ASR docs)
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"Baidu ASR failed: {res}")
    print(candidates[0])  # single lookup instead of the original double res.get(...)
    return candidates[0]
def text2audio(text):
    """Synthesize Chinese text to an MP3 file via Baidu TTS.

    text: the Chinese sentence to speak.
    Returns the generated filename (timestamp-based, in the CWD).
    Raises RuntimeError when synthesis fails -- the original silently
    returned a filename for a file that was never written, so /get_audio
    served a nonexistent MP3.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,   # volume
        "spd": 3,   # speed
        "pit": 7,   # pitch
        "per": 4    # voice selection
    })
    # Success returns raw MP3 bytes; failure returns an error dict
    # (see Baidu's error-code table).
    if isinstance(result, dict):
        raise RuntimeError(f"Baidu TTS failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename
def to_tuling(text):
    """Send text to the Tuling V2 chat API and return the bot's reply text.

    text: the user's question.
    Returns the reply string.
    Raises RuntimeError when the response carries no "results" (e.g. bad
    apiKey or quota exhausted) instead of an opaque TypeError.
    """
    import requests
    args = {
        "reqType": 0,  # 0 = text input
        "perception": {  # input payload
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): this is the Tuling robot's secret apiKey --
            # should not be committed to source control.
            "apiKey": "9a9a026e2eb64ed6b006ad99d27f6b9e",
            "userId": "1111"  # arbitrary per-user conversation id
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # timeout so a hung network call cannot block the Flask request forever
    res = requests.post(url, json=args, timeout=10)
    payload = res.json()
    results = payload.get("results")
    if not results:
        raise RuntimeError(f"Tuling API returned no results: {payload}")
    reply = results[0].get("values", {}).get("text")
    print("图灵答案", reply)
    return reply
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<audio controls autoplay id="player"></audio>
<p>
<button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
</p>
<p>
<button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
</p>
</body>
<!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
<script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript">
// Recording state: Recorder.js instance and a shared WebAudio context.
var reco = null;
var audio_context = new AudioContext();
// NOTE(review): navigator.getUserMedia is deprecated; modern browsers use
// navigator.mediaDevices.getUserMedia -- confirm target browser support.
navigator.getUserMedia = (navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
navigator.mozGetUserMedia ||
navigator.msGetUserMedia);
// Ask for microphone access; on success wire the stream into Recorder.js.
navigator.getUserMedia({audio: true}, create_stream, function (err) {
console.log(err)
});
// Wrap the granted microphone stream in a Recorder.js instance so the
// record/stop buttons have something to drive.
function create_stream(user_media) {
    var source = audio_context.createMediaStreamSource(user_media);
    reco = new Recorder(source);
}
// Start buffering microphone audio (bound to the "record" button).
function start_reco() {
reco.record();
}
// Stop recording, upload the captured audio, then clear the buffer so the
// next recording starts fresh (bound to the "send" button).
function stop_reco_audio() {
reco.stop();
send_audio();
reco.clear();
}
// Export the recording as WAV, POST it to the Flask /ai endpoint, and play
// the synthesized answer returned by the server.
function send_audio() {
    reco.exportWAV(function (wav_file) {
        var formdata = new FormData();
        formdata.append("record", wav_file);
        console.log(formdata);
        $.ajax({
            // Relative URL: this page is served by the same Flask app, so
            // it works on any host (the original hard-coded a LAN IP).
            url: "/ai",
            type: 'post',
            processData: false,
            contentType: false,
            data: formdata,
            dataType: 'json',
            success: function (data) {
                // Point the autoplaying <audio> element at the reply MP3.
                document.getElementById("player").src = "/get_audio/" + data.filename
            }
        });
    })
}
</script>
</html>