出于机器人语音问答的需要,这里调用百度 AI 的语音识别接口。
思路很简单:使用百度提供的 API,先初始化客户端,再传入参数进行调用。
代码
-
import os
import wave

import pyaudio
from aip import AipSpeech, AipNlp
from playsound import playsound
-
-
""" 你的 APPID AK SK """
-
# Baidu AI credentials (APP ID, API key, secret key).
# Read from the environment so real secrets never have to be committed;
# the '****' placeholders are kept as the fallback values.
APP_ID = os.environ.get('BAIDU_APP_ID', '****')
API_KEY = os.environ.get('BAIDU_API_KEY', '****')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '****')
-
-
# Read a file
def get_file_content(filePath):
    """Return the full contents of the file at *filePath* as bytes.

    The file is opened in binary mode, so no text decoding is applied.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
-
-
-
# Recording
def record_content(record_seconds=3, output_path="audio.wav"):
    """Record audio through PyAudio and save it as a WAV file.

    The capture is mono, 16-bit (paInt16) at 16000 Hz, read in chunks of
    1024 frames; the duration is rounded down to a whole number of chunks.

    Args:
        record_seconds: Length of the recording in seconds (default 3).
        output_path: Destination WAV file (default "audio.wav").

    Returns:
        The path of the WAV file that was written.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 16000

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format, channels=channels,
                            rate=rate, input=True,
                            frames_per_buffer=chunk)
        print("* recording")
        try:
            frames = [
                stream.read(chunk)
                for _ in range(int(rate / chunk * record_seconds))
            ]
            print("* done recording")
        finally:
            # Release the input stream even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # The context manager closes the file even if a write fails.
    with wave.open(output_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    print("done ------------------------------ ")
    return output_path
-
-
-
# Speech client (used for both recognition and synthesis)
client_audio = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Record the spoken question
filePath = record_content()

# Speech recognition; 16000 must match the sample rate of the recording.
# dev_pid selects the recognition model (1536: Mandarin, per Baidu's docs).
result_audio = client_audio.asr(get_file_content(filePath), 'wav', 16000, {
    'dev_pid': 1536,
})
# A failed request carries no 'result' list; fail with the whole response
# instead of an opaque KeyError.
if not result_audio.get('result'):
    raise RuntimeError('speech recognition failed: %r' % (result_audio,))
content_audio = result_audio['result'][0]
print(content_audio)
-
-
-
# Natural language processing client
client_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Example input: text = "百度是一家高科技公司"
text = content_audio

# Run lexical analysis; the first token of the result is used as the answer.
xx = client_nlp.lexer(text)
# An error response or empty input yields no items; fail with the whole
# response instead of an opaque KeyError/IndexError.
if not xx.get('items'):
    raise RuntimeError('lexical analysis failed: %r' % (xx,))
content_answer = xx['items'][0]['item']
-
-
# Speech synthesis
result_answer = None  # stays None when the request itself raises
try:
    result_answer = client_audio.synthesis(content_answer, 'zh', 1, {
        'vol': 5,
    })
except Exception as e:
    print(e)

# Write and play the audio only when synthesis produced audio data;
# a dict result is an error description, not audio.
if result_answer is not None and not isinstance(result_answer, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result_answer)

    # Playback
    playsound('audio.mp3')
elif result_answer is not None:
    # Surface the error instead of playing a missing or stale file.
    print(result_answer)
录音
首先将对方的语音录下,保存为 *.wav 音频文件。原始 PCM 的录音参数必须符合 16k 采样率、16bit 位深、单声道;支持的格式有:pcm(不压缩)、wav(不压缩,pcm 编码)、amr(压缩格式)。
-
# Recording
def record_content(record_seconds=3, output_path="audio.wav"):
    """Record audio through PyAudio and save it as a WAV file.

    The capture is mono, 16-bit (paInt16) at 16000 Hz, read in chunks of
    1024 frames; the duration is rounded down to a whole number of chunks.

    Args:
        record_seconds: Length of the recording in seconds (default 3).
        output_path: Destination WAV file (default "audio.wav").

    Returns:
        The path of the WAV file that was written.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 16000

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format, channels=channels,
                            rate=rate, input=True,
                            frames_per_buffer=chunk)
        print("* recording")
        try:
            frames = [
                stream.read(chunk)
                for _ in range(int(rate / chunk * record_seconds))
            ]
            print("* done recording")
        finally:
            # Release the input stream even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # The context manager closes the file even if a write fails.
    with wave.open(output_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    print("done ------------------------------ ")
    return output_path
-
识别
然后对录音文件进行语音识别,
代码如下:
-
# Speech recognition; 16000 must match the sample rate of the recording.
# dev_pid selects the recognition model (1536: Mandarin, per Baidu's docs).
result_audio = client_audio.asr(get_file_content(filePath), 'wav', 16000, {
    'dev_pid': 1536,
})
# A failed request carries no 'result' list; fail with the whole response
# instead of an opaque KeyError.
if not result_audio.get('result'):
    raise RuntimeError('speech recognition failed: %r' % (result_audio,))
content_audio = result_audio['result'][0]
print(content_audio)
处理
-
# Natural language processing client
client_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Example input: text = "百度是一家高科技公司"
text = content_audio

# Run lexical analysis; the first token of the result is used as the answer.
xx = client_nlp.lexer(text)
# An error response or empty input yields no items; fail with the whole
# response instead of an opaque KeyError/IndexError.
if not xx.get('items'):
    raise RuntimeError('lexical analysis failed: %r' % (xx,))
content_answer = xx['items'][0]['item']
回答
得到回答内容之后,先进行语音合成,再将合成的音频写入本地文件并进行播放(参见 Python 的几种播放方法)。
-
# Speech synthesis
result_answer = None  # stays None when the request itself raises
try:
    result_answer = client_audio.synthesis(content_answer, 'zh', 1, {
        'vol': 5,
    })
except Exception as e:
    print(e)

# Write and play the audio only when synthesis produced audio data;
# a dict result is an error description, not audio.
if result_answer is not None and not isinstance(result_answer, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result_answer)

    # Playback
    playsound('audio.mp3')
elif result_answer is not None:
    # Surface the error instead of playing a missing or stale file.
    print(result_answer)
需要解决的问题(有建议请评论告知,感谢!):
1. 不定长语音文件的判定(音频文件时长不固定,需要根据说话时长来确定录音长度)
2. 在一群人中确定一位说话人,并只接收其指令