树莓派学习之旅(4)— 综合心知天气、语音唤醒和百度API做一个语音天气助手
一、修改代码
在之前博客(树莓派学习之旅(3)— 使用 snowboy 做语音唤醒功能)的基础上,对代码进行修改。
因为在打开声卡的过程中需要消耗比较多的时间,所以我们在打开声卡后,打印一行提示符,
因为我们之后要使用声卡播放声音,所以将声卡的output设为True,
然后我们将回调函数参数修改,不再使用他提供的回调函数
接下来我们就来完成这个回调函数。首先,我们需要将声卡关闭:因为语音唤醒打开的声卡工作在回调模式下,我们不能直接对其读写,所以要以阻塞模式重新开启一次声卡。
def detected_callback():
detector.terminate() # 关闭声卡
# 再次打开声卡
stream = detector.audio.open(format = paInt16, channels = 1,rate = 16000, input = True,output=True, frames_per_buffer = 2048)
然后播放 “你好,主人” 的语音,播放完后开始录音,这个在之前的博客说过:树莓派学习之旅(2)— 使用USB声卡进行录音
print("你好,主人!")
play_audio(stream,"wav/nihaozhuren.wav") # 播放 主人你好
record(stream) # 录音
录音好后通过百度API进行语音识别:Python开发之路(2)— 使用百度API实现语音识别
restr = baidu_api.baisu_asr(token)
print(restr)
通过语音识别得到的结果,然后判断该做出什么反应,并使用百度语音合成,然后播放:Python开发之路(3)— 使用百度API实现语音合成
if "天气" in restr:
weather = baidu_api.get_weather()
spk_str = "今天天气:%s。气温:%s摄氏度。"%(weather["text"],weather["temperature"])
print(spk_str)
baidu_api.baidu_tts(spk_str,token)
play_audio(stream,"wav/result.wav")
elif( ("开灯" in restr) or ("把灯打开" in restr) ):
spk_str = "主人。灯已打开"
print(spk_str)
baidu_api.baidu_tts(spk_str,token)
play_audio(stream,"wav/result.wav")
stream.close()
二、代码
1、demo.py的代码
import snowboydecoder
import sys,wave,os,time
import signal
from tqdm import tqdm
from pyaudio import PyAudio,paInt16
import baidu_api
# Set to True by the SIGINT handler; polled by the detector main loop.
interrupted = False


def signal_handler(sig, frame):
    """SIGINT (Ctrl+C) handler: request a clean shutdown of the detector loop."""
    # First parameter renamed from `signal` to `sig` so it no longer shadows
    # the stdlib `signal` module inside the handler.
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return True once SIGINT has been received (passed to detector.start)."""
    global interrupted
    return interrupted
# Require the snowboy hotword model file as the first command-line argument.
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)
model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

# Fetch a Baidu API access token once at startup; reused by the ASR/TTS calls
# made from the hotword callback.
token = baidu_api.get_token()

# Create the snowboy hotword detector for the given model.
detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print('Press Ctrl+C to exit')
def play_audio(stream, filename):
    """Play an entire WAV file through an already-opened PyAudio stream.

    The stream is started before playback and stopped afterwards; the
    caller keeps ownership of the stream object.
    """
    stream.start_stream()
    wav_file = wave.open(filename, 'rb')
    chunk = wav_file.readframes(2048)
    while chunk != b"":
        stream.write(chunk)
        chunk = wav_file.readframes(2048)
    stream.stop_stream()
    time.sleep(0.01)
    wav_file.close()
def record(stream, filename="01.wav"):
    """Record ~4 seconds of 16 kHz mono 16-bit audio from *stream* into a WAV.

    :param stream: an open PyAudio stream (input enabled, 16 kHz, mono, 16-bit).
    :param filename: output WAV path; defaults to "01.wav", the file that
        baidu_api.baisu_asr() later uploads for recognition.
    """
    stream.start_stream()
    record_buf = []
    # 32 reads of 2048 frames at 16 kHz is ~4.1 seconds of audio.
    # (The original comment claimed 5 seconds, which did not match the math.)
    for i in tqdm(range(8 * 4)):
        audio_data = stream.read(2048)   # pull one buffer from the sound card
        record_buf.append(audio_data)
    wf = wave.open(filename, 'wb')
    wf.setnchannels(1)       # mono (original comment wrongly said 2 channels)
    wf.setsampwidth(2)       # 2 bytes per sample -> 16-bit
    wf.setframerate(16000)   # 16 kHz sample rate
    # b"".join is the idiomatic equivalent of the original "".encode().join
    wf.writeframes(b"".join(record_buf))
    wf.close()
    stream.stop_stream()
    time.sleep(0.01)
def detected_callback():
    """Hotword callback: greet the user, record a command, run ASR, act on it.

    The snowboy detector owns the sound card via a non-blocking callback
    stream, so we must close it first and reopen the device in blocking
    mode for playback and recording.
    """
    detector.terminate()  # release the detector's callback-mode stream
    # Reopen the sound card in blocking mode (input for recording, output
    # for playback).
    stream = detector.audio.open(format=paInt16, channels=1, rate=16000,
                                 input=True, output=True,
                                 frames_per_buffer=2048)

    def _speak(text):
        # Synthesize *text* with Baidu TTS and play the resulting WAV.
        # (Extracted: both intent branches did exactly this sequence.)
        print(text)
        baidu_api.baidu_tts(text, token)
        play_audio(stream, "wav/result.wav")

    try:
        print("你好,主人!")
        play_audio(stream, "wav/nihaozhuren.wav")  # play the greeting
        record(stream)                             # record the spoken command
        restr = baidu_api.baisu_asr(token)         # speech -> text
        print(restr)
        if "天气" in restr:
            weather = baidu_api.get_weather()
            _speak("今天天气:%s。气温:%s摄氏度。" % (weather["text"], weather["temperature"]))
        elif ("开灯" in restr) or ("把灯打开" in restr):
            _speak("主人。灯已打开")
    finally:
        # Always release the blocking stream, even if ASR/TTS raised, so
        # detector.start() can reopen its own callback stream afterwards.
        stream.close()
#detector.start(detected_callback=detected_callback,interrupt_check=interrupt_callback,sleep_time=0.03)
# main loop
# Blocks here, polling the microphone every 0.03 s.  On each hotword hit the
# detector invokes detected_callback; interrupt_callback makes the loop exit
# after Ctrl+C, after which the stream is torn down.
detector.start(detected_callback=detected_callback,
               interrupt_check=interrupt_callback,
               sleep_time=0.03)
detector.terminate()
2、baidu_api.py的代码
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Author: William
# encoding:utf-8
import json
from urllib import request,parse
def get_token():
    """Request an OAuth access token from Baidu AIP.

    :return: the access-token string, or None if the request failed.
    """
    API_Key = "DF2wS4DQ53TlS8ATxasy0ZXv"             # API Key from the Baidu console
    Secret_Key = "GvADiMXnwATEhaiKuOXg3t37KnKClGWr"  # Secret Key from the Baidu console
    # Build the token-request URL
    Url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id="+API_Key+"&client_secret="+Secret_Key
    try:
        resp = request.urlopen(Url)
        result = json.loads(resp.read().decode('utf-8'))
        # Print and return the access_token
        print("access_token:", result['access_token'])
        return result['access_token']
    except request.URLError as err:
        # Only HTTPError carries .code; a plain URLError (e.g. no network,
        # DNS failure) only has .reason.  The original handler crashed with
        # AttributeError in that case.
        if hasattr(err, 'code'):
            print('token http response http code : ' + str(err.code))
        else:
            print('token request failed: ' + str(err.reason))
    return None
def baisu_asr(token):
    """Send the recorded WAV ("01.wav") to Baidu's short-speech ASR service.

    :param token: Baidu access token from get_token().
    :return: the top recognition candidate as a string.
    :raises RuntimeError: if 01.wav is empty (nothing to recognize).
    """
    # 1. Read the audio file to be recognized.
    with open("01.wav", 'rb') as speech_file:
        speech_data = speech_file.read()
    length = len(speech_data)
    if length == 0:
        # The original code only printed a message and then posted an empty
        # body, which is guaranteed to fail server-side and crash later on
        # result['result']; fail fast with a clear error instead.
        raise RuntimeError('file 01.wav length read 0 bytes')
    # 2. Query-string parameters.
    params = {'cuid': "12345678python",  # arbitrary unique user id (<= 60 chars)
              'token': token,            # the Access Token we obtained
              'dev_pid': 1537}           # 1537 = Mandarin
    # URL-encode the parameters and build the full request URL.
    params_query = parse.urlencode(params)
    Url = 'http://vop.baidu.com/server_api' + "?" + params_query
    # 3. Request headers describe the raw audio payload.
    headers = {
        'Content-Type': 'audio/wav; rate=16000',  # container format and sample rate
        'Content-Length': length
    }
    # 4. POST the audio bytes directly as the request body.
    req = request.Request(Url, speech_data, headers)
    res_f = request.urlopen(req)
    result = json.loads(res_f.read().decode('utf-8'))
    print(result)
    return result['result'][0]
def baidu_tts(TEXT, token):
    """Synthesize *TEXT* with Baidu TTS and save the audio to wav/result.wav.

    :param TEXT: UTF-8 text to synthesize (< 2048 Chinese chars / digits).
    :param token: Baidu access token from get_token().
    """
    # quote_plus here is the first URL-encode; parse.urlencode below encodes
    # the value a second time — the double encoding the TTS API requires.
    tex = parse.quote_plus(TEXT)
    # Synthesis parameters.
    params = {'tok': token,      # developer access_token
              'tex': tex,        # text to synthesize
              'per': 4,          # voice: 0 Xiaomei, 1 Xiaoyu, 3 Xiaoyao, 4 Yaya
              'spd': 5,          # speed 0-15, 5 = normal
              'pit': 5,          # pitch 0-15, 5 = normal
              'vol': 5,          # volume 0-15, 5 = normal
              'aue': 6,          # 6 = wav (same payload as pcm-16k); 3 = mp3
              'cuid': "7749py",  # arbitrary unique user id
              'lan': 'zh', 'ctp': 1}  # fixed parameters
    # URL-encode the parameters and POST them in the request body.
    data = parse.urlencode(params)
    req = request.Request("http://tsn.baidu.com/text2audio", data.encode('utf-8'))
    f = request.urlopen(req)
    result_str = f.read()
    # Lower-case the response headers for a case-insensitive lookup.
    headers = dict((name.lower(), value) for name, value in f.headers.items())
    # Success is signaled by an "audio/wav" Content-Type; on failure the
    # body is a JSON error description instead of audio.
    if "audio/wav" in headers['content-type']:
        print("tts success")
        # Synthesis succeeded: write the audio to the file played back later.
        with open("wav/result.wav", 'wb') as of:
            of.write(result_str)
    else:
        # The original silently ignored failures, leaving a stale
        # wav/result.wav to be replayed; surface the error body instead.
        print("tts failed:", result_str.decode('utf-8', 'replace'))
def get_weather(location="赣州"):
    """Fetch the current weather from Seniverse (心知天气).

    :param location: city name; defaults to "赣州" so existing callers keep
        their behavior, but any Seniverse-supported location may be passed.
    :return: the "now" dict, e.g. {"text": ..., "temperature": ..., ...}.
    """
    KEY = "SqEQJuFtxQBkZNNGC"  # private key from the Seniverse console
    params = {'key': KEY,
              'location': location,
              'language': "zh-Hans",
              'unit': "c"
              }
    # URL-encode the parameters
    params_query = parse.urlencode(params)
    # Build the full request URL
    Url = "https://api.seniverse.com/v3/weather/now.json" + "?" + params_query
    # Send the request
    res_f = request.urlopen(Url)
    # Decode the JSON response
    result = json.loads(res_f.read().decode('utf-8'))
    # Log the raw result for debugging
    print(result["results"][0])
    # Return just the current-conditions block
    return result["results"][0]["now"]
3、snowboydecoder.py修改后的代码
#!/usr/bin/env python
import collections
import pyaudio
import snowboydetect
import time
import wave
import os
import logging
from ctypes import *
from contextlib import contextmanager
logging.basicConfig()
logger = logging.getLogger("snowboy")
logger.setLevel(logging.INFO)

# Resource paths are resolved relative to this file's own directory.
TOP_DIR = os.path.dirname(os.path.abspath(__file__))
RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")


def py_error_handler(filename, line, function, err, fmt):
    # Deliberately swallow ALSA's C-level error spam; installed below via
    # snd_lib_error_set_handler inside no_alsa_error().
    pass


# ctypes signature matching ALSA's snd_lib_error_handler_t callback.
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
@contextmanager
def no_alsa_error():
    """Context manager that silences ALSA's stderr error spam.

    If libasound cannot be loaded (non-Linux system, ALSA absent), the body
    simply runs without suppression.
    """
    try:
        asound = cdll.LoadLibrary('libasound.so')
    except OSError:
        # ALSA not available: nothing to silence.
        yield
        return
    asound.snd_lib_error_set_handler(c_error_handler)
    try:
        yield
    finally:
        # The original bare "except:" also swallowed exceptions raised by the
        # with-body and then yielded a second time (itself a RuntimeError);
        # restore the default handler in a finally and let errors propagate.
        asound.snd_lib_error_set_handler(None)
class RingBuffer(object):
    """Fixed-capacity byte ring buffer fed by the PortAudio callback."""

    def __init__(self, size=4096):
        # A deque with maxlen silently drops the oldest bytes when full,
        # which gives us ring-buffer semantics for free.
        self._buf = collections.deque(maxlen=size)

    def extend(self, data):
        """Append *data* (a bytes-like object) to the end of the buffer."""
        self._buf.extend(data)

    def get(self):
        """Return everything currently buffered as bytes and empty the buffer."""
        contents = bytes(bytearray(self._buf))
        self._buf.clear()
        return contents
def play_audio_file(fname=DETECT_DONG):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.
    :param str fname: wave file name
    :return: None
    """
    wav = wave.open(fname, 'rb')
    frames = wav.readframes(wav.getnframes())
    with no_alsa_error():
        pa = pyaudio.PyAudio()
    out = pa.open(
        format=pa.get_format_from_width(wav.getsampwidth()),
        channels=wav.getnchannels(),
        rate=wav.getframerate(),
        input=False, output=True)
    out.start_stream()
    out.write(frames)
    time.sleep(0.2)
    out.stop_stream()
    out.close()
    pa.terminate()
class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.
    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float of a list of floats.
                        The bigger the value, the more senstive the
                        decoder. If an empty list is provided, then the
                        default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """

    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):
        # Normalize single model/sensitivity values into lists.
        tm = type(decoder_model)
        ts = type(sensitivity)
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()
        # One sensitivity given for several models: apply it to all of them.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())
        # Buffer ~5 seconds of raw audio between detector polls.
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)
        # 1. Create a single PyAudio object; it is reused for every stream
        #    this detector (and the user's hotword callback) opens.
        with no_alsa_error():
            self.audio = pyaudio.PyAudio()

    def start(self, detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03,
              audio_recorder_callback=None,
              silent_count_threshold=15,
              recording_timeout=100):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.
        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called after
                                        a keyword has been spoken and after the
                                        phrase immediately after the keyword has
                                        been recorded. The function will be
                                        passed the name of the file where the
                                        phrase was recorded.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        self._running = True

        def audio_callback(in_data, frame_count, time_info, status):
            # PortAudio callback: stash the captured audio in the ring
            # buffer and feed silence to the output side.
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        # 2. Open the sound card in callback (non-blocking) mode.
        def open_stream():
            self.stream_in = self.audio.open(
                input=True, output=True,
                format=self.audio.get_format_from_width(
                    self.detector.BitsPerSample() / 8),
                channels=self.detector.NumChannels(),
                rate=self.detector.SampleRate(),
                frames_per_buffer=2048,
                stream_callback=audio_callback)
            # 3. Opening the device takes noticeable time; print a prompt
            #    once it is ready (also after each reopen).
            print("I am Listening......")

        open_stream()
        # print("**************channels:",self.detector.NumChannels())
        # print("**************format:",self.audio.get_format_from_width(self.detector.BitsPerSample() / 8))
        # print("**************rate:",self.detector.SampleRate())

        if interrupt_check():
            logger.debug("detect voice return")
            return

        # Normalize the callback(s) into one callback per hotword.
        tc = type(detected_callback)
        if tc is not list:
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            detected_callback *= self.num_hotwords
        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        state = "PASSIVE"
        while self._running is True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            # RunDetection returns: -1 error, -2 silence, 0 voice,
            # >0 index of the detected hotword.
            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            #small state machine to handle recording of phrase after keyword
            if state == "PASSIVE":
                if status > 0: #key word found
                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime(time.time()))
                    logger.info(message)
                    callback = detected_callback[status-1]
                    if callback is not None:
                        callback()
                        # The user callback may have terminated / reused our
                        # stream (the demo's callback calls terminate());
                        # close whatever is left and reopen in callback mode.
                        # NOTE(review): if the callback already closed
                        # stream_in, this close() acts on a closed stream —
                        # confirm pyaudio tolerates that on this version.
                        self.stream_in.close()
                        open_stream()

                    if audio_recorder_callback is not None:
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2: #silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0: #voice found
                    silentCount = 0

                if stopRecording == True:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.
        """
        filename = 'output' + str(int(time.time())) + '.wav'
        data = b''.join(self.recordedData)

        #use wave to save data
        wf = wave.open(filename, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(self.audio.get_sample_size(
            self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8)))
        wf.setframerate(self.detector.SampleRate())
        wf.writeframes(data)
        wf.close()
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users can call start() again to detect.
        :return: None
        """
        self.stream_in.stop_stream()
        self.stream_in.close()
        # self.audio.terminate()
        # self._running = False