语音唤醒代码
缺少的库请自行通过 pip 安装(例如 pvporcupine、pyaudio、playsound)。
# -*- coding: utf-8 -*-
import pvporcupine
import pyaudio # 录音库
import struct
from playsound import playsound # 调用电脑进行播放
from TTS import audio
import time
# Wake-word listener: reads microphone frames, feeds them to Porcupine, and
# when the custom keyword is detected synthesizes a spoken reply and plays it.

# Picovoice access key (placeholder; request your own from the Picovoice site).
PICOVOICE_API_KEY = "自己的去网站申请秘钥"

# Reply spoken after the wake word is detected. The original code passed an
# undefined name (`main`) here, which raised NameError on the first detection.
# NOTE(review): this text mirrors the sample text of the TTS helper; replace it
# with whatever reply you actually want.
WAKE_REPLY_TEXT = "主人,您找我有什么事"
WAKE_REPLY_FILE = "min.mp3"

porcipine = pvporcupine.create(
    access_key=PICOVOICE_API_KEY,
    # Custom keyword model downloaded from the Picovoice site.
    keyword_paths=['D:/Py_file/xiaoshuai/ppxiaoshuai.ppn'],
)
myaudio = pyaudio.PyAudio()
stream = None
try:
    stream = myaudio.open(
        input_device_index=0,
        rate=porcipine.sample_rate,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=porcipine.frame_length,
    )
    # One frame is `frame_length` signed 16-bit samples (paInt16, mono).
    frame_format = "h" * porcipine.frame_length
    while True:
        audio_obj = stream.read(porcipine.frame_length, exception_on_overflow=False)
        audio_obj_unpacked = struct.unpack(frame_format, audio_obj)
        keyword_idx = porcipine.process(audio_obj_unpacked)
        # process() reports a non-negative keyword index on detection.
        if keyword_idx >= 0:
            print("识别成功")
            audio(WAKE_REPLY_TEXT, WAKE_REPLY_FILE)
            time.sleep(0.5)
            playsound(WAKE_REPLY_FILE)
finally:
    # Release the microphone and the native Porcupine handle even when the
    # loop is interrupted (Ctrl+C) or an error occurs.
    if stream is not None:
        stream.stop_stream()
        stream.close()
    myaudio.terminate()
    porcipine.delete()
TTS语音播放
这里简单使用了 edge_tts 语音合成库;具体喜欢哪个声音可以自行调整(在百度上搜索即可查到可用的音色)。
import edge_tts
import asyncio
# Module-level sample settings for the TTS helper.
output_file = "audio.wav"
# Voice name handed to edge_tts; change it to select a different voice.
voice = "zh-CN-XiaoyiNeural"
# Sample sentence to synthesize.
text = "主人,您找我有什么事"
async def my_function(text, output_file):
    """Synthesize *text* with edge_tts and save the audio to *output_file*.

    The voice is taken from the module-level ``voice`` setting.
    """
    await edge_tts.Communicate(text=text, voice=voice).save(output_file)
def audio(text, output_file):
    """Synthesize *text* to *output_file*, blocking until the coroutine finishes.

    Synchronous wrapper around the ``my_function`` coroutine so that callers
    without an event loop of their own can use it.

    Args:
        text: The sentence to synthesize.
        output_file: Path the synthesized audio is saved to.
    """
    # asyncio.run() creates a fresh event loop for the call and closes it
    # afterwards. The previous asyncio.get_event_loop() pattern is deprecated
    # when no loop is running and fails on newer Python versions.
    asyncio.run(my_function(text, output_file))
下面是可以进一步扩展的方向,例如可以接入以下语音识别工具:
FunASR
Whisper
Kaldi
以上内容参考自B站一位大佬的教程,学习地址如下:
https://www.bilibili.com/video/BV1Ns4y1m7TH/?spm_id_from=333.880.my_history.page.click&vd_source=c56d9b69c658f05bb1fc6f82627a29ca
https://picovoice.ai/
这是我最近自己在训练的、基于达摩院开源项目的唤醒模型,也一并分享出来,感觉更加友好:
https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun/summary