【手把手教你做一个工作辅助机器人】--- 音频生成模块

最新推荐文章于 2024-09-25 15:45:00 发布

卢比奥

最新推荐文章于 2024-09-25 15:45:00 发布

阅读量18

点赞数

文章标签：音视频

【手把手教你做一个工作辅助机器人】--- 音频生成模块_语音识别

欢迎关注我👆，收藏下次不迷路┗|｀O′|┛ 嗷~~

一.模块介绍

二.文字转语音演示

三.模块代码

一.模块介绍

音频生成模块的主要功能是实现文字转语音（TTS）。本项目调用百度智能云的语音合成 API，通过其 Python SDK（`aip` 包中的 `AipSpeech`）实现。

二.文字转语音演示

from aip import AipSpeech  
from playsound import playsound
import os
from pydub import AudioSegment
# Baidu AI credentials (APP ID, API key, secret key).
# They are read from the environment (BAIDU_APP_ID / BAIDU_API_KEY /
# BAIDU_SECRET_KEY) so secrets need not be committed with the script; the
# empty-string fallback keeps the original placeholders when nothing is set.
APP_ID = os.environ.get('BAIDU_APP_ID', '')
API_KEY = os.environ.get('BAIDU_API_KEY', '')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '')

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Call the speech-synthesis endpoint.
result = client.synthesis('你好主人～', 'zh', 1, {
    'vol': 5,  # volume, 0-15, 5 is the default (medium)
    'spd': 4,  # speed, 0-9, 5 is the default (medium)
    'pit': 5,  # pitch, 0-9, 5 is the default (medium)
    'per': 4,  # voice: 0 female, 1 male, 3 emotional "Du Xiaoyao", 4 emotional "Du Yaya"; default is the standard female voice
})

# On success the SDK returns the audio as raw bytes; on failure it returns a
# dict describing the error (see the Baidu error-code table).
if isinstance(result, dict):
    # Report the failure instead of silently doing nothing.
    print("speech synthesis failed:", result)
else:
    with open('output.mp3', 'wb') as file:
        file.write(result)

    # Convert the MP3 to a 44.1 kHz mono WAV before handing it to the player.
    mp3_file = AudioSegment.from_file("output.mp3", "mp3")
    wav_file = mp3_file.set_frame_rate(44100).set_channels(1)
    wav_file.export("output.wav", format="wav")
    # `play` is an external command-line player invoked through the shell.
    os.system("play output.wav")

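这些代码可以实现文本转语音并播放生成的音频文件：先把百度返回的 MP3 数据写入 output.mp3，再用 pydub 转成 44.1kHz 单声道的 output.wav，最后调用命令行工具 play 播放。注意：play 命令由 SoX 提供，pydub 读取 MP3 需要系统中装有 ffmpeg，运行前请先安装这两个工具。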

三.模块代码

本设计的功能模块要更加复杂一些：它把上面的合成与播放逻辑封装成函数 playsound_func，然后在一个循环中不断向本地服务查询需要播报的文字，并在每分钟变化时向云服务器查询定时任务，到点就语音提醒：

from aip import AipSpeech  
from playsound import playsound
import os,requests
from datetime import datetime
from pydub import AudioSegment
# Baidu AI credentials (APP ID, API key, secret key).
# They are read from the environment (BAIDU_APP_ID / BAIDU_API_KEY /
# BAIDU_SECRET_KEY) so secrets need not be committed with the script; the
# empty-string fallback keeps the original placeholders when nothing is set.
APP_ID = os.environ.get('BAIDU_APP_ID', '')
API_KEY = os.environ.get('BAIDU_API_KEY', '')
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '')

# Shared speech client used by every synthesis call in this script.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def playsound_func(input_word):
    """Synthesize ``input_word`` to speech and play it.

    The text is sent to the module-level speech client, the returned MP3 is
    written to ``output.mp3``, converted to a 44.1 kHz mono ``output.wav``
    and played with the external ``play`` command.

    Args:
        input_word: Text to speak. Empty or ``None`` text is ignored.

    Returns:
        None. A synthesis failure is reported on stdout and nothing is played.
    """
    if not input_word:
        # Nothing to say: skip the API round trip instead of sending an
        # empty request.
        return

    # Call the speech-synthesis endpoint.
    result = client.synthesis(input_word, 'zh', 1, {
        'vol': 5,  # volume, 0-15, 5 is the default (medium)
        'spd': 4,  # speed, 0-9, 5 is the default (medium)
        'pit': 5,  # pitch, 0-9, 5 is the default (medium)
        'per': 4,  # voice: 0 female, 1 male, 3 emotional "Du Xiaoyao", 4 emotional "Du Yaya"; default is the standard female voice
    })

    # On success the SDK returns the audio as raw bytes; on failure it
    # returns a dict describing the error (see the Baidu error-code table).
    if isinstance(result, dict):
        # Report the failure instead of silently doing nothing.
        print("speech synthesis failed:", result)
        return

    with open('output.mp3', 'wb') as file:
        file.write(result)

    # Convert the MP3 to a 44.1 kHz mono WAV before handing it to the player.
    mp3_file = AudioSegment.from_file("output.mp3", "mp3")
    wav_file = mp3_file.set_frame_rate(44100).set_channels(1)
    wav_file.export("output.wav", format="wav")
    # `play` is an external command-line player invoked through the shell.
    os.system("play output.wav")
import time  # used only to pace the polling loop below

# Local service that reports text waiting to be spoken.
SOUND_STATE_URL = 'http://127.0.0.1:6999/get_sound_state'
# NOTE(review): '云服务器' ("cloud server") looks like a placeholder host name;
# replace it with the real server address before running.
TASK_LIST_URL = 'http://云服务器:6999/get_tasks'
TASK_READ_URL = 'http://云服务器:6999/read_task'
# Upper bound, in seconds, on any single HTTP call so a hung server cannot
# freeze the loop.
REQUEST_TIMEOUT = 5
# Pause, in seconds, between polls so the loop does not spin at full speed.
POLL_INTERVAL = 0.2


def _post_json(url, payload):
    """POST ``payload`` as JSON to ``url``; return the decoded body or None on failure."""
    try:
        response = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT)
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # A network error or a non-JSON reply must not kill the loop.
        print("request to %s failed: %s" % (url, exc))
        return None


def _task_is_due(task_name, today, minute):
    """Return True when ``task_name`` carries today's date and the given HH:MM.

    The name is split on "T": the first piece is compared with ``today``
    (YYYYMMDD) and the text before the first "_" of the second piece is
    compared with ``minute`` (HH:MM). Names without a "T" are never due.
    """
    parts = task_name.split("T")
    if len(parts) < 2:
        return False
    return parts[0] == today and parts[1].split("_")[0] == minute


# Minute (HH:MM) for which scheduled tasks were last checked.
pre_time = datetime.now().strftime("%H:%M")
while True:
    time.sleep(POLL_INTERVAL)

    # 1) Speak whatever text the local service has queued.
    state = _post_json(SOUND_STATE_URL, {})
    words = state.get('words') if isinstance(state, dict) else None
    if words == "你好主人":
        # The greeting has a pre-recorded clip, so no synthesis is needed.
        os.system("play sound_data/nihaozhuren.wav")
    elif words:
        playsound_func(words)

    # 2) Once per minute, announce the scheduled tasks that are due.
    # The time is sampled once per pass so the date, the minute and
    # `pre_time` all refer to the same instant; re-sampling inside the task
    # loop could mark a minute as handled without checking its tasks.
    now = datetime.now()
    current_time = now.strftime("%H:%M")
    if current_time == pre_time:
        continue
    # One attempt per minute: the minute is marked handled even if the task
    # server is unreachable, so an outage cannot stall the speech polling.
    pre_time = current_time

    listing = _post_json(TASK_LIST_URL, {})
    if not isinstance(listing, dict) or "data" not in listing:
        continue

    today = now.strftime("%Y%m%d")
    hour, minute = current_time.split(":")
    for task_name in listing["data"]:
        if not _task_is_due(task_name, today, current_time):
            continue
        task = _post_json(TASK_READ_URL, {'filepath': task_name})
        if not isinstance(task, dict) or 'words' not in task:
            continue
        playsound_func("主人，现在是" + hour + "时" + minute + "分，此时应该" + task['words'] + "啦")