【记录】Python3｜用百度语音 API 朗读你的小说TXT

shandianchengzi

已于 2024-04-28 21:26:35 修改

阅读量6.4k

点赞数

分类专栏：代码 | 文章标签：python 百度语音识别

于 2021-08-27 22:26:03 首次发布

本文链接：https://blog.csdn.net/qq_46106285/article/details/119956917

版权

代码专栏收录该内容

43 篇文章

订阅专栏

本文详细介绍如何配置百度语音合成API，提供优化过的代码示例，包括异常处理、配置文件管理与按段朗读文本的改进。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

省流版

百度语音合成官方教程_AI开放平台

百度语音合成官方demo_github.com

简单地写了一个按段落朗读文本的demo：DEMO链接_gitee.com。
有时候会请求不到数据，不知道是网络原因还是什么，已添加自动重新请求。

config.ini：

;关于语音合成的相关配置
[default]
api_key = Your api key
secret_key = Your secret key
;发音人选择, 基础音库：0为度小美，1为度小宇，3为度逍遥，4为度丫丫，
;精品音库：5为度小娇，103为度米朵，106为度博文，110为度小童，111为度小萌，默认为度小美
per = 3
;语速，取值0-15，默认为5中语速
spd = 4
;音调，取值0-15，默认为5中语调
pit = 5
;音量，取值0-9，默认为5中音量
vol = 5
;下载的文件格式编号（aue），3：mp3(default)，4：pcm-16k，5：pcm-8k，6：wav
aue = 3
;下载的文件格式, 可选项：mp3(default), pcm-16k, pcm-8k, wav
format = mp3
cuid = 123456PYTHON
tts_url = http://tsn.baidu.com/text2audio
token_url = http://openapi.baidu.com/oauth/2.0/token
;有此scope表示有tts能力，没有请在网页里勾选
scope = audio_tts_post

[追风筝的人.txt]
text_lct = 12

main.py：

# coding=utf-8
import os
import json
from configparser import ConfigParser
from playsound import playsound

from urllib.request import urlopen
from urllib.request import Request
from urllib.error import URLError
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.parse import quote_plus

# Placeholder text to synthesize; the main loop reassigns it for every paragraph.
TEXT = "欢迎使用百度语音合成。"
# Path of the INI file passed to load_config() and update_text().
ini_file = "./config.ini"
# Name of the INI section that load_config() reads the API settings from.
cfg_name = "default"
# Path of the book to read aloud; the main script opens it as UTF-8 text.
book = "D:/总要删的/追风筝的人.txt"


def load_config(ini, name):
    """Return the options of one section of an INI file as a plain dict.

    Args:
        ini: Path of the INI file; it is decoded as GBK.
        name: Name of the section to load.

    Returns:
        A dict mapping each option name of the section to its string value.
    """
    parser = ConfigParser()
    parser.read(ini, encoding="gbk")
    # items() yields (option, value) tuples; collect them into a dict.
    return {option: value for option, value in parser.items(name)}


class DemoError(Exception):
    """Raised when an access token cannot be obtained or lacks the TTS scope."""


def fetch_token(dft_cfg):
    """Request an OAuth access token using the client-credentials grant.

    Args:
        dft_cfg: Mapping providing the keys ``api_key``, ``secret_key``,
            ``token_url`` and ``scope``.

    Returns:
        The ``access_token`` string from the endpoint's JSON response.

    Raises:
        DemoError: If the endpoint cannot be reached, the response is not
            valid JSON, ``access_token``/``scope`` are missing from it, or
            the configured scope is not among the granted scopes.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': dft_cfg['api_key'],
              'client_secret': dft_cfg['secret_key']}
    post_data = urlencode(params).encode('utf-8')
    req = Request(dft_cfg['token_url'], post_data)
    try:
        # The context manager closes the response even if read() fails.
        with urlopen(req, timeout=5) as f:
            result_str = f.read()
    except HTTPError as err:
        # The server answered with an error status; its body is still parsed
        # below so the usual "key not correct" diagnosis can be raised.
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    except OSError as err:
        # Plain URLError (a subclass of OSError) and socket timeouts carry no
        # .code attribute and no body, so they cannot be handled like HTTPError.
        raise DemoError('token request failed: ' + str(err)) from err

    try:
        result = json.loads(result_str.decode())
    except ValueError as err:
        # Covers both undecodable bytes and malformed JSON.
        raise DemoError('token response is not valid JSON') from err

    if 'access_token' in result and 'scope' in result:
        if dft_cfg['scope'] not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')


def update_text(file, book_title, ini):
    """Return the next non-blank line of the book and persist the position.

    The reading position is stored as option ``text_lct`` in the INI section
    named after the book. Note that rewriting the INI file through
    ConfigParser does not keep its comments.

    Args:
        file: List of the book's lines.
        book_title: INI section name under which the position is stored.
        ini: Path of the INI file (read and written as GBK).

    Returns:
        The next non-blank line, or the fixed end-of-book message when no
        non-blank line is left.
    """
    end_msg = "已经读到最后一句啦！换本书吧~！"
    cfg = ConfigParser()
    cfg.read(ini, encoding="gbk")
    if cfg.has_option(book_title, "text_lct"):
        now_lct = int(cfg.get(book_title, "text_lct"))
    else:
        # The section may exist without a stored position; add_section()
        # would raise DuplicateSectionError in that case.
        if not cfg.has_section(book_title):
            cfg.add_section(book_title)
        now_lct = 0

    # Skip blank lines, stopping at the end of the book instead of indexing
    # past it when the book ends with blank lines.
    while now_lct < len(file) and not file[now_lct].strip():
        now_lct += 1
    if now_lct >= len(file):
        return end_msg

    cfg.set(book_title, "text_lct", str(now_lct + 1))
    # "w" truncates the file so no stale tail of the old content survives,
    # and the encoding matches the one used for reading.
    with open(ini, "w", encoding="gbk") as ini_fp:
        cfg.write(ini_fp)
    return file[now_lct]


def request_api(params):
    """Synthesize speech for ``params`` and play it, retrying until success.

    Reads the module-level ``dft_cfg`` for ``tts_url`` and ``format``. The
    audio is written to ``temp.<format>`` in the working directory, played
    with playsound, and removed afterwards.

    Args:
        params: Request parameters for the TTS endpoint; they are
            form-encoded and sent as the POST body.
    """
    import time  # local import: only needed to pause between retries

    data = urlencode(params).encode('utf-8')
    while True:  # iterative retry; the recursive form could exhaust the stack
        req = Request(dft_cfg['tts_url'], data)
        try:
            with urlopen(req) as f:
                result = f.read()
                # Header lookup on the response is case-insensitive.
                content_type = f.headers.get('content-type') or ''
            if 'audio/' in content_type:
                error_text = None
            else:
                # A non-audio body is the service's error description.
                error_text = result.decode('utf-8', errors='replace')
        except Exception as e:
            # Deliberately broad: any failure of the request is retried.
            print('asr http response http code : ' + str(e))
            error_text = str(e)
        if error_text is None:
            break
        print("tts api  error:" + error_text)
        # Brief pause so a persistent failure does not flood the service.
        time.sleep(1)

    save_file = 'temp.' + dft_cfg['format']
    with open(save_file, 'wb') as of:
        of.write(result)
    try:
        playsound(save_file)
    finally:
        # Remove the temporary audio even if playback raises.
        os.remove(save_file)


if __name__ == '__main__':
    # Step 1: load the API settings from the INI file.
    dft_cfg = load_config(ini_file, cfg_name)
    # Step 2: obtain the access token.
    token = fetch_token(dft_cfg)
    # Step 3: build the request parameters shared by every TTS request;
    # 'tex' is filled in per paragraph, 'lan' and 'ctp' are fixed values.
    params = {'tok': token, 'tex': '', 'per': dft_cfg['per'], 'spd': dft_cfg['spd'], 'pit': dft_cfg['pit'],
              'vol': dft_cfg['vol'], 'aue': dft_cfg['aue'], 'cuid': dft_cfg['cuid'],
              'lan': 'zh', 'ctp': 1}
    # The file name of the book is the INI section that stores the position.
    book_title = os.path.basename(book)
    # Read the whole book as a list of lines.
    with open(book, "r", encoding='utf-8') as f:
        file = f.readlines()
    # Message update_text() returns once the book is finished; it is read
    # aloud once and then the loop stops instead of repeating it forever.
    end_msg = "已经读到最后一句啦！换本书吧~！"
    while True:
        # Fetch the next paragraph based on the stored reading position.
        TEXT = update_text(file, book_title, ini_file)
        print(TEXT)
        # Quoted here and again by urlencode() inside request_api(); the
        # original author notes the text needs to be URL-encoded twice.
        params['tex'] = quote_plus(TEXT)
        request_api(params)
        if TEXT == end_msg:
            break

目前的结果：
在这里插入图片描述

详细解释版

百度语音合成官方教程与实现

首先，感谢百度提供的语音合成官方教程和demo，让我们能够轻松地将文本转换为语音。在此基础上，我创建了一个简单的按段落朗读文本的demo，并已在gitee上分享。

一、官方教程与资源

百度语音合成官方教程：链接
百度语音合成官方demo（GitHub）：链接
我的朗读文本demo（Gitee）：链接

二、实现过程

1. 配置文件（config.ini）

配置文件用于存储语音合成的相关参数，如API密钥、发音人选择、语速、音调、音量等。

;关于语音合成的相关配置
[default]
api_key = Your api key
secret_key = Your secret key
;发音人选择, 基础音库：0为度小美，1为度小宇，3为度逍遥，4为度丫丫，
;精品音库：5为度小娇，103为度米朵，106为度博文，110为度小童，111为度小萌，默认为度小美
per = 3
;语速，取值0-15，默认为5中语速
spd = 4
;音调，取值0-15，默认为5中语调
pit = 5
;音量，取值0-9，默认为5中音量
vol = 5
# 下载的文件格式, 3：mp3(default) 4： pcm-16k 5： pcm-8k 6. wav
aue = 3
;下载的文件格式, 可选项：mp3(default), pcm-16k, pcm-8k, wav
format = mp3
cuid = 123456PYTHON
tts_url = http://tsn.baidu.com/text2audio
token_url = http://openapi.baidu.com/oauth/2.0/token
;有此scope表示有tts能力，没有请在网页里勾选
scope = audio_tts_post

[追风筝的人.txt]
text_lct = 12

2. 主程序（main.py）

主程序使用Python编写，利用urllib库发送HTTP请求，并使用playsound库播放生成的音频文件。

代码就不重复贴了。

三、代码解析与优化

在原始代码中，我们注意到了一些可以改进的地方，以提高CSDN质量分：

异常处理：原始代码中的异常处理较为简单，只是打印了错误信息。为了增强代码的健壮性，我们可以添加更详细的异常处理逻辑，并记录日志以便后续排查问题。
代码注释：虽然代码逻辑相对简单，但添加适当的注释可以帮助其他开发者更好地理解代码的功能和逻辑。
变量命名：变量命名应清晰明了，遵循Python的命名规范。我们可以将某些变量名改为更具描述性的名称，以提高代码的可读性。
代码结构：将功能相近的代码块组织在一起，形成函数或类，可以提高代码的可维护性和复用性。
日志记录：添加日志记录功能，以便在程序运行过程中记录关键信息，方便后续分析和调试。
代码风格：遵循一致的代码风格，如缩进、空格、命名等，可以使代码看起来更加整洁美观。

优化后的代码示例：

AI 改的，我不保证它能用哈。

# coding=utf-8
import os
import json
from configparser import ConfigParser
from playsound import playsound
import urllib.request as request
import urllib.error as error
import urllib.parse as parse
import logging

# Configure logging: INFO level, "timestamp - level - message" line format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load one section of the INI configuration file.
def load_config(ini_file, section_name):
    """Return the options of ``section_name`` in ``ini_file`` as a dict.

    The file is decoded as GBK; every value is returned as a string.
    """
    parser = ConfigParser()
    parser.read(ini_file, encoding="gbk")
    return {option: value for option, value in parser.items(section_name)}

# Obtain the OAuth access token.
def fetch_token(dft_cfg):
    """Request an access token using the client-credentials grant.

    Args:
        dft_cfg: Mapping providing the keys ``api_key``, ``secret_key``,
            ``token_url`` and ``scope``.

    Returns:
        The ``access_token`` string from the endpoint's JSON response.

    Raises:
        ConnectionError: If the endpoint cannot be reached at all.
        ValueError: If the response is not valid JSON, lacks
            ``access_token``/``scope``, or the configured scope is not
            among the granted scopes.
    """
    params = {
        'grant_type': 'client_credentials',
        'client_id': dft_cfg['api_key'],
        'client_secret': dft_cfg['secret_key']
    }
    post_data = parse.urlencode(params).encode('utf-8')
    req = request.Request(dft_cfg['token_url'], post_data)
    try:
        # The context manager closes the response even if read() fails.
        with request.urlopen(req, timeout=5) as response:
            result_str = response.read().decode()
        # The body is not logged: it contains the access token.
        logging.info("Token请求成功")
    except error.HTTPError as err:
        # Error status from the server; its body is parsed below.
        logging.error("Token请求失败，错误码：%s", err.code)
        result_str = err.read().decode()
    except OSError as err:
        # Plain URLError (a subclass of OSError) and socket timeouts have no
        # .code attribute and no body to read.
        raise ConnectionError("Token请求失败：%s" % err) from err

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        if dft_cfg['scope'] not in result['scope'].split(' '):
            raise ValueError("scope不正确")
        return result['access_token']
    raise ValueError("API_KEY或SECRET_KEY不正确，未找到access_token或scope")

# Fetch the next paragraph and store the reading position.
def update_text(file_lines, book_title, ini_file):
    """Return the next non-blank line of the book and persist the position.

    The position is stored as option ``text_lct`` in the INI section named
    ``book_title``. Rewriting the INI file through ConfigParser does not
    keep its comments.

    Args:
        file_lines: List of the book's lines.
        book_title: INI section name under which the position is stored.
        ini_file: Path of the INI file (read and written as GBK).

    Returns:
        The next non-blank line, or the fixed end-of-book message when no
        non-blank line is left.
    """
    end_msg = "已经读到最后一句啦！换本书吧~！"
    cfg = ConfigParser()
    cfg.read(ini_file, encoding="gbk")
    if cfg.has_option(book_title, "text_lct"):
        now_lct = int(cfg.get(book_title, "text_lct"))
    else:
        # The section may exist without a stored position; add_section()
        # would raise DuplicateSectionError in that case.
        if not cfg.has_section(book_title):
            cfg.add_section(book_title)
        now_lct = 0

    # Skip blank lines without running past the end of the book.
    while now_lct < len(file_lines) and not file_lines[now_lct].strip():
        now_lct += 1
    if now_lct >= len(file_lines):
        return end_msg

    # Store the index of the line to start from on the next call.
    cfg.set(book_title, "text_lct", str(now_lct + 1))
    with open(ini_file, "w", encoding="gbk") as config_file:
        cfg.write(config_file)
    logging.info("更新配置文件，当前读取位置：%d", now_lct + 1)
    return file_lines[now_lct]

# Main routine: read the book aloud paragraph by paragraph.
def main():
    """Load the settings, fetch a token and read the book aloud.

    Reads the module-level names ``ini_file``, ``cfg_name`` and ``book``.
    Stops after the end-of-book message has been read once.
    """
    # Load the API settings and obtain the access token.
    dft_cfg = load_config(ini_file, cfg_name)
    token = fetch_token(dft_cfg)
    # The file name of the book is the INI section that stores the position.
    book_title = os.path.basename(book)
    # Read the whole book as a list of lines.
    with open(book, "r", encoding='utf-8') as f:
        file_lines = f.readlines()

    # Message update_text() returns once the book is finished.
    end_msg = "已经读到最后一句啦！换本书吧~！"
    while True:
        # Fetch the next paragraph based on the stored reading position.
        text = update_text(file_lines, book_title, ini_file)
        logging.info("即将朗读的文本：%s", text)
        params = {
            'tok': token,
            # Quoted here and form-encoded again inside request_api().
            'tex': parse.quote_plus(text),
            'per': dft_cfg['per'],
            'spd': dft_cfg['spd'],
            'pit': dft_cfg['pit'],
            'vol': dft_cfg['vol'],
            'aue': dft_cfg['aue'],
            'cuid': dft_cfg['cuid'],
            'lan': 'zh',
            'ctp': 1
        }
        # Send the request and play the returned audio.
        request_api(params)
        if text == end_msg:
            break

## Send the TTS request and play the returned audio.

def request_api(params):
    """Synthesize speech for ``params`` and play it, retrying until success.

    Reads the module-level ``dft_cfg`` for ``tts_url`` and ``format``. The
    audio is written to ``temp.<format>`` in the working directory, played
    with playsound, and removed afterwards.

    Args:
        params: Request parameters for the TTS endpoint; they are
            form-encoded and sent as the POST body.
    """
    import time  # local import: only needed to pause between retries

    # This script imports urllib through the aliases `parse` and `request`.
    data = parse.urlencode(params).encode('utf-8')
    while True:  # iterative retry; the recursive form could exhaust the stack
        req = request.Request(dft_cfg['tts_url'], data)
        try:
            with request.urlopen(req) as f:
                result = f.read()
                # Header lookup on the response is case-insensitive.
                content_type = f.headers.get('content-type') or ''
            if 'audio/' in content_type:
                error_text = None
            else:
                # A non-audio body is the service's error description.
                error_text = result.decode('utf-8', errors='replace')
        except Exception as e:
            # Deliberately broad: any failure of the request is retried.
            error_text = str(e)
        if error_text is None:
            break
        logging.warning("tts api  error:%s", error_text)
        # Brief pause so a persistent failure does not flood the service.
        time.sleep(1)

    save_file = 'temp.' + dft_cfg['format']
    with open(save_file, 'wb') as of:
        of.write(result)
    try:
        playsound(save_file)
    finally:
        # Remove the temporary audio even if playback raises.
        os.remove(save_file)

## Script entry point.

if __name__ == '__main__':
    # Module-level settings read by main().
    ini_file = "./config.ini"
    cfg_name = "default"
    book = "D:/总要删的/追风筝的人.txt"
    # request_api() reads dft_cfg as a module-level name, so load it here.
    dft_cfg = load_config(ini_file, cfg_name)
    # main() fetches its own access token, so none is requested here.
    main()

本账号所有文章均为原创，欢迎转载，请注明文章出处：https://blog.csdn.net/qq_46106285/article/details/119956917。百度和各类采集站皆不可信，搜索请谨慎鉴别。技术类文章一般都有时效性，本人习惯不定期对自己的博文进行修正和更新，因此请访问出处以查看本文的最新版本。