获取文本并制造MP3

君莫视

已于 2024-03-15 12:54:35 修改

阅读量919

点赞数 30

分类专栏：抓取小说制造mp3 文章标签： python

于 2024-03-01 15:55:39 首次发布

本文链接：https://blog.csdn.net/z2448244313/article/details/136398721

版权

抓取小说制造mp3 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本文介绍了使用Python实现的自动化程序，通过requests库抓取网络小说，BeautifulSoup解析HTML内容，然后将文本转换为MP3格式，减轻阅读负担，但强调了合法使用技术，避免违法行为。

摘要由CSDN通过智能技术生成

                           #      
本文介绍如何抓取自己想要的文章内容，并将文字转换为 MP3，以减轻眼睛的疲劳。



#上代码
import requests
from bs4 import BeautifulSoup
import edge_tts
import asyncio
import os.path
import time
from requests.adapters import HTTPAdapter
from requests import packages

# Wall-clock start of the run; the end of the script subtracts it from the
# finish time to print the total duration.
start_time = time.time()
# Site root that scraped chapter hrefs are appended to in getur1().
ur2 = "https://www.biquge365.net/"


# Global retry count of 5 (left disabled).
# requests.DEFAULT_RETRIES=5
def getur1():
    """Fetch the book's index page and collect its chapter links.

    Returns:
        A list of ``[url, name]`` pairs, one for each anchor found inside the
        ``div`` with class ``border``. Anchors without an ``href`` or without
        any text are skipped. The list is empty when that ``div`` is not
        present in the page.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    ur1 = 'https://www.biquge365.net/newbook/12011/'
    # Request headers; the value can be looked up in the browser's
    # developer tools (Network tab).
    heade = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'}

    # A timeout keeps a stalled server from hanging the script forever.
    resp = requests.get(url=ur1, headers=heade, timeout=30)
    resp.raise_for_status()
    # The attribute is spelled ``encoding``; the previous misspelling only
    # created an unused attribute and never influenced decoding.
    resp.encoding = "utf-8"

    # Parse the page with BeautifulSoup.
    be = BeautifulSoup(resp.text, "html.parser")
    container = be.find("div", "border")
    if container is None:
        # The expected chapter list container is missing: nothing to return.
        return []

    kuangs = []
    for chapter in container.find_all("a"):
        href = chapter.get('href')
        # get_text() also works for anchors with nested tags, where
        # ``.string`` would be None.
        name = chapter.get_text(strip=True)
        if not href or not name:
            continue
        # urljoin handles root-relative, relative and absolute hrefs without
        # producing a doubled slash after the host.
        kuangs.append([urljoin(ur2, href), name])
    return kuangs


if __name__ == "__main__":
    # Chapter titles become file names, so characters that Windows forbids in
    # file names are replaced with "_".
    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    async def synthesize(text, output, voice, rate, volume):
        """Speak ``text`` with edge_tts and save the audio to ``output``."""
        ok = edge_tts.Communicate(text=text, volume=volume, voice=voice, rate=rate)
        # Give up on a single chapter after 1000 seconds.
        await asyncio.wait_for(ok.save(output), timeout=1000)

    newopen = getur1()
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'}

    # edge_tts settings.
    volume = "+0%"
    rate = "-4%"
    voice = "zh-CN-YunxiNeural"

    # Make sure both output folders exist before any file is written.
    os.makedirs("D:/novel/", exist_ok=True)
    os.makedirs("D:/mp3/", exist_ok=True)

    # Certificate verification is turned off below, so silence the warning
    # urllib3 would otherwise print for every request.
    requests.packages.urllib3.disable_warnings()
    # One session for the whole run; each adapter retries up to 19 times.
    s = requests.Session()
    s.mount('http://', HTTPAdapter(max_retries=19))
    s.mount('https://', HTTPAdapter(max_retries=19))

    for tar in newopen:
        if not tar[1]:
            # Without a title there is no file name to write to.
            print("skip (no title): " + tar[0])
            continue
        title = tar[1].translate(unsafe_chars)
        txt_path = "D:/novel/" + title + ".txt"
        output = "D:/mp3/" + title + ".mp3"

        # Step 1: save the chapter text, downloading only when it is missing.
        if os.path.exists(txt_path):
            print(tar[0])
        else:
            print("h")
            try:
                # 30 second timeout per request.
                req = s.get(url=tar[0], headers=header, timeout=30, verify=False)
                req.raise_for_status()
            except requests.RequestException as exc:
                # Report and move on; a later run picks this chapter up again
                # because no text file was written for it.
                print("download failed: " + tar[0] + " " + str(exc))
                continue
            # The attribute is spelled ``encoding``; the previous misspelling
            # had no effect on decoding.
            req.encoding = "utf-8"
            html = req.text
            print("漂亮")
            # Parse the page with BeautifulSoup.
            be = BeautifulSoup(html, "html.parser")
            texts = be.find("div", id="txt")
            if texts is None:
                print("no chapter text found: " + tar[0])
                continue
            # Paragraphs are separated by runs of four non-breaking spaces.
            texts_list = texts.text.split('\xa0' * 4)
            # gb18030 reads and writes plain GBK text unchanged but can also
            # encode characters GBK cannot (e.g. a leftover '\xa0'), which
            # would otherwise abort the write with UnicodeEncodeError.
            with open(txt_path, "w", encoding='gb18030') as file:
                for line in texts_list:
                    file.write(line + "\n")
                    print("i")

        # Step 2: convert the saved text to MP3 unless that was already done.
        if os.path.exists(output):
            print(tar[0] + "12")
            continue
        with open(txt_path, "r", encoding="gb18030") as f:
            text = f.read()
        print(text)
        try:
            asyncio.run(synthesize(text, output, voice, rate, volume))
        except BaseException:
            print("error has happend")
            # Remove a partially written file so the next run does not
            # mistake it for a finished chapter.
            if os.path.exists(output):
                os.remove(output)
            raise
end_time = time.time()
# Print the total run time in seconds. The result is kept in ``elapsed``
# rather than ``all`` so the builtin all() is not shadowed.
elapsed = end_time - start_time
print(elapsed)

成果