获取文本并制造MP3

本文介绍了使用Python实现的自动化程序,通过requests库抓取网络小说,BeautifulSoup解析HTML内容,然后将文本转换为MP3格式,减轻阅读负担,但强调了合法使用技术,避免违法行为。
摘要由CSDN通过智能技术生成
                           #      
本文介绍如何方便地获取自己想要的文章内容,并将文字转换为 MP3,以减轻眼睛的疲劳。



#上代码
import requests
from bs4 import BeautifulSoup
import edge_tts
import asyncio
import os.path
import time
from requests.adapters import HTTPAdapter
from requests import packages

# Wall-clock timestamp taken when the script starts; it is subtracted from
# the end timestamp at the bottom of the script to report the running time.
start_time = time.time()
# Root URL of the novel site; getur1() uses it as the base for chapter links.
ur2 = "https://www.biquge365.net/"


# Number of reconnect attempts: 5 (disabled)
# requests.DEFAULT_RETRIES=5
def getur1():
    """Download the book's index page and collect its chapter links.

    The hard-coded index page is fetched, the ``<div class="border">``
    element is located, and every ``<a>`` inside it that carries an ``href``
    is turned into one entry of the result.

    Returns:
        list: ``[url, name]`` pairs in page order. ``url`` is the anchor's
        ``href`` resolved against the module-level ``ur2`` base URL and
        ``name`` is the anchor's text.

    Raises:
        requests.RequestException: if the index page cannot be downloaded
            (including the 30 second timeout).
        ValueError: if the page contains no ``div.border`` element.
    """
    from urllib.parse import urljoin

    ur1 = 'https://www.biquge365.net/newbook/12011/'
    # Request headers; the User-Agent can be copied from the browser's
    # developer tools (Network tab).
    heade = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'}

    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(url=ur1, headers=heade, timeout=30)

    # The attribute is "encoding"; the previous spelling ("enconding") only
    # created an unused attribute, so the intended UTF-8 decoding never applied.
    response.encoding = "utf-8"
    html = response.text

    # Parse the page with BeautifulSoup.
    soup = BeautifulSoup(html, "html.parser")
    container = soup.find("div", "border")
    if container is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("chapter list (div.border) not found in " + ur1)

    kuangs = []
    for anchor in container.find_all("a"):
        href = anchor.get('href')
        if not href:
            # An anchor without a target cannot be downloaded later.
            continue
        # urljoin copes with root-relative ("/x/1.html"), relative and
        # absolute hrefs; plain concatenation would produce "//" or a
        # doubled host for the first and last of those.
        url1 = urljoin(ur2, href)
        # get_text() also works when the anchor wraps its text in child tags,
        # where .string would be None.
        name = anchor.get_text()
        kuangs.append([url1, name])
    return kuangs


async def _save_speech(text, output, voice, rate, volume, timeout=1000):
    """Synthesize *text* with edge-tts and save the audio to *output*.

    Raises:
        asyncio.TimeoutError: if saving takes longer than *timeout* seconds.
    """
    speech = edge_tts.Communicate(text=text, volume=volume, voice=voice, rate=rate)
    await asyncio.wait_for(speech.save(output), timeout=timeout)


def _remove_if_exists(path):
    """Delete *path* if present, so a partial file is not taken for a finished one."""
    if os.path.exists(path):
        os.remove(path)


if __name__ == "__main__":
    # For every chapter: save its text to D:/novel/<name>.txt, then convert
    # that text to D:/mp3/<name>.mp3. Files that already exist are reused, so
    # the script can be re-run to resume an interrupted job.
    newopen = getur1()
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'}

    # edge_tts settings, identical for every chapter.
    volume = "+0%"
    rate = "-4%"
    voice = "zh-CN-YunxiNeural"

    # Silence the InsecureRequestWarning caused by verify=False below.
    requests.packages.urllib3.disable_warnings()

    # Make sure both output directories exist before writing into them.
    os.makedirs("D:/novel/", exist_ok=True)
    os.makedirs("D:/mp3/", exist_ok=True)

    # One session for the whole run, so connections are reused and closed
    # at the end instead of a new session being built per chapter.
    with requests.Session() as s:
        # Retry failed connections up to 19 times, for both URL schemes.
        s.mount('http://', HTTPAdapter(max_retries=19))
        s.mount('https://', HTTPAdapter(max_retries=19))

        for tar in newopen:
            txt_path = "D:/novel/" + tar[1] + ".txt"
            output = "D:/mp3/" + tar[1] + ".mp3"

            if os.path.exists(txt_path):
                # Text already saved: no need to download the chapter again.
                print(tar[0])
            else:
                print("h")
                # Timeout is 30 s.
                req = s.get(url=tar[0], headers=header, timeout=30, verify=False)
                # "encoding" (not "enconding") is the attribute requests reads.
                req.encoding = "utf-8"
                html = req.text
                print("漂亮")
                # Parse the page with BeautifulSoup.
                be = BeautifulSoup(html, "html.parser")
                texts = be.find("div", id="txt")
                if texts is None:
                    # No text container on the page: skip this chapter
                    # rather than abort the whole run.
                    print("no chapter text found: " + tar[0])
                    continue
                # Paragraphs on the page are separated by four non-breaking spaces.
                texts_list = texts.text.split('\xa0' * 4)
                try:
                    with open(txt_path, "w", encoding='gbk') as file:
                        for line in texts_list:
                            file.write(line + "\n")
                            print("i")
                except BaseException:
                    # A half-written file would be treated as complete on the
                    # next run because of the exists() check above.
                    _remove_if_exists(txt_path)
                    raise

            # Convert the saved text to mp3 unless that was already done.
            if os.path.exists(output):
                print(tar[0] + "12")
                continue

            with open(txt_path, "r", encoding="gbk") as f:
                text = f.read()
            print(text)

            # The guard has to wrap the call that actually runs the coroutine;
            # wrapping only its definition can never catch anything.
            try:
                asyncio.run(_save_speech(text, output, voice, rate, volume))
            except BaseException:
                # Covers the asyncio.TimeoutError raised by wait_for as well
                # as cancellation and Ctrl-C.
                print("error has happend")
                # Drop the partial mp3 so the next run retries this chapter.
                _remove_if_exists(output)
                raise
end_time = time.time()
# Print the total running time in seconds.
# The result is called "elapsed" instead of "all" so that the builtin all()
# is not shadowed at module level.
elapsed = end_time - start_time
print(elapsed)

                                                                      成果

注意:

请不要用此技术做违法行为

一切违法行为均与本人无关

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值