python人工智能小说爬取 - - 语音合成_免费爬取小说ai-CSDN博客

本文链接：https://blog.csdn.net/qq_42543264/article/details/101624052

1，爬取小说并保存到本地

直接上代码了

import requests
from lxml import etree
import os

# 设计模式  --  面向对象


class Spider(object):
    """Crawl the qidian.com listing page and save each chapter as a text file.

    The crawl runs in three steps, one per public method:

    * ``state_request`` reads the listing page and creates one directory
      per novel.
    * ``next_section`` reads a novel page and walks its chapter links.
    * ``finally_file`` downloads one chapter and writes it to
      ``<novel>/<chapter>.txt``.
    """

    # Seconds to wait for each HTTP request; without a timeout a stalled
    # connection would block the crawl forever.
    REQUEST_TIMEOUT = 10

    # Characters that are not allowed (or are path separators) in file names
    # on Windows and/or POSIX; each one is replaced by an underscore.
    _UNSAFE_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})

    @staticmethod
    def _safe_name(name):
        """Return *name* stripped and with path-hostile characters replaced."""
        cleaned = name.strip().translate(Spider._UNSAFE_CHARS)
        # Never return an empty path component.
        return cleaned or "_"

    @staticmethod
    def _chapter_path(novel, section):
        """Return the portable path of the text file for one chapter."""
        return os.path.join(
            Spider._safe_name(novel),
            Spider._safe_name(section) + ".txt",
        )

    def state_request(self):
        """Fetch the listing page and crawl every novel linked from it.

        A directory named after each novel is created in the current working
        directory (if it does not exist yet) before its chapters are fetched.
        """
        response = requests.get('https://www.qidian.com/all', timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)
        # Novel titles and their links come from the same <a> elements, so the
        # two lists line up pairwise.
        novel_list = xml.xpath('//div[@class="book-mid-info"]/h4/a/text()')
        novel_url_list = xml.xpath('//div[@class="book-mid-info"]/h4/a/@href')
        for novel, novel_url in zip(novel_list, novel_url_list):
            os.makedirs(self._safe_name(novel), exist_ok=True)
            self.next_section(novel, novel_url)

    def next_section(self, novel, novel_url):
        """Fetch one novel page and save every chapter linked from it.

        Args:
            novel: Novel title, used as the output directory name.
            novel_url: Link to the novel page. 'http:' is prepended, so the
                href is expected to be scheme-less ('//host/path').
        """
        response = requests.get('http:' + novel_url, timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)
        section_list = xml.xpath('//ul[@class="cf"]/li/a/text()')       # chapter titles
        section_url_list = xml.xpath('//ul[@class="cf"]/li/a/@href')    # chapter links
        for section, section_url in zip(section_list, section_url_list):
            self.finally_file(novel, section, section_url)

    def finally_file(self, novel, section, section_url):
        """Download one chapter and write its text to ``<novel>/<section>.txt``.

        Args:
            novel: Novel title, used as the output directory name.
            section: Chapter title, used as the output file name.
            section_url: Link to the chapter page. 'http:' is prepended, so
                the href is expected to be scheme-less ('//host/path').
        """
        response = requests.get('http:' + section_url, timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)
        # One text node per paragraph, joined with newlines.
        content = "\n".join(xml.xpath('//div[@class="read-content j_readContent"]/p/text()'))
        fileName = self._chapter_path(novel, section)
        # Make sure the target directory exists even when this method is
        # called without going through state_request first.
        os.makedirs(os.path.dirname(fileName), exist_ok=True)
        print("正在保存小说文件:" + fileName)
        with open(fileName, 'w', encoding='utf-8') as f:
            f.write(content)


# Script entry point: build a spider and start crawling from the listing page.
# These statements are not guarded, so they also run when the module is imported.
spider = Spider()
spider.state_request()

以上是爬虫全部代码

2，语音合成

直接上代码了

需要先在百度AI开放平台注册并创建语音合成应用，以获取下面代码中用到的 AppID、API Key 和 Secret Key：https://login.bce.baidu.com/?account=

import os

from aip import AipSpeech

# Speaker selection. Basic voices: 0 = Du Xiaomei, 1 = Du Xiaoyu,
# 3 = Du Xiaoyao, 4 = Du Yaya.
# Premium voices: 5 = Du Xiaojiao, 103 = Du Miduo, 106 = Du Bowen,
# 110 = Du Xiaotong, 111 = Du Xiaomeng. The default is Du Xiaomei.
PER = 0
# Speaking rate, 0-9; the default 5 is medium speed.
SDP = 5
# Pitch, 0-9; the default 5 is medium pitch.
PIT = 5
# Volume, 0-9; the default 5 is medium volume.
VOL = 5
# NOTE: the four values above are reference defaults only; the synthesis call
# below passes its own literal options and does not read them.

# NOTE(security): these credentials are hard-coded in a published file and
# should be treated as leaked. Set BAIDU_APP_ID / BAIDU_API_KEY /
# BAIDU_SECRET_KEY in the environment to use your own; the literals remain
# only as backward-compatible fallbacks.
app_id = os.environ.get('BAIDU_APP_ID', '17370766')
api_key = os.environ.get('BAIDU_API_KEY', 'icSpdlysLxpPYe5QbCMNhxvY')
secret_key = os.environ.get('BAIDU_SECRET_KEY', 'XwnoVuHrVLioKDc2LlgTOPwQVSTVwb5L')

client = AipSpeech(app_id, api_key, secret_key)

result = client.synthesis("烈日当空，灼烧大地，尽管已经是八月末了，但炎热的夏季依旧在散发着阵阵余威。", "zh", 2, {
    "vol": 9,   # volume
    # NOTE(review): the range comment above gives 0-9 for the speaking rate;
    # confirm that the service accepts a fractional value such as 3.5.
    "spd": 3.5,   # speaking rate
    "pit": 3,   # pitch
    "per": 5,   # voice (5 = Du Xiaojiao in the list above)
})

# Per the Baidu SDK documentation, synthesis() returns the audio bytes on
# success and a dict describing the error on failure. Check before opening the
# file so a failed request neither truncates audio.mp3 nor ends in an opaque
# TypeError from f.write().
if isinstance(result, dict):
    raise RuntimeError("Speech synthesis failed: %s" % (result,))

with open("audio.mp3", "wb") as f:
    f.write(result)

以上爬虫用到的核心技术：

requests 请求对象

etree 整理xml文档对象

xpath 定位获取信息

os 创建文件夹

语音合成用到的核心技术：
百度的aip

安装方式两种：

pip install baidu-aip（PyPI 上的包名是 baidu-aip，安装后在代码中以 from aip import AipSpeech 导入）

conda install aip

前提：使用conda命令之前需要有Anaconda运行环境

谁有更简单暴力的方法可以下方留言一起进步！！！