1,爬取小说:
测试地址:https://www.qidian.com/
将小说保存到本地
直接上代码了
import requests
from lxml import etree
import os
# 设计模式 -- 面向对象
class Spider(object):
    """Crawler that saves every chapter of the novels listed on qidian.com.

    The crawl runs in three stages, each calling the next:
    ``state_request`` (listing page) -> ``next_section`` (one novel's chapter
    list) -> ``finally_file`` (one chapter written to disk).
    """

    # Seconds to wait on a request before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Maps characters that are not allowed in Windows file names (which also
    # covers the POSIX path separator) to underscores.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    @staticmethod
    def _safe_name(name):
        """Return ``name`` made usable as a single file or folder name.

        Unsafe characters become underscores and surrounding whitespace is
        removed; a name that ends up empty is replaced by ``'_'``. Applying
        the function twice gives the same result as applying it once.
        """
        cleaned = name.translate(Spider._UNSAFE_CHARS).strip()
        return cleaned or '_'

    def state_request(self):
        """Fetch the listing page, create a folder per novel, and crawl each novel.

        Titles and links are read from the ``book-mid-info`` blocks of
        https://www.qidian.com/all. One folder named after each sanitized
        title is created relative to the current working directory.
        """
        response = requests.get('https://www.qidian.com/all', timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)  # parse the HTML into an element tree
        novel_list = xml.xpath('//div[@class="book-mid-info"]/h4/a/text()')  # novel titles
        novel_url_list = xml.xpath('//div[@class="book-mid-info"]/h4/a/@href')  # novel links
        for novel, novel_url in zip(novel_list, novel_url_list):
            # exist_ok lets a re-run reuse folders left by an earlier crawl.
            os.makedirs(self._safe_name(novel), exist_ok=True)
            self.next_section(novel, novel_url)

    def next_section(self, novel, novel_url):
        """Fetch one novel's page and save every chapter it lists.

        Args:
            novel: Novel title; determines the output folder.
            novel_url: Link to the novel page. ``'http:'`` is prepended, so
                it is presumably a protocol-relative URL (``//host/path``).
        """
        response = requests.get('http:' + novel_url, timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)
        section_list = xml.xpath('//ul[@class="cf"]/li/a/text()')  # chapter titles
        section_url_list = xml.xpath('//ul[@class="cf"]/li/a/@href')  # chapter links
        for section, section_url in zip(section_list, section_url_list):
            self.finally_file(novel, section, section_url)

    def finally_file(self, novel, section, section_url):
        """Download one chapter and write its text to ``<novel>/<section>.txt``.

        Args:
            novel: Novel title; its sanitized form is the output folder.
            section: Chapter title; its sanitized form is the file name.
            section_url: Link to the chapter page; ``'http:'`` is prepended.
        """
        response = requests.get('http:' + section_url, timeout=self.REQUEST_TIMEOUT)
        xml = etree.HTML(response.text)
        content = "\n".join(xml.xpath('//div[@class="read-content j_readContent"]/p/text()'))
        folder = self._safe_name(novel)
        # Ensure the folder exists so this method also works when called directly.
        os.makedirs(folder, exist_ok=True)
        # os.path.join picks the platform's separator instead of a hard-coded "\\".
        file_path = os.path.join(folder, self._safe_name(section) + ".txt")
        print("正在保存小说文件:" + file_path)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
# Script entry point: build the crawler and start the crawl from the listing page.
spider = Spider()
spider.state_request()
以上是爬虫全部代码
2,语音合成
直接上代码了
需要先在 百度ai开放平台注册:https://login.bce.baidu.com/?account=
# Voice selection. Basic voices: 0 = Du Xiaomei, 1 = Du Xiaoyu,
# 3 = Du Xiaoyao, 4 = Du Yaya. Premium voices: 5 = Du Xiaojiao,
# 103 = Du Miduo, 106 = Du Bowen, 110 = Du Xiaotong, 111 = Du Xiaomeng.
# The default is Du Xiaomei.
PER = 0
# Speech rate, range 0-9; the default of 5 is medium speed.
# NOTE(review): the name looks like a transposition of "spd" - confirm before renaming.
SDP = 5
# Pitch, range 0-9; the default of 5 is medium pitch.
PIT = 5
# Volume, range 0-9; the default of 5 is medium volume.
VOL = 5
from aip import AipSpeech
# NOTE(review): these credentials are hard-coded in the source; consider
# loading them from the environment or a config file kept out of version control.
app_id = '17370766'
api_key = 'icSpdlysLxpPYe5QbCMNhxvY'
secret_key = 'XwnoVuHrVLioKDc2LlgTOPwQVSTVwb5L'
client = AipSpeech(app_id, api_key, secret_key)

# Synthesize one Chinese ("zh") sentence. The third positional argument (2) is
# presumably the SDK's client-type parameter - confirm against the SDK docs.
result = client.synthesis("烈日当空,灼烧大地,尽管已经是八月末了,但炎热的夏季依旧在散发着阵阵余威。", "zh", 2, {
    "vol": 9,  # volume
    # NOTE(review): fractional speech rate; confirm the API accepts non-integer values.
    "spd": 3.5,  # speech rate
    "pit": 3,  # pitch
    "per": 5,  # voice
})

# The SDK returns the audio as bytes on success and an error dict on failure.
# Writing that dict to a binary file would raise an unrelated TypeError and
# leave an empty audio.mp3 behind, so fail early with the API's own error.
if isinstance(result, dict):
    raise RuntimeError("speech synthesis failed: %r" % (result,))
with open("audio.mp3", "wb") as f:
    f.write(result)
以上爬虫用到的核心技术:
requests 请求对象
etree 整理xml文档对象
xpath 定位获取信息
os 创建文件夹
语音合成用到的核心技术:
百度的aip
安装方式两种:
pip install baidu-aip
conda install aip
前提:使用conda命令之前需要有Anaconda运行环境
谁有更简单暴力的方法可以下方留言一起进步!!!