准备
安装 Python 以及必要的第三方模块(requests、lxml;XPath 解析由 lxml 提供)
思路
- 根据 URL 获取网页内容,并用 try/except 做异常处理
- 解析页面内容,得到作者和章节列表
- 打开文件,写入章节名和作者
- 进一步获取文章内容
- 编辑文件继续写入
代码
import requests
import time
import sys
from lxml import etree
from urllib import parse
# 首先根据 URL 获取网页内容
def get_content(url):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The page text on success, or the sentinel string ``" ERROR "`` when
        the request fails (connection problem, timeout, or HTTP error
        status). The sentinel is kept so existing callers keep working.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    }
    try:
        # A timeout keeps a stalled server from hanging the script forever.
        r = requests.get(url=url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of returning the error
        # page as if it were real content.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only request-level failures are handled here; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        print("Error '%s' happened on line %d" % (exc, exc.__traceback__.tb_lineno))
        return " ERROR "
    # The response is decoded as UTF-8 regardless of the declared charset.
    r.encoding = 'utf-8'
    return r.text
def _first(nodes, default=''):
    """Return the first item of an XPath result list, or *default* if empty."""
    return nodes[0] if nodes else default


def get_analysis(content):
    """Let the user pick a novel from a search-result page and download it.

    Parses the search-result HTML in *content*, prints the numbered matches,
    reads the user's choice from stdin, then fetches every chapter of the
    chosen book and appends it to the output file through ``save``.

    Args:
        content: HTML text of the search-result page.

    Raises:
        SystemExit: if no novel is found or the entered number is not a
            valid choice.
    """
    # save() reads the current chapter title from this module-level name.
    global chapterName2

    base_url = 'https://www.xsbiquge.com/'

    ele = etree.HTML(content.encode('utf-8'))
    found = []
    for item in ele.xpath("//div[@class='result-game-item-detail']"):
        href = _first(item.xpath("h3/a/@href"))
        if not href:
            # An entry without a link cannot be downloaded; skip it so the
            # printed numbers always map to a usable book.
            continue
        title = _first(item.xpath("h3/a/@title"))
        author = _first(item.xpath("div/p/span[2]/text()"))
        found.append((href, title, author))

    if not found:
        print('没有找到你想要的小说')
        sys.exit()

    for number, (href, title, author) in enumerate(found, start=1):
        print(number)
        print(href, title, author)

    choice = input('你选择下载那一篇小说(输入序号数字)')
    try:
        book_number = int(choice)
    except ValueError:
        book_number = 0
    # Reject 0, negatives and numbers past the end: 0 would otherwise wrap
    # around to the last result via index -1.
    if not 1 <= book_number <= len(found):
        print("请正确输入")
        sys.exit()

    # urljoin copes with both absolute and site-relative links.
    book_url = parse.urljoin(base_url, found[book_number - 1][0])
    book_page = get_content(book_url)
    book_tree = etree.HTML(book_page.encode('utf-8'))

    # Iterate over the <a> elements themselves so every chapter URL stays
    # paired with its own title.
    chapter_links = book_tree.xpath("//div[@id='list']/dl/dd/a")
    print(len(chapter_links))

    for link in chapter_links:
        href = link.get('href')
        if not href:
            continue
        chapter_url = parse.urljoin(book_url, href)
        print(chapter_url)

        page = get_content(chapter_url)
        # get_content() returns " ERROR " when the download failed; skip
        # such chapters (and blank pages) instead of saving junk.
        if page == " ERROR " or not page.strip():
            continue

        chapter_tree = etree.HTML(page.encode('utf-8'))
        paragraphs = chapter_tree.xpath("//div[@id='content']/text()")

        chapterName2 = ''.join(link.itertext()).strip()
        save("\n".join(paragraphs))
def save(fil, chapter_name=None, book_name=None):
    """Append one chapter (title line, then body) to the novel's text file.

    Args:
        fil: Chapter body text.
        chapter_name: Title written on the line before the body. Defaults to
            the module-level ``chapterName2``.
        book_name: Output path without the ``.txt`` suffix. Defaults to the
            module-level ``books``.
    """
    if chapter_name is None:
        chapter_name = chapterName2
    if book_name is None:
        book_name = books
    filename = book_name + ".txt"
    # The context manager closes the file even if a write fails; the old
    # code referenced ``f.close`` without calling it.
    with open(filename, "a", encoding='utf-8') as f:
        f.write(chapter_name + '\n')
        f.write(fil + '\n')
# Script entry: ask for a title, run the site search, then hand the result
# page to get_analysis() for selection and download.
start_time = time.time()

# ``books`` stays a module-level name because save() builds the output
# filename from it.
books = input('你要搜索的小说是:')
ret = parse.quote(books)  # percent-encode the title for the query string
url = 'https://www.xsbiquge.com/search.php?keyword={}'.format(ret)
print(url)

content = get_content(url)
get_analysis(content)

# Report wall-clock time since start_time, which includes time spent
# waiting at the prompt above.
end_time = time.time()
project_time = end_time - start_time
print('程序用时', project_time)
失败:
在抓取正文那一步失败了,还不知道该怎么解决。
代码越改越乱。
目标:
明天继续。。。吐了