import requests
from bs4 import BeautifulSoup
def geturl():
url = 'https://www.biqg.cc/book/6909/' # 目标访问网站url
header = {"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"}
req = requests.get(url = url, headers = header)
req.encoding = "utf-8"
html = req.text
bes = BeautifulSoup(html,"lxml")
texts = bes.find("div", class_="listmain")
chapters = texts.find_all("a") #该函数可以返回list下的标签为a的所有信息
words = [] #创建空的列表,存入每章节的url与章节名称
##对标签a内的内容进行提取
for chapter in chapters:
name = chapter.string #取出字符串,可以看出字符串只有章节号与章节名称,刚好符合我们所需
#其中存在一条这个语句,我们需要把它剔除,所以加入if判断:<a rel="nofollow" href="javascript:dd_show()"><<---展开全部章节--->></a>
python爬虫之下载小说(3)
最新推荐文章于 2024-08-17 16:35:51 发布