import requests
import re,os
## Request the web page
from lxml import etree
# HTTP headers sent with every request; the User-Agent string identifies the
# client as a desktop Chrome 78 browser on Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    ),
}
## Get the table-of-contents chapters and their corresponding links
def get_info(url, timeout=10):
    """Fetch a table-of-contents page and list the chapters it links to.

    The page at *url* is downloaded with the module-level ``headers``,
    decoded as UTF-8, and every ``<dd>`` matched by
    ``//*[@id="list"]/dl/dd`` is turned into one chapter entry.

    Args:
        url: Address of the contents page. Chapter links are built by plain
            string concatenation ``url + href``.
            NOTE(review): this presumably expects ``url`` to end with "/"
            and the page to use relative hrefs -- confirm against the site.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument so a stalled server cannot block the call forever.

    Returns:
        A list of ``{'title': ..., 'href': ...}`` dicts in page order.
        The list is empty when the response body is blank or no chapter
        entries are found.

    Raises:
        requests.exceptions.RequestException: propagated unchanged from
            ``requests.get`` (including timeouts).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'

    page_text = response.text
    if not page_text.strip():
        # Nothing to parse; avoid handing an empty document to lxml.
        return []

    html = etree.HTML(page_text)
    if html is None:
        # Defensive: without a root element there is nothing to query.
        return []

    get_info_list = []
    for dd in html.xpath('//*[@id="list"]/dl/dd'):
        titles = dd.xpath('a/text()')
        links = dd.xpath('a/@href')
        # A <dd> without a link or without link text is not a chapter entry;
        # skip it instead of failing with IndexError on the [0] lookup.
        if not titles or not links:
            continue
        get_info_list.append({'title': titles[0], 'href': url + links[0]})
    return get_info_list
## Save everything into a single file
def get_demo(get_info,txt):
for chapter_info in get_info:
response = requests.get