from urllib import request
import re
import os
# Fetch the source of the novel's index page.
url = 'https://www.xs4.cc/dushizhiwozhendewudi/'
# Open the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, even if read() or decode() raises.
with request.urlopen(url) as response:
    # The body is decoded as GBK (presumably the site's encoding; confirm
    # against the page's declared charset).
    html = response.read().decode('gbk')
# Extract the chapter links and the novel title.
# Pull the wanted data out of the page with regular expressions (re).
# NOTE(review): the two pattern literals below are broken as they stand: each
# quoted string is split across lines and runs straight into the following
# assignment, so this section does not parse. It looks like HTML markup inside
# the patterns was stripped when the file was captured. Restore the original
# patterns from the upstream source rather than guessing (TODO confirm).
# Each element of urls is later unpacked into a (link, title) pair, so its
# pattern presumably contained two capture groups; verify when restoring.
# dir_name takes the last match of the second pattern and is used as the
# output directory name.
reg = '
(.*?)'urls = re.findall(reg,html)
reg1='
(.*?)
'dir_name = re.findall(reg1,html)[-1]
# Show which directory name was extracted.
print(dir_name)
# Create the output directory named after the novel. exist_ok=True makes
# re-runs harmless and avoids the race between checking for the directory
# and creating it.
os.makedirs(dir_name, exist_ok=True)
for url in urls:
#小说链接
# novel_url = url[0]
#小说标题
# novel_title = url[1]
novel_url,novel_title = url
novel_title = novel_title.replace('正文 ','')
#获取章节源代码
chapt = request.urlopen(novel_u