import os
import re
from urllib.parse import urljoin

import requests
# Scrape every chapter of one novel from bqg99.cc and save each chapter as a
# UTF-8 text file inside ``dir_name``.

# Directory (relative to the current working directory) that receives the
# downloaded chapters.
dir_name = 'guimi'

# Table-of-contents page of the novel being downloaded.
INDEX_URL = 'https://www.bqg99.cc/book/1476058/'

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 10

# Promotional sentence the site embeds in chapter text; stripped before saving.
AD_TEXT = '请记住本书首发域名:www.bqg99.cc。笔趣阁手机版更新最快网址:m.bqg99.cc'

# Chapter links in the table of contents: (href, title).  Whitespace around
# ``=`` is optional so both ``href ="..."`` and ``href="..."`` are accepted.
CHAPTER_LINK_RE = re.compile(r'<dd><a href\s*=\s*"(.*?)">(.*?)</a></dd>')

# Chapter body container; re.S lets ``.`` span the newlines inside the div.
CHAPTER_BODY_RE = re.compile(
    r'<div id="content" class="showtxt">(.*?)</div>', re.S
)

# Characters that are not allowed (or are unsafe) in file names on common
# file systems.
INVALID_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')


def fetch_html(url):
    """Download *url* and return the decoded page text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # When the server does not declare a charset, requests falls back to a
    # default that garbles Chinese text, so use the detected encoding instead.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding
    return response.text


def extract_chapter_links(html, base_url):
    """Return ``[(absolute_url, title), ...]`` for every chapter link in *html*.

    Relative hrefs are resolved against *base_url*; absolute hrefs are kept
    unchanged.
    """
    return [
        (urljoin(base_url, href), title)
        for href, title in CHAPTER_LINK_RE.findall(html)
    ]


def extract_chapter_text(html):
    """Return the cleaned chapter text from *html*, or ``None`` if not found.

    Cleaning removes ``<br />`` tags and the site's promotional sentence.
    """
    match = CHAPTER_BODY_RE.search(html)
    if match is None:
        return None
    text = match.group(1).replace('<br />', '')
    return text.replace(AD_TEXT, '')


def safe_filename(title):
    """Return *title* with file-system-unsafe characters replaced by ``_``.

    Falls back to ``'untitled'`` when nothing usable remains.
    """
    cleaned = INVALID_FILENAME_CHARS_RE.sub('_', title).strip()
    return cleaned or 'untitled'


def main():
    """Download all chapters listed on ``INDEX_URL`` into ``dir_name``."""
    os.makedirs(dir_name, exist_ok=True)

    index_html = fetch_html(INDEX_URL)
    for chapter_url, novel_title in extract_chapter_links(index_html, INDEX_URL):
        print(novel_title)

        # A single bad chapter should not abort the whole download.
        try:
            chapter_html = fetch_html(chapter_url)
        except requests.RequestException as exc:
            print('skipped {}: {}'.format(chapter_url, exc))
            continue

        novel_content = extract_chapter_text(chapter_html)
        if novel_content is None:
            print('skipped {}: chapter content not found'.format(chapter_url))
            continue

        path = os.path.join(
            dir_name, '{}.txt'.format(safe_filename(novel_title))
        )
        # Explicit UTF-8 so Chinese text is written correctly regardless of
        # the platform's default locale encoding.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(novel_content)


if __name__ == '__main__':
    main()
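# Using Python to scrape the novel "Lord of the Mysteries" (诡秘之主).
# Blog-page residue: "latest recommended article published 2024-07-12 19:06:42".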