import requests
import parsel
import re
import os
# Index page of the novel to download; every chapter link is read from it.
url = 'https://b.faloo.com/724903.html'
# Browser-like User-Agent shared by every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 15

# Characters that are not allowed in Windows file names (plus control chars).
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
# First <h1> element of a chapter page; it holds "<novel title> <chapter title>".
_HEADING_PATTERN = re.compile(r'<h1>(.*?)</h1>', re.S)


def safe_filename(name, default='untitled'):
    """Return *name* made safe for use as a single file or folder name.

    Characters that are illegal in file names are replaced by ``_`` and
    leading/trailing spaces and dots are removed. *default* is returned
    when nothing usable is left (including when *name* is ``None``).
    """
    cleaned = _INVALID_FILENAME_CHARS.sub('_', name or '').strip(' .')
    return cleaned or default


def absolute_url(base, href):
    """Resolve *href* (absolute, protocol-relative or relative) against *base*."""
    # Local import: the file's import header does not provide urllib.parse.
    from urllib.parse import urljoin

    return urljoin(base, href)


def extract_chapter_title(html, novel_title):
    """Return the chapter title found in the first ``<h1>`` of *html*.

    When the heading starts with *novel_title*, that prefix is removed so
    only the chapter part remains. Returns ``None`` when the page has no
    ``<h1>`` or the heading is empty.
    """
    match = _HEADING_PATTERN.search(html)
    if match is None:
        return None
    heading = match.group(1).strip()
    if novel_title and heading.startswith(novel_title):
        heading = heading[len(novel_title):].strip()
    return heading or None


def fetch_page(page_url):
    """Download *page_url* and return its decoded text.

    Raises ``requests.RequestException`` on network errors, timeouts and
    non-2xx HTTP responses.
    """
    response = requests.get(url=page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Let requests guess the encoding from the body, as the declared one
    # is not trusted by this script.
    response.encoding = response.apparent_encoding
    return response.text


def main():
    """Download every chapter linked from ``url`` into one text file each.

    Files are written to a folder named after the novel, inside the
    current working directory.
    """
    index = parsel.Selector(fetch_page(url))
    href_list = index.css('.DivTd3 a::attr(href)').getall()
    novel_title = (index.css('#novelName::text').get() or '').strip()
    print(novel_title)
    print(href_list)

    folder = safe_filename(novel_title, default='novel')
    os.makedirs(folder, exist_ok=True)

    for number, href in enumerate(href_list, start=1):
        chapter_url = absolute_url(url, href)
        try:
            html = fetch_page(chapter_url)
        except requests.RequestException as exc:
            # One broken chapter should not abort the whole download.
            print(f'skipped {chapter_url}: {exc}')
            continue

        # Fall back to a numbered name when the page has no usable heading.
        title = extract_chapter_title(html, novel_title) or f'chapter_{number:04d}'
        paragraphs = parsel.Selector(html).css('.noveContent p ::text').getall()
        path = os.path.join(folder, safe_filename(title) + '.txt')
        with open(path, mode='w', encoding='utf-8') as f:
            f.write('\n'.join(paragraphs))


if __name__ == '__main__':
    main()
结果展现:
总结:
1.打开浏览器开发者工具的几种快捷方式:
F12 / Fn+F12(部分笔记本键盘)/ Ctrl+Shift+I
2.列表转字符串的方法:
分隔符.join(列表),即 str.join(iterable),其中 str 是用作分隔符的字符串,列表中的元素必须都是字符串
例如:'\n'.join(selector.css('.noveContent p ::text').getall()) 会用换行符把各段文字连接成一个字符串