代码如下:
import requests
from lxml import etree
# Crawl a serialized story chapter by chapter: start at the first chapter
# page, follow each page's "next" link, and append every chapter to
# xiaoshuo.txt. The crawl stops after the page whose next link is './'.
BASE_URL = 'https://www.*****.com/tuili/9696/'
LAST_PAGE_HREF = './'  # value of the "next" href on the final chapter page
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server blocks forever

# The headers are the same for every request, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}

# Starting link (first chapter).
url = f'{BASE_URL}xxxxx.html'
while True:
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of scraping an error page as a chapter.
    resp.raise_for_status()
    # Force GBK decoding of the body before reading resp.text.
    resp.encoding = 'gbk'
    page = etree.HTML(resp.text)

    info = page.xpath('//tr/td/p/text()')
    titles = page.xpath('//tr/td/strong/font/text()')
    nav_links = page.xpath('//td/strong/a/@href')

    if not titles:
        # Same exception type the bare [0] lookup would raise, but with the
        # offending URL so the failure can be diagnosed.
        raise IndexError(f'chapter title not found on page: {url}')
    title = titles[0]

    # The third matching link (index 2) is used as the next-chapter link.
    # NOTE(review): presumably the navigation is prev / index / next --
    # confirm against the live page. If fewer than three links are present,
    # treat this as the last page so the chapter already fetched is still
    # saved instead of being lost to an IndexError.
    next_url = nav_links[2] if len(nav_links) > 2 else LAST_PAGE_HREF

    print(title)
    text = ' '.join(info)
    # Append per chapter so progress is on disk even if a later page fails.
    with open('xiaoshuo.txt', mode='a', encoding='utf-8') as f:
        f.write(f'#{title}\n{text}\n\n')

    if next_url == LAST_PAGE_HREF:
        break
    # Hrefs are appended to the chapter directory, as in the original script.
    url = f'{BASE_URL}{next_url}'