import requests
from lxml import etree
# Fetch one Hupu BBS board page, collect the title and link of every thread
# listed on it, and print them.
url = 'https://bbs.hupu.com/topic-2'
headers = {
    # Desktop Chrome User-Agent string sent with the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}

# requests applies no timeout by default, so a stalled server would hang the
# script forever; bound the wait explicitly.
response = requests.get(url=url, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of parsing an error page as if it were the list.
response.raise_for_status()
# Decode the body with the encoding detected from its content.
response.encoding = response.apparent_encoding
html = etree.HTML(response.text)

# Select each thread anchor once and read both the title and the href from
# that same element. Running two independent XPath queries and pairing the
# results by index breaks (IndexError or shifted pairs) as soon as one anchor
# has no href or does not have exactly one direct text node.
anchors = []
if html is not None:  # defensive: skip the query if parsing yielded no tree
    anchors = html.xpath(
        '//ul[@class="for-list"]/li//div[@class="titlelink box"]/a'
    )
Title = [''.join(anchor.xpath('text()')) for anchor in anchors]
Href = [anchor.get('href', '') for anchor in anchors]
print(len(Title))

# One record per thread; keys are kept as 'header' / 'links'.
data = [
    {'header': header, 'links': link}
    for header, link in zip(Title, Href)
]
for item in data:
    print('标题:{}\t链接:{}\n'.format(item['header'], item['links']))
print('爬取结束!')