import requests
from lxml import etree
# Crawl the paginated index of jandan.net and print the headline text found
# in every `div.indexs` entry of each page.

PAGE_URL = 'http://jandan.net/page/%d'
LAST_PAGE = 300  # pages 1..300 inclusive are requested
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server blocks forever
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
}

# One session for the whole crawl so the headers are set once and the
# connection can be reused between page requests.
with requests.Session() as session:
    session.headers.update(REQUEST_HEADERS)

    for page in range(1, LAST_PAGE + 1):
        try:
            response = session.get(PAGE_URL % page, timeout=REQUEST_TIMEOUT)
            # Treat 4xx/5xx answers as failures instead of parsing an error page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # Best-effort crawl: report the failing page and keep going.
            print('skipping page %d: %s' % (page, exc))
            continue

        eroot = etree.HTML(response.text)
        if eroot is None:
            # NOTE(review): guards against the parser yielding no tree for a
            # body without usable markup -- confirm against the lxml version in use.
            continue

        # Each matching div yields one dict holding the list of text nodes
        # of its `h2 > a` link; one dict is printed per div.
        for div in eroot.xpath('//div[@class="indexs"]'):
            item = {"新闻": div.xpath('./h2/a/text()')}
            print(item)
# 练习爬取煎蛋网新闻-Python
# 最新推荐文章于 2020-06-12 19:30:03 发布