# 腾讯新闻爬取 (Tencent News scraper)
from selenium import webdriver
from lxml import etree
import time
# Scrape headline titles and links from the Tencent News front page.
#
# The page is opened in a Selenium-driven Chrome instance, scrolled to the
# bottom several times, and the resulting HTML is parsed with lxml. Each
# item found under <ul class="list"> is printed as "title url".
url = 'https://news.qq.com/'
bro = webdriver.Chrome()
try:
    bro.get(url)
    # Scroll to the bottom five times, pausing 3 s after each scroll
    # (presumably so that lazily loaded entries have time to appear).
    for _ in range(5):
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(3)
    res = bro.page_source
finally:
    # Always shut the browser down, even if navigation or scrolling fails;
    # otherwise the Chrome/driver processes are left running.
    bro.quit()

tree = etree.HTML(res)
lis = tree.xpath('//ul[@class="list"]/li')
for li in lis:
    titles = li.xpath('./div[@class="detail"]/h3/a/text()')
    links = li.xpath('./div[@class="detail"]/h3/a/@href')
    # Skip list items that lack a title or a link instead of relying on a
    # bare ``except`` to swallow the IndexError (and everything else).
    if not titles or not links:
        continue
    # ``link`` rather than ``url`` so the page URL above is not overwritten.
    title, link = titles[0], links[0]
    print(title, link)
# 结果 (output):