一个简单的爬虫
方法一:
from selenium import webdriver
from lxml import etree

# Start a Firefox WebDriver session and grab the rendered HTML of the
# Sina home page.
driver = webdriver.Firefox()
try:
    driver.get('https://www.sina.com.cn/')
    html = driver.page_source
finally:
    # Close the browser even if loading the page fails; the page source is
    # already held as a string, so the session is not needed afterwards.
    driver.quit()

# Parse the HTML and select, from the list under the element with
# id "syncad_0", the link texts (a) and the link targets (b).
e_html = etree.HTML(html)
a = e_html.xpath('//*[@id="syncad_0"]/ul/li/a/text()')
b = e_html.xpath('//*[@id="syncad_0"]/ul/li/a/@href')

# Print every link text that was found.
for i in a:
    print("新闻:", i)
for j in b