import time

import requests
from bs4 import BeautifulSoup
from lxml import html
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Launch a Chrome session, enlarge the window, and load the CSDN search
# results page for the keyword "selenium".
driver = webdriver.Chrome()
driver.implicitly_wait(10)   # poll up to 10 s on every element lookup
driver.maximize_window()
driver.get('https://so.csdn.net/so/search?q=selenium&t=&u=')
time.sleep(10)               # crude wait for the JS-rendered result list
# NOTE(review): two abandoned scraping approaches (lxml-etree XPath over
# driver.page_source, and BeautifulSoup extraction of 'a.block-title' hrefs)
# were parked here as triple-quoted string blocks. Dead commented-out code
# removed; recover it from version control if either alternative is needed.
# For every search-result title: open the article in a new tab, record its
# "series" tag link text (or the article headline when no series tag exists),
# open the series page and record every listed article title, then close the
# extra tabs and return to the search-results tab.
# Selenium 4 removed find_element(s)_by_* — use the By locator API instead.
titles = driver.find_elements(By.CLASS_NAME, "block-title")
# Append mode so reruns accumulate; explicit UTF-8 for the Chinese titles.
with open('selenium系列标题.text', 'a', encoding='utf-8') as f:
    for title in titles:
        time.sleep(2)
        title.click()                         # opens the article in a new tab
        time.sleep(10)
        windows = driver.window_handles       # all currently open windows
        driver.switch_to.window(windows[-1])  # switch to the newest tab
        time.sleep(10)
        try:
            class_link = driver.find_element(By.CLASS_NAME, 'tag-link')
            f.write(class_link.text + '\n')   # '\n' so titles don't run together
        except NoSuchElementException:        # narrow: only "element not found"
            # No series tag: record the article headline instead.
            biaoti = driver.find_element(By.TAG_NAME, 'h1')
            f.write(f'无系列标签 {biaoti.text}\n')
            driver.close()                    # close the article tab
            windows = driver.window_handles
            driver.switch_to.window(windows[-1])  # back to the results tab
            continue
        time.sleep(3)
        class_link.click()                    # opens the series page in another tab
        time.sleep(10)
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])
        time.sleep(5)
        # Parse the fully rendered series page once with BeautifulSoup.
        bs = BeautifulSoup(driver.page_source, 'lxml')
        for h2 in bs.find_all('h2', class_='title'):
            f.write(h2.text + '\n')
        driver.close()                        # close the series tab
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])
        time.sleep(2)
        driver.close()                        # close the article tab
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])  # back to the results tab
driver.quit()  # release the browser once all results are processed
# 粗略的用selenium爬取相关文章题目 (rough Selenium scrape of related article titles)
# 最新推荐文章于 2023-05-04 20:40:02 发布 — CSDN page footer accidentally pasted in