import time

import requests
from bs4 import BeautifulSoup
from lxml import html
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Launch a Chrome session, enlarge the window, and load the CSDN search
# results page for the keyword "selenium".
driver = webdriver.Chrome()
driver.implicitly_wait(10)   # poll up to 10 s on every element lookup
driver.maximize_window()
driver.get('https://so.csdn.net/so/search?q=selenium&t=&u=')
time.sleep(10)               # crude wait for the JS-rendered result list
# NOTE(review): two abandoned scraping approaches (lxml-etree XPath over
# driver.page_source, and BeautifulSoup extraction of 'a.block-title' hrefs)
# were parked here as triple-quoted string blocks. Dead commented-out code
# removed; recover it from version control if either alternative is needed.
# For every search-result title: open the article in a new tab, record its
# "series" tag link text (or the article headline when no series tag exists),
# open the series page and record every listed article title, then close the
# extra tabs and return to the search-results tab.
# Selenium 4 removed find_element(s)_by_* — use the By locator API instead.
titles = driver.find_elements(By.CLASS_NAME, "block-title")
# Append mode so reruns accumulate; explicit UTF-8 for the Chinese titles.
with open('selenium系列标题.text', 'a', encoding='utf-8') as f:
    for title in titles:
        time.sleep(2)
        title.click()                         # opens the article in a new tab
        time.sleep(10)
        windows = driver.window_handles       # all currently open windows
        driver.switch_to.window(windows[-1])  # switch to the newest tab
        time.sleep(10)
        try:
            class_link = driver.find_element(By.CLASS_NAME, 'tag-link')
            f.write(class_link.text + '\n')   # '\n' so titles don't run together
        except NoSuchElementException:        # narrow: only "element not found"
            # No series tag: record the article headline instead.
            biaoti = driver.find_element(By.TAG_NAME, 'h1')
            f.write(f'无系列标签 {biaoti.text}\n')
            driver.close()                    # close the article tab
            windows = driver.window_handles
            driver.switch_to.window(windows[-1])  # back to the results tab
            continue
        time.sleep(3)
        class_link.click()                    # opens the series page in another tab
        time.sleep(10)
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])
        time.sleep(5)
        # Parse the fully rendered series page once with BeautifulSoup.
        bs = BeautifulSoup(driver.page_source, 'lxml')
        for h2 in bs.find_all('h2', class_='title'):
            f.write(h2.text + '\n')
        driver.close()                        # close the series tab
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])
        time.sleep(2)
        driver.close()                        # close the article tab
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])  # back to the results tab
driver.quit()  # release the browser once all results are processed
# 粗略的用selenium爬取相关文章题目 (rough Selenium scrape of related article titles)
# 最新推荐文章于 2023-05-04 20:40:02 发布 — CSDN page footer accidentally pasted in