# Rough Selenium scrape of related CSDN article titles (粗略地用 Selenium 爬取相关文章标题)

from selenium import webdriver
from bs4 import BeautifulSoup
from lxml import  html
import requests
import time

# Search CSDN for "selenium", open every result, and record each article's
# series (column) title — or its own <h1> title when it has no series tag —
# into a local text file.
driver = webdriver.Chrome()
driver.maximize_window()
driver.implicitly_wait(10)
driver.get('https://so.csdn.net/so/search?q=selenium&t=&u=')
time.sleep(10)

# Search-result title links on the results page.
titles = driver.find_elements(By.CLASS_NAME, "block-title")

# Open with an explicit encoding and let the context manager close the file.
with open('selenium系列标题.text', 'a', encoding='utf-8') as f:
    for title in titles:
        time.sleep(2)

        # Clicking a result opens the article in a new tab; switch to it.
        title.click()
        time.sleep(10)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(10)

        try:
            # Article belongs to a series: grab the series tag link.
            class_link = driver.find_element(By.CLASS_NAME, 'tag-link')
            f.write(class_link.text + '\n')
        except NoSuchElementException:
            # No series tag — record the article's own headline instead,
            # close the article tab, and move on to the next result.
            biaoti = driver.find_element(By.TAG_NAME, 'h1')
            f.write(f'无系列标签     {biaoti.text}\n')
            driver.close()
            driver.switch_to.window(driver.window_handles[-1])
            continue

        time.sleep(3)
        # Follow the series link; it opens the column page in another tab.
        class_link.click()
        time.sleep(10)
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(5)

        # Parse every article title listed on the column page.
        soup = BeautifulSoup(driver.page_source, 'lxml')
        for heading in soup.find_all('h2', class_='title'):
            f.write(heading.text + '\n')

        # Close the column tab, then the article tab, returning to the
        # original search-results tab for the next iteration.
        driver.close()
        driver.switch_to.window(driver.window_handles[-1])
        driver.close()
        driver.switch_to.window(driver.window_handles[-1])

driver.quit()
# (CSDN page boilerplate — comment/red-packet/payment widget text captured
#  during extraction — removed; it was not part of the script.)