爬虫实战设计

研究生院

import time  # not used by this section any more; kept so no existing import is dropped

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Open the graduate-school site, hover the 4th entry of the top menu, then
# click the 2nd entry of the sub-menu that belongs to it.

# Upper bound (seconds) for each explicit wait below.  Explicit waits return
# as soon as the condition holds, unlike a fixed sleep, and raise
# TimeoutException if the element never shows up.
WAIT_SECONDS = 10

options = webdriver.ChromeOptions()
# 'detach' keeps the browser window open after the script finishes.
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)
driver.get('https://yjsy.hunnu.edu.cn')

xpath_1 = "//ul[@class='menu']/li[4]/a"
xpath_2 = "//ul[@class='menu']/li[4]/ul/li[2]/a"

wait = WebDriverWait(driver, WAIT_SECONDS)

# Hover the top-level entry first so that its sub-menu gets displayed.
button_1 = wait.until(EC.visibility_of_element_located((By.XPATH, xpath_1)))
ActionChains(driver).move_to_element(button_1).perform()

# Look the sub-menu entry up only after the hover: it is resolved once it is
# actually displayed and enabled, rather than before the menu was opened.
button_2 = wait.until(EC.element_to_be_clickable((By.XPATH, xpath_2)))
ActionChains(driver).move_to_element(button_2).click().perform()

哔哩哔哩1

import time

from bs4 import BeautifulSoup
from selenium import webdriver

# Load one bilibili video page in Chrome, then print its title, play count,
# danmaku count and whatever comments are present in the rendered HTML.


def _tag_text(tag, default='N/A'):
    """Return ``tag.text``, or *default* when the lookup found nothing.

    ``soup.find`` returns ``None`` for a missing node; calling ``.text`` on
    that would raise ``AttributeError`` and abort the whole script.
    """
    return default if tag is None else tag.text


url = 'https://www.bilibili.com/video/BV1iN4y1a7KJ'

options = webdriver.ChromeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)

try:
    driver.get(url)
    # Give the page's scripts time to render before taking the snapshot.
    time.sleep(5)
    html = driver.page_source
finally:
    # Parsing works on the HTML snapshot, so the browser is no longer needed.
    # quit() ends the whole driver session, and running it in ``finally``
    # releases the browser even when loading the page fails.
    driver.quit()

soup = BeautifulSoup(html, 'lxml')

title = soup.find('h1', class_="video-title")
count = soup.find('span', class_="view item")
dm = soup.find('span', class_="dm item")
# Publication date node: collected but not part of the printed summary.
# (Renamed from ``datetime`` so it does not read like the stdlib module.)
pub_date = soup.find('span', class_="pubdate-text")
comments = soup.find_all('div', class_="content-warp")

comments_text = []
for comment in comments:
    name_tag = comment.find('div', class_="user-info")
    text_tag = comment.find('span', class_="reply-content")
    # Skip comment containers that lack either the author or the body.
    if name_tag is None or text_tag is None:
        continue
    comments_text.append({
        'name': name_tag.text,
        'text': text_tag.text,
    })

# Print the results
title_text = _tag_text(title)
count_text = _tag_text(count).strip()
dm_text = _tag_text(dm).strip()
print(f"标题:{title_text},播放量:{count_text},弹幕数:{dm_text}")

for comment in comments_text:
    print(f"评论:\nID:{comment['name']},评论内容:{comment['text']}")

哔哩哔哩2

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Load bilibili's "popular" page in Edge and print the title, uploader and
# play count of every video card found in the rendered HTML.

# Keep the browser window open after the script finishes.
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)

# Load the page.
url = 'https://www.bilibili.com/v/popular/all/'
driver.get(url)

# Wait until at least one video card is in the DOM before reading the page
# source; reading it straight after get() can yield HTML with no cards yet.
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'video-card'))
    )
except TimeoutException:
    # Best effort: fall through and parse whatever has been rendered; the
    # loop below simply prints nothing when no card is present.
    pass

# Parse the rendered HTML and pick out the video cards.
soup = BeautifulSoup(driver.page_source, 'lxml')
result = soup.find_all('div', class_='video-card')

for item in result:
    title = item.find('p', class_='video-name')
    up = item.find('span', class_='up-name__text')
    count = item.find('span', class_='play-text')
    # find() returns None for a missing node; skip incomplete cards instead
    # of failing with AttributeError on ``.text``.
    if title is None or up is None or count is None:
        continue
    print(f'视频:{title.text},UP:{up.text},播放量:{count.text.strip()}')




 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值