# Script 1: Graduate School website (研究生院)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time

# Opens https://yjsy.hunnu.edu.cn in Chrome, hovers over the link in the
# fourth item of the `menu` list, then clicks the link in the second item of
# that item's nested list.

options = webdriver.ChromeOptions()
# 'detach' asks Chrome to stay open after the script has finished.
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)
driver.get('https://yjsy.hunnu.edu.cn')

xpath_1 = "//ul[@class='menu']/li[4]/a"
xpath_2 = "//ul[@class='menu']/li[4]/ul/li[2]/a"

# Explicit waits instead of fixed sleeps: proceed as soon as the element is
# ready, and fail with a TimeoutException (rather than an immediate
# NoSuchElementException) if the page is slower than expected.
wait = WebDriverWait(driver, 10)

button_1 = wait.until(EC.visibility_of_element_located((By.XPATH, xpath_1)))
ActionChains(driver).move_to_element(button_1).perform()

# Look the submenu link up only after hovering, once it can be clicked.
button_2 = wait.until(EC.element_to_be_clickable((By.XPATH, xpath_2)))
ActionChains(driver).move_to_element(button_2).click().perform()
# Script 2: Bilibili video page (哔哩哔哩1)
import time

from bs4 import BeautifulSoup
from selenium import webdriver

# Loads one Bilibili video page in Chrome, parses the rendered HTML with
# BeautifulSoup and prints the title, play count, danmaku count and the
# comments (user name + text) found in the page source.


def _text_of(tag, default=''):
    """Return ``tag.text``, or *default* when the lookup found nothing.

    ``find()`` returns ``None`` for a missing element; reading ``.text`` on
    that would raise ``AttributeError`` and abort the whole script.
    """
    return default if tag is None else tag.text


url = 'https://www.bilibili.com/video/BV1iN4y1a7KJ'
options = webdriver.ChromeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    # Fixed pause before reading the source; presumably gives the page's
    # scripts time to render the elements queried below.
    time.sleep(5)
    html = driver.page_source

    soup = BeautifulSoup(html, 'lxml')
    title = soup.find('h1', class_="video-title")
    count = soup.find('span', class_="view item")
    dm = soup.find('span', class_="dm item")
    # NOTE: the `pubdate-text` element is looked up but is not part of the
    # printed summary below.
    datetime = soup.find('span', class_="pubdate-text")
    comments = soup.find_all('div', class_="content-warp")

    comments_text = [
        {
            'name': _text_of(comment.find('div', class_="user-info")),
            'text': _text_of(comment.find('span', class_="reply-content")),
        }
        for comment in comments
    ]

    # Print the results.
    print(f"标题:{_text_of(title)},播放量:{_text_of(count).strip()},弹幕数:{_text_of(dm).strip()}")
    for comment in comments_text:
        print(f"评论:\nID:{comment['name']},评论内容:{comment['text']}")
finally:
    # quit() ends the whole WebDriver session; running it in `finally` means
    # the browser is also shut down when scraping fails part-way.
    driver.quit()
# Script 3: Bilibili popular videos list (哔哩哔哩2)
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Loads the Bilibili "popular" page in Edge and prints the title, uploader
# name and play count of every `video-card` found in the page source.


def _text_of(tag, default=''):
    """Return ``tag.text``, or *default* when the lookup found nothing.

    ``find()`` returns ``None`` for a missing element; reading ``.text`` on
    that would raise ``AttributeError`` and abort the loop.
    """
    return default if tag is None else tag.text


# Keep the browser from closing automatically.
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)

# Load the page and get its source.
url = 'https://www.bilibili.com/v/popular/all/'
driver.get(url)
try:
    # Wait until at least one card is in the DOM before reading the source;
    # otherwise the snapshot may be taken before any card exists.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'video-card'))
    )
except TimeoutException:
    # Best effort: fall through and parse whatever the page contains, which
    # matches the behaviour without a wait (possibly no cards, no output).
    pass

# Parse with BeautifulSoup and pick out the data.
soup = BeautifulSoup(driver.page_source, 'lxml')
result = soup.find_all('div', class_='video-card')
for item in result:
    title = item.find('p', class_='video-name')
    up = item.find('span', class_='up-name__text')
    count = item.find('span', class_='play-text')
    print(f'视频:{_text_of(title)},UP:{_text_of(up)},播放量:{_text_of(count).strip()}')