import time
from lxml.html import html_to_xhtml
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree
def scroll_to_bottom():
    """Scroll the page down step by step until it stops growing.

    Uses the module-level ``driver``. The page is scrolled in 200-pixel
    steps from the last known height to the current total height, then the
    total height is read again. The loop ends when the height no longer
    increases or as soon as ``condition()`` returns True.
    """
    height_script = "return document.body.scrollHeight"
    scrolled_to = 0  # offset the page has already been scrolled to
    page_height = driver.execute_script(height_script)  # current total page height
    while scrolled_to < page_height:
        # Walk the scroll bar down to the current bottom of the page.
        for offset in range(scrolled_to, page_height, 200):
            driver.execute_script(f"window.scrollTo(0, {offset})")
            time.sleep(0.5)
        scrolled_to = page_height
        # Pause before re-reading the height (presumably so newly loaded
        # content has time to extend the page).
        time.sleep(0.5)
        page_height = driver.execute_script(height_script)
        if condition():
            return
# Termination condition
def condition():
    """Return True once the 100th entry of the list is present in the DOM.

    Uses the module-level ``driver``. ``find_elements`` returns an empty
    list when nothing matches, so no exception handling is needed to test
    for presence, and unrelated failures (for example a closed browser
    session or Ctrl-C) are no longer silently reported as "not found".

    Returns:
        bool: True if the XPath for the 100th list entry matches at least
        one element, False otherwise.
    """
    matches = driver.find_elements(
        By.XPATH, '/html/body/div[2]/div/div[2]/div[1]/ul/div[100]/div[2]'
    )
    return bool(matches)
# 终止条件
if __name__ == '__main__':
    # Create the driver using the Chrome browser. It is kept at module
    # level because scroll_to_bottom() and condition() read it as a global.
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.bilibili.com/v/popular/all/?spm_id_from=333.1007.0.0")
        scroll_to_bottom()
        # Keep at most the first 100 title elements; slicing tolerates fewer.
        movieList = driver.find_elements(By.CLASS_NAME, 'video-name')[:100]
        with open("movie_Top100.txt", 'w', encoding="utf-8") as f:
            # One line per title, numbered from 1.
            for rank, movie in enumerate(movieList, start=1):
                f.write(f"NO.{rank}{movie.text}\n")
    finally:
        # Always close the browser, even if loading or scraping failed.
        driver.quit()
# Fetch the titles of the top 100 popular Bilibili videos (using Selenium)
# First published on 2024-11-01 11:50:30