# -*- coding: utf-8 -*-
import json
import re
import threading
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
class GetInfo():
    """Collect video ids from a Bilibili space page and fetch per-video stats.

    A Chrome WebDriver walks the paginated video list and gathers the
    ``data-aid`` attribute of every list item into ``a_list`` (mirrored to
    ``url.json``).  ``getVideo`` then downloads each video page with
    ``requests`` and appends one JSON object per line to ``video.json``.
    """

    DEFAULT_SPACE_URL = 'https://space.bilibili.com/1629347259/video?spm_id_from=333.337.0.0'
    URL_FILE = "url.json"
    VIDEO_FILE = "video.json"
    REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request hangs forever

    _TITLE_PATTERN = re.compile(r'<title data-vue-meta="true">([^&]+)</title>')
    # Each statistic appears in the page text as "<label> <digits>".
    _STAT_FIELDS = ("视频播放量", "弹幕量", "点赞数", "投硬币枚数", "收藏人数", "转发人数")
    _STAT_PATTERNS = {name: re.compile(name + r" (\d+)") for name in _STAT_FIELDS}

    # getVideo is run from several threads that share one output file, so
    # every write is serialized through this lock.
    _write_lock = threading.Lock()

    def __init__(self, space_url=DEFAULT_SPACE_URL):
        """Start Chrome and open the uploader's video list page.

        Args:
            space_url: Address of the video list page to scrape.  Defaults to
                the page that was previously hard-coded.
        """
        self.a_list = []  # id of every video found so far
        self.d = webdriver.Chrome()  # driver is located via PATH
        # The list is rendered by JavaScript; let element lookups wait for it
        # instead of failing immediately after the page load event.
        self.d.implicitly_wait(10)
        self.d.get(space_url)

    def getUrl(self):
        """Append the ids shown on the current page and rewrite ``url.json``.

        Raises:
            selenium.common.exceptions.NoSuchElementException: if the video
                list element is not present on the page.
        """
        ul = self.d.find_element(By.XPATH, '//*[@id="submit-video-list"]/ul[1]')
        for li in ul.find_elements(By.XPATH, "li"):
            video_id = li.get_attribute("data-aid")
            # get_attribute returns None when the attribute is absent; such an
            # entry could not be turned into a video URL later.
            if video_id:
                self.a_list.append(video_id)
        with open(self.URL_FILE, "w", encoding="utf-8") as f:
            f.write(json.dumps(self.a_list))

    def nextPage(self):
        """Scrape every page of the video list.

        The first page is scraped before any click; then the "下一页" link is
        followed once for each remaining page.  A failure on one page is
        printed and the loop carries on with the next iteration.

        Returns:
            list: ``self.a_list`` with every id collected.
        """
        # The page that is already open must be scraped too.
        self.getUrl()

        # find_elements returns an empty list instead of raising, so a list
        # without a pager is treated as a single page.
        # NOTE(review): the span is assumed to hold the total page count.
        pager = self.d.find_elements(By.XPATH, '//*[@id="submit-video-list"]/ul[3]/span[1]')
        digits = re.findall(r"\d+", pager[0].text) if pager else []
        total_pages = int(digits[0]) if digits else 1

        for _ in range(1, total_pages):
            try:
                self.d.find_element(By.LINK_TEXT, '下一页').click()
                time.sleep(2)  # give the next page time to replace the list
                self.getUrl()
            except Exception as e:
                print(f"Failed to click next page: {e}")
        return self.a_list

    @staticmethod
    def _parse_video_page(html, url):
        """Extract the title and statistics from a video page's HTML.

        Args:
            html: Text of the downloaded video page.
            url: Address the page was fetched from; stored in the record.

        Returns:
            dict: ``url``, ``视频标题`` and one entry per statistic label, the
            counts kept as digit strings.

        Raises:
            ValueError: if any statistic label is not found in ``html``.
        """
        title_match = GetInfo._TITLE_PATTERN.search(html)
        record = {
            "url": url,
            "视频标题": title_match.group(1) if title_match else "未找到匹配的内容",
        }
        for name, pattern in GetInfo._STAT_PATTERNS.items():
            found = pattern.search(html)
            if found is None:
                raise ValueError(f"missing statistic {name!r}")
            record[name] = found.group(1)
        return record

    def getVideo(self, urls, start, end):
        """Fetch stats for ``urls[start:end]`` and append them to ``video.json``.

        Each successful record is written as one UTF-8 JSON object per line.
        The file is opened in append mode under a lock so that concurrent
        calls neither truncate nor interleave each other's output; truncate
        the file beforehand when a fresh result set is wanted.

        Args:
            urls: Sequence of video ids (the part after ``/video/``).
            start: First index to process (converted with ``int``).
            end: Index one past the last item to process (converted with ``int``).
        """
        base_url = "http://www.bilibili.com/video/"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
        }
        for video_id in urls[int(start):int(end)]:
            if not video_id:
                continue  # nothing to build a URL from
            url = base_url + video_id
            try:
                resp = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
                resp.raise_for_status()
                record = self._parse_video_page(resp.text, url)
                line = json.dumps(record, ensure_ascii=False)
                with self._write_lock:
                    with open(self.VIDEO_FILE, "a", encoding="utf-8") as f:
                        f.write(line + "\n")
            except (requests.RequestException, ValueError, OSError) as e:
                # Best effort: report the failure and move on to the next video.
                print(f"Failed to get video info for url {url}: {e}")
def chunk_bounds(total, parts):
    """Split ``range(total)`` into ``parts`` contiguous slices.

    Args:
        total: Number of items to distribute.
        parts: Number of slices wanted (must be positive).

    Returns:
        list[tuple[int, int]]: ``(start, end)`` pairs usable as slice bounds.
        Together they cover every index exactly once; sizes differ by at
        most one.
    """
    cuts = [total * i // parts for i in range(parts + 1)]
    return list(zip(cuts, cuts[1:]))


if __name__ == '__main__':
    obj = GetInfo()
    try:
        # nextPage returns the collected id list, so there is no need to
        # read url.json back from disk.
        data = obj.nextPage()
    finally:
        # The browser is only needed for collecting ids; always release it.
        obj.d.quit()

    # Start each run with an empty output file so stale results never mix in.
    open("video.json", "w", encoding="utf-8").close()

    # Three workers, each handling its own non-overlapping third of the list.
    workers = [
        threading.Thread(target=obj.getVideo, args=(data, lo, hi))
        for lo, hi in chunk_bounds(len(data), 3)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
# Article title: "红警HBK08视频播放量" (Red Alert HBK08 video play counts)
# First published 2022-10-08 15:33:35