红警HBK08视频播放量

# -*- coding: utf-8 -*-

import json
import re
import threading
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By


class GetInfo():
    """Collect video ids from a Bilibili space page and scrape per-video statistics.

    Constructing an instance launches Chrome and opens the space page.
    ``nextPage`` walks the paginated video list and gathers the ``data-aid``
    attribute of every entry; ``getVideo`` downloads the page of each video
    and writes its title and counters to ``video.json``.
    """

    # Serializes access to video.json: getVideo is meant to be run from
    # several threads at once, all writing to the same file.
    _write_lock = threading.Lock()
    # Set to True on the instance once it has truncated video.json, so that
    # only the first getVideo call of a scraping session resets the file.
    _video_file_started = False
    # Labels searched for in the video page; each must be followed by a
    # space and a number. They double as the keys of the output record.
    _STAT_LABELS = ("视频播放量", "弹幕量", "点赞数", "投硬币枚数", "收藏人数", "转发人数")

    def __init__(self):
        self.a_list = []  # ids (``data-aid`` values) of every video found so far
        self.d = webdriver.Chrome()  # Chrome driver resolved from PATH, kept as ``d``
        self.d.get('https://space.bilibili.com/1629347259/video?spm_id_from=333.337.0.0')

    def getUrl(self):
        """Record the video ids shown on the current page.

        New ids are appended to ``self.a_list`` and the whole list is then
        rewritten to ``url.json``.
        """
        ul = self.d.find_element(By.XPATH, '//*[@id="submit-video-list"]/ul[1]')
        for li in ul.find_elements(By.XPATH, "li"):
            aid = li.get_attribute("data-aid")
            # get_attribute returns None when the attribute is absent, and a
            # page that has not refreshed yet would yield the same ids again.
            if aid and aid not in self.a_list:
                self.a_list.append(aid)
        with open("url.json", "w", encoding="utf-8") as f:
            json.dump(self.a_list, f)

    def nextPage(self):
        """Walk every page of the video list and collect all video ids.

        Returns:
            ``self.a_list``, the list of collected ids.
        """
        d = self.d
        # The first page is already displayed; collect it before paging on,
        # otherwise its videos would never be recorded.
        self.getUrl()

        totle = d.find_element(By.XPATH, '//*[@id="submit-video-list"]/ul[3]/span[1]')
        number = re.findall(r"\d+", totle.text)
        # No number in the pager text: treat the list as a single page.
        total_pages = int(number[0]) if number else 1

        for page in range(1, total_pages):
            try:
                self.d.find_element(By.LINK_TEXT, '下一页').click()
                time.sleep(2)  # give the next page time to render
                self.getUrl()
            except Exception as e:
                print(f"Failed to click next page: {e}")

        return self.a_list

    @classmethod
    def _parse_video_page(cls, url, html):
        """Extract the title and counters of one video page into a dict.

        Raises:
            ValueError: if one of the counters cannot be found in ``html``.
        """
        # Capture everything up to the closing tag. The previous pattern used
        # the character class [^&amp;], which rejected any title containing
        # '&', 'a', 'm', 'p' or ';'.
        match = re.search(r'<title data-vue-meta="true">(.*?)</title>', html, re.S)
        title = match.group(1) if match else "未找到匹配的内容"

        video = {"url": url, "视频标题": title}
        for label in cls._STAT_LABELS:
            found = re.search(label + r' (\d+)', html)
            if found is None:
                raise ValueError(f"'{label}' not found in page")
            video[label] = found.group(1)
        return video

    def getVideo(self, urls, start, end):
        """Scrape the videos ``urls[start:end]`` and store them in video.json.

        Safe to run from several threads on the same instance: the first call
        truncates ``video.json``, and every record is then appended under a
        lock as one JSON object per line. A video that fails is reported on
        stdout and skipped.

        Args:
            urls: sequence of video ids, each appended to the base video URL.
            start: first index to process (converted with ``int``).
            end: index one past the last to process (converted with ``int``).
        """
        base_url = "http://www.bilibili.com/video/"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
        }

        with self._write_lock:
            if not self._video_file_started:
                # Start the session with an empty file, as a "w" open would.
                with open("video.json", "w", encoding="utf-8"):
                    pass
                self._video_file_started = True

        for video_id in urls[int(start):int(end)]:
            if not video_id:
                # A missing id cannot be turned into a URL.
                continue
            url = base_url + video_id
            try:
                # Without a timeout a stalled connection blocks the thread forever.
                req = requests.get(url, headers=headers, timeout=10)
                req.raise_for_status()
                video = self._parse_video_page(url, req.text)
                line = json.dumps(video, ensure_ascii=False) + "\n"
                with self._write_lock, open("video.json", "a", encoding="utf-8") as f:
                    f.write(line)
            except Exception as e:
                print(f"Failed to get video info for url {url}: {e}")


def split_ranges(total, parts):
    """Split ``range(total)`` into ``parts`` contiguous ``(start, end)`` slices.

    The slices cover every index exactly once, with no gaps or overlaps,
    and their sizes differ by at most one.
    """
    bounds = [total * i // parts for i in range(parts + 1)]
    return list(zip(bounds, bounds[1:]))


if __name__ == '__main__':
    obj = GetInfo()
    try:
        # nextPage returns the collected ids (getUrl also saves them to url.json).
        data = obj.nextPage()
    finally:
        # The browser is only needed to collect the ids; close it so the
        # Chrome process does not outlive the script.
        obj.d.quit()

    # Download the video pages in three threads, each on its own slice.
    workers = [
        threading.Thread(target=obj.getVideo, args=(data, start, end))
        for start, end in split_ranges(len(data), 3)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值