# -*- coding: utf-8 -*-
# @Time : 2021/8/4 15:42
# @Author : sanchez
# @File : bilibili排行榜
# @Software : Pycharm Community Edition
import requests
from bs4 import BeautifulSoup
import xlwt
# Base URL of the Bilibili popular-ranking pages; a board suffix from
# data_dict is appended to form the full URL (see get_response).
start_url = 'https://www.bilibili.com/v/popular/rank/'
# Maps the human-readable board name (shown to / typed by the user, in Chinese)
# to the URL path segment Bilibili uses for that board.
data_dict = {
'全站': 'all', '番剧': 'bangumi', '国产动画': 'guochan', '国创相关': 'guochuang', '纪录片': 'documentary', '动画': 'douga',
'音乐': 'music', '舞蹈': 'dance',
'游戏': 'game', '知识': 'knowledge', '科技': 'tech', '运动': 'sports', '汽车': 'car', '生活': 'life', '美食': 'food',
'动物圈': 'animal', '鬼畜': 'kichiku',
'时尚': 'fashion', '娱乐': 'ent', '影视': 'cinephile', '电影': 'movie', '电视剧': 'tv', '原创': 'origin', '新人': 'rookie'
}
# Desktop browser User-Agent so the site serves the normal HTML page.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.62'
}
def choose():
    """Prompt the user for a board name and scrape it if valid.

    Returns:
        True  -- the entered name is not a known board (caller should re-prompt).
        False -- a scrape was attempted (failure is reported, not raised).
    """
    # Show all selectable board names separated by '|'.
    # (str.join replaces the original quadratic `+=` loop.)
    print('|'.join(data_dict))
    choose_data = input('请选择要爬取的板块')
    if choose_data not in data_dict:
        return True
    try:
        get_response(data_dict[choose_data], choose_data)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any scrape/parse error is reported to the user.
        print('爬取失败')
    return False
def get_response(url_part, i):
    """Fetch one ranking page and hand its HTML to save_to_excel.

    Args:
        url_part: URL path segment for the board (a value from data_dict).
        i: human-readable board name, forwarded as the sheet title.

    Raises:
        requests.RequestException: on network failure, timeout, or HTTP error
            status (handled by the caller in choose()).
    """
    url = start_url + url_part
    # Time-bound the request so a stalled connection cannot hang the program.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()  # fail fast on 4xx/5xx instead of parsing an error page
    response.encoding = 'utf-8'
    save_to_excel(response.text, i)
def save_to_excel(res, title):
    """Parse one ranking page and append it as a sheet to the global workbook.

    The original code had three near-identical branches; they differed only in
    the list selector ('全站' uses the plain rank list) and in whether a
    release-date column is present ('电影' only).  This version drives one
    code path from those two facts — output is identical.

    Args:
        res: raw HTML text of the ranking page.
        title: board name; selects the page layout and names the sheet.

    Note: relies on the module-level `work` xlwt workbook created in __main__.
    """
    html = BeautifulSoup(res, 'html.parser')
    # '全站' pages render the plain rank list; every other board uses the pgc list.
    if title == '全站':
        item_selector = 'ul[class="rank-list"] > li[class="rank-item"]'
    else:
        item_selector = 'ul[class="rank-list pgc-list"] > li[class="rank-item"]'
    # Only the movie board carries a release-date ("pgc-info") column.
    with_date = title == '电影'

    sheet = work.add_sheet('bilibili' + title + '视频排行榜')

    header_cells = ['视频名称', '播放量', '综合得分']
    if with_date:
        header_cells.append('上映时间')
    header_cells.append('排名')
    for col, text in enumerate(header_cells):
        sheet.write(0, col, text)

    # Row 0 is the header, so the 1-based rank doubles as the row index.
    for rank, item in enumerate(html.select(item_selector), start=1):
        row = [
            item.select('a[class="title"]')[0].get_text().strip(),
            item.select('span[class="data-box"]')[0].get_text().strip(),
            item.select('div[class="pts"] > div')[0].get_text().strip(),
        ]
        if with_date:
            row.append(item.select('div[class="pgc-info"]')[0].get_text().strip())
        row.append(rank)
        for col, value in enumerate(row):
            sheet.write(rank, col, value)
if __name__ == '__main__':
    print('*****欢迎使用bilibili排行榜爬虫*****')
    # One workbook shared by every scrape; each board becomes its own sheet.
    work = xlwt.Workbook(encoding='utf-8')
    # Keep prompting until the user chooses to quit; only then is the file saved.
    while True:
        if choose():
            # Unknown board name — re-prompt without asking continue/quit.
            print('没有该板块,请重新选择!')
            continue
        if input('1,继续;2,退出') != '1':
            work.save('bilibili各类视频播放排行榜.xls')
            break
# NOTE(review): the two lines below were bare prose (blog-page residue from the
# page this script was copied from) and made the file a SyntaxError; kept here
# as a comment so no content is lost.
# Bilibili排行榜爬虫
# 最新推荐文章于 2023-06-16 00:29:02 发布