# -*- coding: utf-8 -*-
# @Time : 2021/8/4 15:42
# @Author : sanchez
# @File : bilibili排行榜
# @Software : Pycharm Community Edition
import requests
from bs4 import BeautifulSoup
import xlwt
# Base URL of the Bilibili popular-ranking pages; a board suffix from
# data_dict is appended to form the full URL (see get_response).
start_url = 'https://www.bilibili.com/v/popular/rank/'
# Maps the human-readable board name (shown to / typed by the user, in Chinese)
# to the URL path segment Bilibili uses for that board.
data_dict = {
'全站': 'all', '番剧': 'bangumi', '国产动画': 'guochan', '国创相关': 'guochuang', '纪录片': 'documentary', '动画': 'douga',
'音乐': 'music', '舞蹈': 'dance',
'游戏': 'game', '知识': 'knowledge', '科技': 'tech', '运动': 'sports', '汽车': 'car', '生活': 'life', '美食': 'food',
'动物圈': 'animal', '鬼畜': 'kichiku',
'时尚': 'fashion', '娱乐': 'ent', '影视': 'cinephile', '电影': 'movie', '电视剧': 'tv', '原创': 'origin', '新人': 'rookie'
}
# Desktop browser User-Agent so the site serves the normal HTML page.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.62'
}
def choose():
    """Prompt the user for a board name and scrape it if valid.

    Returns:
        True  -- the entered name is not a known board (caller should re-prompt).
        False -- a scrape was attempted (failure is reported, not raised).
    """
    # Show all selectable board names separated by '|'.
    # (str.join replaces the original quadratic `+=` loop.)
    print('|'.join(data_dict))
    choose_data = input('请选择要爬取的板块')
    if choose_data not in data_dict:
        return True
    try:
        get_response(data_dict[choose_data], choose_data)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any scrape/parse error is reported to the user.
        print('爬取失败')
    return False
def get_response(url_part, i):
    """Fetch one ranking page and hand its HTML to save_to_excel.

    Args:
        url_part: URL path segment for the board (a value from data_dict).
        i: human-readable board name, forwarded as the sheet title.

    Raises:
        requests.RequestException: on network failure, timeout, or HTTP error
            status (handled by the caller in choose()).
    """
    url = start_url + url_part
    # Time-bound the request so a stalled connection cannot hang the program.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()  # fail fast on 4xx/5xx instead of parsing an error page
    response.encoding = 'utf-8'
    save_to_excel(response.text, i)
def save_to_excel(res, title):
    """Parse one ranking page and append it as a sheet to the global workbook.

    The original code had three near-identical branches; they differed only in
    the list selector ('全站' uses the plain rank list) and in whether a
    release-date column is present ('电影' only).  This version drives one
    code path from those two facts — output is identical.

    Args:
        res: raw HTML text of the ranking page.
        title: board name; selects the page layout and names the sheet.

    Note: relies on the module-level `work` xlwt workbook created in __main__.
    """
    html = BeautifulSoup(res, 'html.parser')
    # '全站' pages render the plain rank list; every other board uses the pgc list.
    if title == '全站':
        item_selector = 'ul[class="rank-list"] > li[class="rank-item"]'
    else:
        item_selector = 'ul[class="rank-list pgc-list"] > li[class="rank-item"]'
    # Only the movie board carries a release-date ("pgc-info") column.
    with_date = title == '电影'

    sheet = work.add_sheet('bilibili' + title + '视频排行榜')

    header_cells = ['视频名称', '播放量', '综合得分']
    if with_date:
        header_cells.append('上映时间')
    header_cells.append('排名')
    for col, text in enumerate(header_cells):
        sheet.write(0, col, text)

    # Row 0 is the header, so the 1-based rank doubles as the row index.
    for rank, item in enumerate(html.select(item_selector), start=1):
        row = [
            item.select('a[class="title"]')[0].get_text().strip(),
            item.select('span[class="data-box"]')[0].get_text().strip(),
            item.select('div[class="pts"] > div')[0].get_text().strip(),
        ]
        if with_date:
            row.append(item.select('div[class="pgc-info"]')[0].get_text().strip())
        row.append(rank)
        for col, value in enumerate(row):
            sheet.write(rank, col, value)
if __name__ == '__main__':
    print('*****欢迎使用bilibili排行榜爬虫*****')
    # One workbook shared by every scrape; each board becomes its own sheet.
    work = xlwt.Workbook(encoding='utf-8')
    # Keep prompting until the user chooses to quit; only then is the file saved.
    while True:
        if choose():
            # Unknown board name — re-prompt without asking continue/quit.
            print('没有该板块,请重新选择!')
            continue
        if input('1,继续;2,退出') != '1':
            work.save('bilibili各类视频播放排行榜.xls')
            break
# NOTE(review): the two lines below were bare prose (blog-page residue from the
# page this script was copied from) and made the file a SyntaxError; kept here
# as a comment so no content is lost.
# Bilibili排行榜爬虫
# 最新推荐文章于 2023-06-16 00:29:02 发布