# 获取B站视频排行榜的一些数据,例如标题、UP主、分区、BV号、播放量、弹幕量、评论量、综合得分等
# 同时将视频封面保存到本地
import re
import requests
import os
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Ranking page that every scraping function in this module reads.
url = "https://www.bilibili.com/v/popular/rank/all"

# Module-level result containers, one per scraped field.
img_list = []      # cover image links
name_list = []     # video titles
tname_list = []    # partition (category) names
view_list = []     # view counts
up_list = []       # uploader names
pinglun_list = []  # comment counts
socre_list = []    # overall scores (name spelled as in the original; other code may reference it)
bv_list = []       # BV ids
def get_URL(timeout=10):
    """Fetch the page at the module-level ``url`` and return it as text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
    # NOTE(review): verify=False turns off TLS certificate validation; it is
    # kept because the file deliberately silences the matching warning, but
    # confirm it is really needed.
    response = requests.get(url=url, headers=headers, verify=False, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # regex-based parsers, which would silently find nothing.
    response.raise_for_status()
    return response.content.decode('utf-8')
def get_IMG():
    """Extract cover-image links from the page and append them to ``img_list``.

    Fetches the page via ``get_URL()``, pulls every ``"pic":"...","title"``
    value out of it, decodes backslash escapes in each value, appends the
    results to the module-level ``img_list`` and prints that list.

    Returns:
        None. Results accumulate in ``img_list``; calling this twice appends
        the links twice.
    """
    content = get_URL()
    pattern = re.compile(r'"pic":"(.*?)","title"')
    # The captured text may contain backslash escapes that need decoding.
    # Encoding via latin-1 with ``backslashreplace`` (rather than UTF-8)
    # keeps non-ASCII characters intact: ``unicode_escape`` reads bytes as
    # latin-1, so UTF-8 bytes would come back as mojibake.
    img_list.extend(
        raw.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        for raw in pattern.findall(content)
    )
    print("封面链接: ")
    print(img_list)
# (result key, label printed before the list, pattern run against the page).
# Order matches the order in which the fields are printed.
_INFO_FIELDS = (
    ("title", "视频标题: ", re.compile(r'class="title">(.*?)</a>')),
    ("up", "UP主名称: ", re.compile(r'"name":"(.*?)"')),
    ("tname", "视频分区:", re.compile(r'"tname":"(.*?)"')),
    # Counts are captured as digits only; the previous ``(.*?)"`` form also
    # swallowed the comma that follows the number.
    ("view", "播放量:", re.compile(r'"view":(\d+)')),
    ("danmaku", "弹幕量:", re.compile(r'"danmaku":(\d+)')),
    ("bv", "BV号:", re.compile(r'com/video/(.*?)" target="')),
    ("score", "综合得分:", re.compile(r'<div>(.*?)</div>综合得分')),
)


def get_otherinfo(content=None):
    """Extract and print the textual fields of the ranking page.

    For each field (title, uploader name, partition, view count, danmaku
    count, BV id, overall score) the matching pattern is run over the page
    text, then the field's label and the list of matches are printed.

    Args:
        content: Page text to parse. When ``None`` (the default) the page is
            fetched with ``get_URL()``.

    Returns:
        A dict mapping ``"title"``, ``"up"``, ``"tname"``, ``"view"``,
        ``"danmaku"``, ``"bv"`` and ``"score"`` to the list of strings found
        for that field (each list is empty when nothing matched).
    """
    if content is None:
        content = get_URL()

    # NOTE(review): results are returned rather than written into the
    # module-level ``name_list`` / ``up_list`` / ... containers, which this
    # function never populated (its assignments only created locals).
    results = {}
    for key, label, pattern in _INFO_FIELDS:
        matches = pattern.findall(content)
        print(label)
        print(matches)
        results[key] = matches
    return results
    # Other counters seen next to "view"/"danmaku" in the page data, not
    # extracted yet: reply, favorite, coin, share, like, dislike.
def download():
    """Save every link in ``img_list`` under ``pic/`` as ``<index>.png``.

    Files that already exist are left untouched and not downloaded again.
    Nothing is created when ``img_list`` is empty.

    Raises:
        requests.RequestException: If a download fails, times out, or the
            server answers with a non-success status.
    """
    if not img_list:
        return

    root = "pic/"
    # Create the target folder once, up front, instead of re-checking on
    # every iteration.
    os.makedirs(root, exist_ok=True)

    for index, link in enumerate(img_list):
        path = root + str(index) + "." + 'png'
        if os.path.exists(path):
            continue
        # requests rejects scheme-less URLs, so give protocol-relative links
        # ("//host/path") an explicit scheme.
        if link.startswith("//"):
            link = "https:" + link
        r = requests.get(link, timeout=10)
        # Do not write an error page to disk as an image: the file would then
        # exist and never be retried.
        r.raise_for_status()
        with open(path, 'wb') as f:
            f.write(r.content)
if __name__ == '__main__':
    # Script entry point. Order matters: get_IMG fills img_list, download
    # reads it, and get_otherinfo prints the remaining fields last.
    for step in (get_IMG, download, get_otherinfo):
        step()