注:源码及思路主要参考以下三篇CSDN博客:
《Python爬虫 | 爬取微博和哔哩哔哩数据》
《【可视化分析案例】用python分析Top100排行榜数据》
《Python Spider学习笔记(一):爬取B站视频基本信息》
相关代码
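运行下列脚本前需要先安装第三方库。下面的依赖清单是根据各文件的 import 推断的,版本未在原文给出,仅供参考:
# 依据代码中的 import 推断的依赖(requirements 示意)
requests
pandas
numpy
selenium
beautifulsoup4
lxml
matplotlib
wordcloud
Pillow
此外 bilibili.py 中的 spider_bvid() 通过 selenium 驱动 Chrome,需要本机装有 Chrome 浏览器及可用的驱动。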
1.pachong.py
import bilibili
import bilibilihot
import numpy as np
import pandas as pd
net = int(input("请选择爬取:1.b站 2.停止爬取"))
while (net != 2):
if (net == 1):
choice2 = int(input("请选择爬取的方向:1.排行榜 2.关键词"))
if (choice2 == 1):
# 初始化
b = bilibilihot.bilihot()
# 调用搜索
b.findall()
# 储存数据
b.storeCsvdata()
if (choice2 == 2):
# search: 你需要搜索的数据
            keywords = input("请输入搜索的关键词(多个关键词用空格分隔):").split()
total_page = int(input('请输入搜索的页数:'))
if __name__ == '__main__':
# 针对不同内容修改搜索关键词!!!!
                for keyword in keywords:
                    # 自动爬取多个主题时须注意上面的最大页数定位问题
                    # 爬取后生成去重了的len(keywords)个f'{keyword}BV号.csv'文件
                    bilibili.spider_bvid(keyword, total_page)
                for keyword in keywords:
                    # 拼接成文件名,遍历读取bv_id
                    filename = f'{keyword}BV号.csv'
                    # 打开文件并去重
                    open_csv = pd.read_csv(filename)
                    open_csv = open_csv.drop_duplicates(subset='BV号')
bv_id_list = np.array(open_csv['BV号'])
# 循环写入内容
for i in range(0, len(bv_id_list)):
bv_id = bv_id_list[i]
print(f'正在进行第{i + 1}次爬取\n')
# 获取视频所有的基本信息
video_info = bilibili.get_video_info(bv_id)
bvid = video_info['bvid']
aid = video_info['aid']
cid = video_info['cid']
mid = video_info['mid']
name = video_info['name']
title = video_info['title']
tname = video_info['tname']
pub_date = video_info['pub_date']
pub_time = video_info['pub_time']
desc = video_info['desc']
view = video_info['view']
like = video_info['like']
coin = video_info['coin']
favorite = video_info['favorite']
share = video_info['share']
reply = video_info['reply']
danmaku = video_info['danmaku']
# 获取作者的相关信息
user_info = bilibili.get_user_info(uid=mid)
follower = user_info['follower']
archive = user_info['archive']
bilibili.write_to_csv(filename='视频基本信息.csv', bvid=bvid, aid=aid, cid=cid, mid=mid, name=name,
follower=follower,
archive=archive, title=title, tname=tname, pub_date=pub_date, pub_time=pub_time,
desc=desc,
view=view, like=like, coin=coin, favorite=favorite, share=share, reply=reply,
danmaku=danmaku
)
print(f'==========第{i + 1}个BV号:{bv_id}的相关数据已写入csv文件中==========')
print('==================================================\n')
net = int(input("请选择:1.b站 2.停止爬取"))
2.bilibilihot.py
import requests
import json
import time
import pandas as pd
"""
bilihot类的功能
1.初始化需要的参数
无
2.使用方法
a = bilihot() 初始化
a.findall() 调用搜索
a.storeCsvdata() 储存数据
a.data 可以查看数据
a.data[i][j] i为第几条数据 j对应['作者','标题','播放量','弹幕数','评论数','点赞数','投币数','分享数','简介','封面','id','播放地址','时间','分区']
"""
class bilihot():
def __init__(self):
# 构造浏览器访问请求头
self.head = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.44',
'referer': 'https://www.bilibili.com/v/popular/rank/all',
'authority': 'api.bilibili.com',
}
# 保存一份数据
self.data = []
def findall(self):
# 请求数据
target = requests.get('https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all', headers=self.head)
# 将数据转换为py对象
data = json.loads(target.text)
for i in data['data']['list']:
# 作者
author = i['owner']['name']
# 标题
title = i['title']
# 播放量
play = i['stat']['view']
#弹幕数
danmu = i['stat']['danmaku']
#评论数
reply = i['stat']['reply']
#点赞数
            like = i['stat']['like']
#投币数
coin = i['stat']['coin']
#转发数
share = i['stat']['share']
# 简介
desc = i['desc']
# 封面
pic = i['pic']
# id
id = i['aid']
# 播放地址
arcurl = i['short_link_v2']
# 发布日期
pubdate = i['pubdate']
# 10位时间戳转换为时间字符串
timeArray = time.localtime(pubdate)
            pubdate = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
# 分区
tname = i['tname']
self.data.append([author, title, play, danmu, reply, like, coin, share, desc, pic, id, arcurl, pubdate, tname])
print('请求数据成功')
    def storeCsvdata(self):
        # 构造列表头
        name = ['作者', '标题', '播放量', '弹幕数', '评论数', '点赞数', '投币数', '分享数', '简介', '封面', 'id', '播放地址', '时间', '分区']
        # 写入文件
        writer = pd.DataFrame(self.data, columns=name)
        writer.to_csv('b站排行榜.csv', index=False, encoding='utf-8-sig')  # 若用utf-8编码,直接用Excel打开csv会乱码
        print('写入成功')
if __name__ == '__main__':
# 初始化
b = bilihot()
# 调用搜索
b.findall()
# 储存数据
b.storeCsvdata()
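补充:findall() 假定接口一定返回成功。如果希望更稳妥,可以在解析 data['data']['list'] 之前先检查返回码,大致写法如下(示意,字段含义以接口实际返回为准):
import requests
import json
resp = requests.get('https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all',
                    headers={'user-agent': 'Mozilla/5.0',
                             'referer': 'https://www.bilibili.com/v/popular/rank/all'})
data = json.loads(resp.text)
# B站web接口一般返回 {"code": 0, "message": "0", "data": {...}},code非0时data里可能没有list字段
if data.get('code') != 0:
    raise RuntimeError(f"接口返回异常:code={data.get('code')}, message={data.get('message')}")
video_list = data['data']['list']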
3.bilibili_pic.py
# 导入库
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import ImageColorGenerator, WordCloud # 绘制词云图
import numpy as np
from PIL import Image
# 采用seaborn画图风格
plt.style.use('seaborn')  # matplotlib 3.6+ 已将该样式名改为 'seaborn-v0_8',新版环境下需相应修改
# 解决中文显示问题
plt.rcParams['font.sans-serif'] = ['SimHei'] # 显示中文标签 # 指定默认字体
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
# 一、数据读取
csv = '视频基本信息.csv' #排行榜视频分析也可用
# 读取csv数据
df = pd.read_csv(csv)
# 二、数据概览
# 查看数据形状
print(df.shape)
# 查看前5行
print(df.head(5))
# 查看列信息
print(df.info())
# 描述性统计分析
print(df.describe())
# 三、数据清洗
# 查看空值
print(df.isna().any())
# 查看重复值
print(df.duplicated().any())
# 四、数据可视化分析
# 4.1 相关性分析-散点图
df_corr = df[['播放量', '弹幕数', '投币数', '点赞数', '分享数', '收藏数']]
# 斯皮尔曼相关性分析
print('相关性矩阵:')
print(df_corr.corr(method='spearman'))
# 定义x轴y轴数据
x = df['播放量']
y = df['收藏数']
# 画散点图
plt.figure(figsize=(20, 8)) # 图形大小
plt.title('相关性分析: 播放量x收藏数', fontdict={'size': 20}) # 标题
plt.xlabel('播放量') # x轴名称
plt.ylabel('收藏数') # y轴名称
plt.scatter(x, y) # 画图
plt.savefig('相关性分析-散点图.png') # 保存图片
#plt.show() # 显示图形
# ----------------------------- 播放量分布-饼图---------------------------------------------
score_list = df['播放量'].values.tolist()
# 设置分段
bins = [0, 100000, 500000, 1000000, 2000000, 10000000]
# 设置标签
labels = [
'0-10w',
'10w-50w',
'50w-100w',
'100w-200w',
'200w-1000w'
]
# 按分段离散化数据
segments = pd.cut(score_list, bins, labels=labels) # 按分段切割数据
counts = pd.value_counts(segments, sort=False).values.tolist() # 统计个数
plt.figure(figsize=(20, 8)) # 图形大小
plt.pie(counts,
labels=labels, # 设置饼图标签
colors=['cyan', 'yellowgreen', 'lightskyblue',
'aquamarine', 'aliceblue'], # 设置饼图颜色
# explode=(0.15, 0, 0, 0, 0), # 第二部分突出显示,值越大,距离中心越远
autopct='%.2f%%', # 格式化输出百分比
)
plt.title("播放量-分布饼图")
plt.savefig('播放量-饼图.png') # 保存图片
# plt.show() # 显示图片
# ---------------------------------数据分布-箱型图------------------------------------
# 4.3.1 在一张图里
df_play = df['播放量']
df_danmu = df['弹幕数']
df_coin = df['投币数']
df_like = df['点赞数']
df_share = df['分享数']
df_fav = df['收藏数']
plt.figure(figsize=(20, 8)) # 图形大小
plt.title('数据分布-箱型图', fontdict={'size': 20})
plt.boxplot([ # 绘制箱型图
df_play,
df_danmu,
df_coin,
df_like,
df_share,
df_fav,
], labels=[
'播放量',
'弹幕数',
'投币数',
'点赞数',
'分享数',
'收藏数',
])
plt.ylabel('数量')
plt.savefig('箱型图分析_一张图.png') # 保存图片
# plt.show() # 显示图像
# 4.3.2 在多张图里
plt.figure(figsize=(20, 8)) # 整体图形大小
plt.subplot(2, 3, 1)
plt.boxplot(df_play)
plt.title('播放量')
plt.subplot(2, 3, 2)
plt.boxplot(df_danmu)
plt.title('弹幕数')
plt.subplot(2, 3, 3)
plt.boxplot(df_coin)
plt.title('投币数')
plt.subplot(2, 3, 4)
plt.boxplot(df_like)
plt.title('点赞数')
plt.subplot(2, 3, 5)
plt.boxplot(df_share)
plt.title('分享数')
plt.subplot(2, 3, 6)
plt.boxplot(df_fav)
plt.title('收藏数')
plt.suptitle("各指标数据分布-箱型图", fontsize=20)
plt.savefig('箱型图分析_多张图.png') # 保存图片
# plt.show()
# -------------------------视频作者分析-词云图----------------------------------
# 作者列表
author_list = df['UP主名称'].values.tolist()
# 作者字符串
author_str = ' '.join(author_list)
stopwords = [] # 停用词
coloring = np.array(Image.open("背景图.jpeg"))
backgroud_Image = coloring # 读取背景图片
wc = WordCloud(
scale=5, # 清晰度
margin=0, # 边距
background_color="black", # 背景颜色
max_words=1200, # 最大字符数
width=200, # 图宽
height=200, # 图高
    font_path=r"C:\Windows\Fonts\simhei.ttf",  # 字体文件路径(使用原始字符串,避免反斜杠被当作转义符)
stopwords=stopwords, # 停用词
mask=backgroud_Image, # 背景图片
color_func=ImageColorGenerator(coloring), # 根据原始图片颜色生成词云图颜色
random_state=800 # 设置有多少种随机生成状态,即有多少种配色方案
)
wc.generate_from_text(author_str) # 生成词云图
wc.to_file('视频作者_词云图.png') # 保存图片
print('图片已生成: 视频作者_词云图.png')
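运行本脚本前,工作目录下需要已有爬虫生成的 视频基本信息.csv 以及用作词云底图的 背景图.jpeg,否则读取文件时会报错。可以在脚本开头加一段简单的检查(示意):
import os
# 运行前确认依赖文件是否存在,缺失时直接报错,避免画图到一半才失败
for required_file in ['视频基本信息.csv', '背景图.jpeg']:
    if not os.path.exists(required_file):
        raise FileNotFoundError(f'缺少运行所需文件:{required_file}')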
4.bilibili.py
import re
import os
import csv
import time
import json
import requests
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
def merge_csv(input_filename, output_filename):
"""
读取csv文件内容,并写入新的文件
:param input_filename: 传入的文件名称
:param output_filename: 写入的新文件的名称
:return: 向新文件中写入input_filename中的内容
"""
# 读取文件
csv_data_read = pd.read_csv(input_filename)
# 获取文件总行数
number_of_row = (len(csv_data_read))
# 循环该csv文件中的所有行,并写入信息
for i in range(0, number_of_row):
row_info = csv_data_read.values[i]
# 输出查看内容
# print(row_info)
# 具体内容
row_content = row_info[0]
# 写入
write_to_csv_bvid(output_filename, row_content)
    # 打印进度
print(f'成功向{output_filename}中写入了{input_filename}的全部信息')
def write_to_csv_bvid(input_filename, bvid):
"""
写入新的csv文件,若没有则创建,须根据不同程序进行修改
:param input_filename: 写入的文件名称
:param bvid: BV号
:return: 生成写入的input_filename文件
"""
# OS 判断路径是否存在
file_exists = os.path.isfile(input_filename)
# 设置最大尝试次数
max_retries = 50
retries = 0
while retries < max_retries:
try:
with open(input_filename, mode='a', encoding='utf-8-sig', newline='') as csvfile:#utf-8用csv打开会乱码
fieldnames = ['BV号']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
if not file_exists:
writer.writeheader()
writer.writerow({
'BV号': bvid
})
# print('写入文件成功')
break # 如果成功写入,跳出循环
except PermissionError as e:
retries += 1
print(f"将爬取到的数据写入csv时,遇到权限错误Permission denied,文件可能被占用或无写入权限: {e}")
print(f"等待3s后重试,将会重试50次... (尝试 {retries}/{max_retries})")
            time.sleep(3)  # 等待3秒后重试
else:
print("将爬取到的数据写入csv时遇到权限错误,且已达到最大重试次数50次,退出程序")
def spider_bvid(keyword,total_page):
"""
    利用selenium获取搜索结果的bvid,供给后续程序使用
    :param keyword: 搜索关键词
    :param total_page: 搜索结果的页数
    :return: 生成去重的output_filename = f'{keyword}BV号.csv'
"""
# 保存的文件名
input_filename = f'{keyword}BV号.csv'
# 启动爬虫
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
browser = webdriver.Chrome(options=options) # 设置无界面爬虫
    browser.set_window_size(1400, 900)  # 设置窗口大小,注意窗口太小可能导致有些button无法点击
browser.get('https://bilibili.com')
# 刷新一下,防止搜索button被登录弹框遮住
browser.refresh()
print("============成功进入B站首页!!!===========")
    search_input = browser.find_element(By.CLASS_NAME, 'nav-search-input')
    button = browser.find_element(By.CLASS_NAME, 'nav-search-btn')
    # 输入关键词并点击搜索
    search_input.send_keys(keyword)
    button.click()
print(f'==========成功搜索{keyword}相关内容==========')
# 设置窗口
all_h = browser.window_handles
browser.switch_to.window(all_h[1])
    total_page = int(total_page)  # 确保页数为整数
for i in range(0, total_page):
# url 需要根据不同关键词进行调整内容!!!
url = (f"https://search.bilibili.com/all?keyword={keyword}"
f"&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page={i}")
print(f"===========正在尝试获取第{i + 1}页网页内容===========")
print(f"===========本次的url为:{url}===========")
browser.get(url)
# 这里请求访问网页的时间也比较久(可能因为我是macos),所以是否需要等待因设备而异
# 取消刷新并长时间休眠爬虫以避免爬取太快导致爬虫抓取到js动态加载源码
# browser.refresh()
print('正在等待页面加载:3')
time.sleep(1)
print('正在等待页面加载:2')
time.sleep(1)
print('正在等待页面加载:1')
time.sleep(1)
# 直接分析网页
html = browser.page_source
# print("网页源码" + html) 用于判断是否获取成功
soup = BeautifulSoup(html, 'lxml')
infos = soup.find_all(class_='bili-video-card')
bv_id_list = []
for info in infos:
# 只定位视频链接
href = info.find('a').get('href')
            # 拆分链接并过滤掉拆分产生的空字符串
            split_url_data = [element for element in href.split('/') if element != '']
            # 打印检验内容
            # print(split_url_data)
            # 获取bvid:取以BV开头的一段,并去掉可能附带的查询参数
            bvid = next((element.split('?')[0] for element in split_url_data if element.startswith('BV')), None)
            # 利用if语句直接去重
            if bvid is not None and bvid not in bv_id_list:
                bv_id_list.append(bvid)
for bvid_index in range(0, len(bv_id_list)):
# 写入 input_filename
write_to_csv_bvid(input_filename, bv_id_list[bvid_index])
# 输出提示进度
print('写入文件成功')
print("===========成功获取第" + str(i + 1) + "次===========")
time.sleep(1)
# 退出爬虫
browser.quit()
# 打印信息显示是否成功
print(f'==========爬取完成。退出爬虫==========')
def write_to_csv(filename, bvid, aid, cid, mid, name, follower, archive, title, tname, pub_date, pub_time, desc,
view, like, coin, favorite, share, reply, danmaku):
file_exists = os.path.isfile(filename)
max_retries = 50
retries = 0
while retries < max_retries:
try:
with open(filename, mode='a', encoding='utf-8-sig', newline='') as csvfile: #utf-8用csv打开会乱码
fieldnames = ['BV号', 'AV号', 'CID', 'UP主ID', 'UP主名称', 'UP主粉丝数', '作品总数', '视频标题',
'视频分类标签',
'发布日期', '发布时间', '视频简介', '播放量', '点赞数', '投币数', '收藏数', '分享数',
'评论数',
'弹幕数']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
if not file_exists:
writer.writeheader()
writer.writerow({
'BV号': bvid, 'AV号': aid, 'CID': cid, 'UP主ID': mid, 'UP主名称': name, 'UP主粉丝数': follower,
'作品总数': archive, '视频标题': title, '视频分类标签': tname, '发布日期': pub_date,
'发布时间': pub_time,
'视频简介': desc, '播放量': view, '点赞数': like, '投币数': coin, '收藏数': favorite,
'分享数': share,
'评论数': reply, '弹幕数': danmaku
})
break # 如果成功写入,跳出循环
except PermissionError as e:
retries += 1
print(f"将爬取到的数据写入csv时,遇到权限错误Permission denied,文件可能被占用或无写入权限: {e}")
print(f"等待3s后重试,将会重试50次... (尝试 {retries}/{max_retries})")
else:
print("将爬取到的数据写入csv时遇到权限错误,且已达到最大重试次数50次,退出程序")
def get_user_info(uid):
"""
通过uid(即mid)获取UP主的粉丝总数和作品总数
:param uid: mid
:return:user_info_dict
"""
# 定义空字典用于存放数据
# 粉丝数 follower
# 作品总数 archive
user_info_dict = {}
# 首先写入请求头
# 设置用户代理 User_Agent及Cookies
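    # 注意:下面的Cookie是抓包得到的个人会话值,会过期;实际运行时建议替换成自己浏览器里复制的Cookie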
headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
'Cookie': "_uuid=184FB385-91A4-61AD-B6C7-884DD785628842770infoc; buvid3=8B0E1B98-EBDE-14AA-F297-ECCADCAED1D231558infoc; b_nut=1709371444; buvid4=4F7E1085-B848-54CA-014C-1A0A23980B5C31558-024030209-%2BQIQf5CyL2PFHTSlQieypA%3D%3D; buvid_fp=280a86e800223ff82a3e0e2d39cb9ddc; b_lsid=8DE77663_18E03A27E77; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; enable_web_push=DISABLE; FEED_LIVE_VERSION=V8; header_theme_version=CLOSE; home_feed_column=4; browser_resolution=982-738; CURRENT_FNVAL=4048; sid=mcyhai1k"}
# 将传入的的uid组成up主主页的api_url
api_url = f'https://api.bilibili.com/x/web-interface/card?mid={uid}'
# 打印次数,数据量大,便于查看进程
print(f"正在进行爬取uid为:{uid}的UP主的粉丝数量与作品总数")
# 打印本次要获取的uid,用于错误时确认
print(f"==========本次获取数据的up主的uid为:{uid}==========")
print(f"url为:{api_url}")
# 利用requests进行访问,并返回需要的封装信息
up_info = requests.get(url=api_url, headers=headers)
# 不知道会不会被封ip,保险起见
# time.sleep(2)
# 将数据转化为json格式
up_info_json = json.loads(up_info.text)
# 利用json定位相关数据
fans_number = up_info_json['data']['card']['fans']
user_info_dict['follower'] = fans_number
archive_count = up_info_json['data']['archive_count']
user_info_dict['archive'] = archive_count
# 等待
print('正在等待,以防访问过于频繁\n')
time.sleep(1.5)
return user_info_dict
def get_video_info(bv_id):
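    """
    通过BV号获取视频的基本信息(UP主、标题、分区、发布时间以及播放、点赞、投币等统计数据)
    :param bv_id: BV号
    :return: info_dict
    """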
headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
'Cookie': "_uuid=184FB385-91A4-61AD-B6C7-884DD785628842770infoc; buvid3=8B0E1B98-EBDE-14AA-F297-ECCADCAED1D231558infoc; b_nut=1709371444; buvid4=4F7E1085-B848-54CA-014C-1A0A23980B5C31558-024030209-%2BQIQf5CyL2PFHTSlQieypA%3D%3D; buvid_fp=280a86e800223ff82a3e0e2d39cb9ddc; b_lsid=8DE77663_18E03A27E77; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; enable_web_push=DISABLE; FEED_LIVE_VERSION=V8; header_theme_version=CLOSE; home_feed_column=4; browser_resolution=982-738; CURRENT_FNVAL=4048; sid=mcyhai1k"}
api_url = f'https://api.bilibili.com/x/web-interface/view?bvid={bv_id}'
# 打印本次要获取的bvid,用于错误时确认
print(f"正在进行爬取uid为:{bv_id}的UP主的粉丝数量与作品总数")
print(f"==========本次获取数据的视频BV号为:{bv_id}==========")
print(f"url为:{api_url}")
    # 示例:https://api.bilibili.com/x/web-interface/view?bvid=BV1n24y1D75V
video_info = requests.get(url=api_url, headers=headers)
video_info_json = json.loads(video_info.text)
# 创建存放的字典
info_dict = {}
# 视频bvid,即bv号
bvid = video_info_json['data']['bvid']
info_dict['bvid'] = bvid
# 视频aid,即av号
aid = video_info_json['data']['aid']
info_dict['aid'] = aid
# 视频cid,用于获取弹幕信息
cid = video_info_json['data']['cid']
info_dict['cid'] = cid
# 作者id
mid = video_info_json['data']['owner']['mid']
info_dict['mid'] = mid
# up主昵称
name = video_info_json['data']['owner']['name']
info_dict['name'] = name
# 视频标题
title = video_info_json['data']['title']
info_dict['title'] = title
# 视频标签
tname = video_info_json['data']['tname']
info_dict['tname'] = tname
# 视频发布时间戳
pubdate = video_info_json['data']['pubdate']
    # 转化时间戳
    pub_datetime = datetime.fromtimestamp(pubdate)
    # 整体格式
    pub_datetime_strf = pub_datetime.strftime('%Y-%m-%d %H:%M:%S')
    # 日期
    date = re.search(r"(\d{4}-\d{1,2}-\d{1,2})", pub_datetime_strf)
    info_dict['pub_date'] = date.group()
    # 时间
    pub_time = re.search(r"(\d{1,2}:\d{1,2}:\d{1,2})", pub_datetime_strf)
    info_dict['pub_time'] = pub_time.group()
# 视频创建时间戳
# ctime = info['ctime']
# 视频简介
desc = video_info_json['data']['desc']
info_dict['desc'] = desc
# 视频播放量
view = video_info_json['data']['stat']['view']
info_dict['view'] = view
# 点赞数
like = video_info_json['data']['stat']['like']
info_dict['like'] = like
# 投币数
coin = video_info_json['data']['stat']['coin']
info_dict['coin'] = coin
# 收藏数
favorite = video_info_json['data']['stat']['favorite']
info_dict['favorite'] = favorite
# 分享数
share = video_info_json['data']['stat']['share']
info_dict['share'] = share
    # 评论数
    reply = video_info_json['data']['stat']['reply']
    info_dict['reply'] = reply
# 视频弹幕数量
danmaku = video_info_json['data']['stat']['danmaku']
info_dict['danmaku'] = danmaku
print(f'=========={bv_id} 的视频基本信息已成功获取==========')
# 发布作品时的动态
# dynamic = info['dynamic']
print('正在等待,以防访问过于频繁\n')
time.sleep(1.5)
return info_dict
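附:bilibili.py 中定义的 merge_csv() 在上面的入口脚本里并没有被调用。如果想把多个关键词各自生成的BV号文件合并到一个汇总文件中,大致可以这样使用(示意,文件名仅为举例):
import bilibili

# 假设这些关键词都已经用 spider_bvid() 爬取过,各自生成了 f'{keyword}BV号.csv'
keywords = ['关键词1', '关键词2']
for keyword in keywords:
    # 把每个关键词文件中的BV号逐行追加写入同一个汇总文件
    bilibili.merge_csv(input_filename=f'{keyword}BV号.csv', output_filename='全部BV号.csv')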