1.打开B站的热门视频
2.右键查看页面源代码,搜索第一个视频标题的部分文字,确认信息内容的来源;再在开发者工具的网络面板中查看该请求的负载与请求地址,将其作为目标url地址。
3.爬取热门视频的信息并保存到csv文件中
爬虫部分代码
import requests
import json
import csv
# Step 1: fetch Bilibili's "popular videos" API and save each video's stats to a CSV.
# NOTE(review): w_rid/wts look like signed request parameters captured from the
# browser — they expire; re-capture them if the request starts failing.
url = 'https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1&web_location=333.934&w_rid=3fe5bc9825c83f407b66ba1064507049&wts=1707288726'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    # FIX: the original had a bare, undefined placeholder identifier here, which
    # raises NameError. Use a string placeholder; paste your own browser cookie.
    'Cookie': '在这里粘贴浏览器中的cookie',
}
fieldnames = [
    '视频标题',
    '视频类型',
    '播放量',
    '弹幕量',
    '评论数量',
    '收藏数量',
    '投币数量',
    '分享数量',
    '点赞量',
]
# FIX: mode='w' instead of 'a' — the header is written unconditionally, so append
# mode duplicates the header row on every run and corrupts the CSV.
# FIX: newline='' is required by the csv module to avoid blank lines on Windows.
# FIX: a context manager guarantees the file is flushed and closed.
with open('热门视频信息.csv', mode='w', encoding='utf_8', newline='') as f:
    csv_write = csv.DictWriter(f, fieldnames=fieldnames)
    csv_write.writeheader()
    response = requests.get(url=url, headers=headers)
    json_data = response.json()
    for video in json_data['data']['list']:
        stat = video['stat']  # all counters live under the 'stat' sub-dict
        dit = {
            '视频标题': video['title'],
            '视频类型': video['tname'],
            '播放量': stat['view'],
            '弹幕量': stat['danmaku'],
            '评论数量': stat['reply'],
            '收藏数量': stat['favorite'],
            '投币数量': stat['coin'],
            '分享数量': stat['share'],
            '点赞量': stat['like'],
        }
        csv_write.writerow(dit)
        print(dit)
4.查看爬取结果
5.打开保存的csv文件,读取其中的信息,分别建立空列表保存各列数据,作为绘图数据
这里我只选取了评论数量和弹幕量两列数据进行展示,下面是生成柱状图的代码。
# Step 2: read the saved CSV back and draw a bar chart of comment/danmaku
# counts per video title (pyecharts Bar, rendered to an HTML file).
# FIX: removed seven declared-but-unused lists from the original.
title_list = []    # x axis: video titles
reply_list = []    # comment counts
danmaku_list = []  # danmaku (bullet-comment) counts
with open('热门视频信息.csv', 'r', encoding='utf-8') as csvFile:
    csvReader = csv.DictReader(csvFile)
    for row in csvReader:
        title_list.append(row['视频标题'])
        # FIX: csv yields strings; convert the counts to int so the chart
        # plots real numbers instead of numeric-looking strings.
        reply_list.append(int(row['评论数量']))
        danmaku_list.append(int(row['弹幕量']))
print(title_list, reply_list, danmaku_list)
c = (
    Bar()
    .add_xaxis(title_list)
    .add_yaxis("评论数量", reply_list)
    .add_yaxis("弹幕量", danmaku_list)
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        title_opts=opts.TitleOpts(title="B站视频信息", subtitle="长标签"),
    )
    .reversal_axis()
    .render("bar_rotate_xaxis_label.html")
)
6.完整代码,(爬取数据+保存数据+根据标题评论弹幕量生成柱状图。)
import requests
import json
import csv
from pyecharts import options as opts
from pyecharts.charts import Bar
# Full pipeline: crawl popular-video stats -> save to CSV -> re-read the CSV ->
# render a bar chart (comment & danmaku counts per title) with pyecharts.
# NOTE(review): w_rid/wts are signed, expiring parameters captured from the browser.
url = 'https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1&web_location=333.934&w_rid=3fe5bc9825c83f407b66ba1064507049&wts=1707288726'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0',
    # SECURITY(review): this is a real logged-in cookie (SESSDATA / bili_jct are
    # login credentials). Never publish live cookies — invalidate these and use
    # your own, freshly copied from the browser.
    'Cookie': 'i-wanna-go-back=-1; buvid4=5B748413-01CA-7940-DEB2-A1C561CDC85F39728-022041813-551X6fL2lClZpQthkEGEpg%3D%3D; LIVE_BUVID=AUTO2816502644766146; buvid_fp_plain=undefined; CURRENT_BLACKGAP=0; rpdid=|(k)~YJYYkJ)0J\'uYY)YmYuk|; b_ut=5; CURRENT_PID=46c48b60-df35-11ed-b1c0-ff6c173ac20f; FEED_LIVE_VERSION=V8; nostalgia_conf=-1; hit-new-style-dyn=1; home_feed_column=5; CURRENT_QUALITY=80; enable_web_push=DISABLE; header_theme_version=CLOSE; hit-dyn-v2=1; fingerprint=033d1655e7e8cd27c1badee32b999e83; CURRENT_FNVAL=4048; bp_video_offset_313054266=868560730804191252; _uuid=3510E95C10-9277-9119-2C38-2C92A57373F941251infoc; buvid3=281B2A4B-4B77-12EC-E609-945F6F56E9DC42216infoc; b_nut=1701069541; SESSDATA=5b3db077%2C1716621602%2C98339%2Ab1CjD-EPeJo7HB0cga8OeTtvueBqLJyfN6z8uSoDCwsf9Az7xOzBbdzdqACoz_jsAUk9wSVjFCcmZPT1RzdW11RDhacUFUN2RkZ0NXZ2lFYmY2RFdFb0NuTlB4OFV0MWpMQVpxcjdWdWx2cG1GSGc0Sm0wRnI3aFJtNFdQbUNkYUg3WnVZeXhTWEdnIIEC; bili_jct=da7e1fde5a3886915cb719f7398cb19c; DedeUserID=391236845; DedeUserID__ckMd5=ebeef92af57b95cc; browser_resolution=2048-1034; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDIxNzUxMzksImlhdCI6MTcwMTkxNTg3OSwicGx0IjotMX0.98W7PLkx1CDGAIx1Frp9SfANDPAPUY861NuFA7i5sIU; bili_ticket_expires=1702175079; buvid_fp=f9245ca6c68a2e07c64f8786bd3caca6; b_lsid=A25FE721_18C428A5D03; bp_video_offset_391236845=872238283662295058; sid=5phwuvdl; PVID=3'
}
fieldnames = [
    '视频标题',
    '视频类型',
    '播放量',
    '弹幕量',
    '评论数量',
    '收藏数量',
    '投币数量',
    '分享数量',
    '点赞量',
]
# FIX: mode='w' (not 'a') — the header is written on every run, so append mode
# duplicates header rows. newline='' avoids the csv module's blank lines on
# Windows. The with-block closes (and flushes) the file BEFORE it is re-read
# below; the original never closed it, so buffered rows could be missing from
# the chart data.
with open('热门视频信息.csv', mode='w', encoding='utf_8_sig', newline='') as f:
    csv_write = csv.DictWriter(f, fieldnames=fieldnames)
    csv_write.writeheader()
    response = requests.get(url=url, headers=headers)
    json_data = response.json()
    for video in json_data['data']['list']:
        stat = video['stat']  # all counters live under the 'stat' sub-dict
        csv_write.writerow({
            '视频标题': video['title'],
            '视频类型': video['tname'],
            '播放量': stat['view'],
            '弹幕量': stat['danmaku'],
            '评论数量': stat['reply'],
            '收藏数量': stat['favorite'],
            '投币数量': stat['coin'],
            '分享数量': stat['share'],
            '点赞量': stat['like'],
        })
# FIX: removed seven declared-but-unused lists from the original.
title_list = []    # x axis: video titles
reply_list = []    # comment counts
danmaku_list = []  # danmaku (bullet-comment) counts
# FIX: the file was written with a BOM (utf_8_sig) but the original read it back
# as plain utf-8, leaving '\ufeff' glued to the first field name so that
# row['视频标题'] raises KeyError. Read with utf-8-sig to strip the BOM.
with open('热门视频信息.csv', 'r', encoding='utf-8-sig') as csvFile:
    csvReader = csv.DictReader(csvFile)
    for row in csvReader:
        title_list.append(row['视频标题'])
        # FIX: csv yields strings; convert counts to int for plotting.
        reply_list.append(int(row['评论数量']))
        danmaku_list.append(int(row['弹幕量']))
print(title_list, reply_list, danmaku_list)
c = (
    Bar()
    .add_xaxis(title_list)
    .add_yaxis("评论数量", reply_list)
    .add_yaxis("弹幕量", danmaku_list)
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        title_opts=opts.TitleOpts(title="B站视频信息", subtitle="长标签"),
    )
    .reversal_axis()
    .render("bar_rotate_xaxis_label.html")
)