用Python的aiohttp和B站API分析UP主的流量变化
闲来无事,练习一下aiohttp模块的使用:借助B站的API分析UP主视频的流量趋势,包括播放量和评论数的变化。废话不多说,直接上代码。感谢以下参考资料:
matplotlib.pyplot绘制多个折线图,并标注最大值和最小值
协程
一、UP主id的获取
进入B站,打开要查询的UP主的主页,链接中的那串数字就是UP主的id,例如飞社长的id是11100920。
二、代码
我用的是Jupyter
import aiohttp
import asyncio
import math
import random
import nest_asyncio
import matplotlib.pyplot as plt
import time
%matplotlib inline
# nest_asyncio patches asyncio to allow nested event loops, which lets
# asyncio.run() be called from inside a notebook that already runs a loop.
nest_asyncio.apply()
plt.rcParams['font.sans-serif'] = ['SimHei'] # font used so Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False # so the minus sign renders correctly
下面代码来自:这位博主,感谢
# Pool of User-Agent header values; the request helpers pick one at random
# with random.choice() for every call.
user_agent_list = [
    "Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
    "Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)",
    "Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11",
    "Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
    "Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1)",
    # The comma after the next entry was missing, which silently glued it to
    # the following string and produced one malformed User-Agent.
    "Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727;SE2.XMetaSr1.0)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]
async def getInfoByUPID(up_id):
    """Fetch the display name of the uploader (UP) with the given id.

    Args:
        up_id: Uploader id, i.e. the number in the uploader's space URL.
            Accepts a str or an int.

    Returns:
        The ``data.name`` value of the response, or ``""`` when the HTTP
        status is not 200 or the payload carries no name.
    """
    # f-string formatting also accepts an int id (plain "+" would raise).
    url = f"https://api.bilibili.com/x/space/acc/info?mid={up_id}&jsonp=jsonp"
    headers = {"User-Agent": random.choice(user_agent_list)}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as r:
            # Check the status before decoding: a non-200 response may not
            # have a JSON body, in which case r.json() raises.
            if r.status != 200:
                return ""
            r_json = await r.json()
    # "data" can be absent or null when the API reports an error.
    data = r_json.get("data") or {}
    return data.get("name") or ""
async def getVideoListLengthByUPID(up_id):
    """Return how many videos the uploader has published.

    Requests a single-item page (``ps=1``) only to read the total count,
    which the caller uses to decide how many pages to fetch concurrently.

    Args:
        up_id: Uploader id; accepts a str or an int.

    Returns:
        ``data.page.count`` from the response, or ``0`` when the HTTP status
        is not 200 or the count is missing from the payload.
    """
    headers = {"User-Agent": random.choice(user_agent_list)}
    url = (
        "https://api.bilibili.com/x/space/arc/search"
        f"?mid={up_id}&ps=1&tid=0&pn=1&keyword=&order=pubdate&jsonp=jsonp"
    )
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as r:
            # Check the status before decoding: a non-200 response may not
            # have a JSON body, in which case r.json() raises.
            if r.status != 200:
                return 0
            r_json = await r.json()
    # Any level may be absent or null when the API reports an error; fall
    # back to 0 so the caller's page arithmetic never sees None.
    page = (r_json.get("data") or {}).get("page") or {}
    return page.get("count") or 0
async def getVideoListItemByUPID(session, up_id, page_index):
    """Fetch one page (30 videos) of the uploader's video list.

    This is the unit of work that gets scheduled concurrently, one call per
    page, all sharing the same session.

    Args:
        session: An open ``aiohttp.ClientSession`` used for the request.
        up_id: Uploader id; accepts a str or an int.
        page_index: 1-based page number (``pn`` query parameter).

    Returns:
        A list of dicts, one per video, holding only the selected fields:
        ``comment`` (comment count), ``play`` (play count), ``created``
        (creation time), ``aid`` (unique video id), ``typeid`` (category),
        ``length`` (duration) and ``video_review``. Returns ``[]`` when the
        HTTP status is not 200 or the payload has no video list.
    """
    # Fields copied from each API item; "title" is available in the response
    # but intentionally not collected.
    wanted = ("comment", "play", "created", "aid", "typeid", "length", "video_review")
    headers = {"User-Agent": random.choice(user_agent_list)}
    url = (
        "https://api.bilibili.com/x/space/arc/search"
        f"?mid={up_id}&ps=30&tid=0&pn={page_index}&keyword=&order=pubdate&jsonp=jsonp"
    )
    async with session.get(url, headers=headers) as r:
        # Check the status before decoding: a non-200 response may not have
        # a JSON body, in which case r.json() raises.
        if r.status != 200:
            return []
        r_json = await r.json()
    # Any level may be absent or null when the API reports an error.
    vlist = ((r_json.get("data") or {}).get("list") or {}).get("vlist") or []
    return [{key: item.get(key) for key in wanted} for item in vlist]
async def getVideoListByUPID(up_id):
    """Fetch every page of an uploader's video list concurrently.

    First asks for the total video count and the uploader's name, then
    schedules one page request per 30 videos on a shared session.

    Args:
        up_id: Uploader id.

    Returns:
        A ``(tasks, name)`` tuple: ``tasks`` is the list of finished
        ``asyncio.Task`` objects (one per page, read with ``.result()``) and
        ``name`` is the uploader's display name. ``tasks`` is empty when the
        uploader has no videos.
    """
    list_length = await getVideoListLengthByUPID(up_id)
    name = await getInfoByUPID(up_id)
    page_count = math.ceil(list_length / 30)  # 30 videos per page
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(getVideoListItemByUPID(session, up_id, page_index))
            for page_index in range(1, page_count + 1)
        ]
        # asyncio.wait() raises ValueError on an empty collection, which
        # happens when the uploader has no videos.
        if tasks:
            await asyncio.wait(tasks)
    return tasks, name
下面是数据处理和图像绘制的代码。如果图像不显示,可能是因为不在Jupyter中运行:此时需要去掉 %matplotlib inline,并在绘图结束后调用 plt.show()。
def _mark_extremes(values):
    """Dot and label the largest and smallest value on the current figure."""
    for extreme in (max(values), min(values)):
        position = values.index(extreme) + 1  # x axis is 1-based
        plt.plot(position, extreme, 'ko')
        # Offset the label slightly so it does not sit on top of the dot.
        plt.annotate(str(extreme), xy=(position, extreme), xytext=(position + 1, extreme + 2))


def _plot_series(x_index, values, label, title):
    """Draw one line chart in a new figure and mark its max and min."""
    plt.figure(figsize=(20, 6))
    plt.plot(x_index, values, label=label)
    plt.title(title, fontsize=20)
    plt.legend()
    _mark_extremes(values)


def main(up_id):
    """Plot play-count and comment-count trends for one uploader.

    Downloads the uploader's full video list, orders it by creation time
    (oldest first) and draws two figures: play count per video and comment
    count per video, each with its maximum and minimum annotated.

    Args:
        up_id: Uploader id.

    Returns:
        None. Prints a message and draws nothing when no video data was
        retrieved.
    """
    tasks, name = asyncio.run(getVideoListByUPID(up_id))
    # Flatten the per-page results into one list of video dicts.
    data_list = [video for task in tasks for video in task.result()]
    if not data_list:
        # max()/min() and the [0]/[-1] lookups below would raise on no data.
        print("未获取到任何视频数据,无法绘图")
        return

    # Oldest video first, so the x axis reads as a timeline.
    videos = sorted(data_list, key=lambda video: video["created"])
    plays = [video["play"] for video in videos]
    comments = [video["comment"] for video in videos]
    x_index = list(range(1, len(videos) + 1))

    # Date range covered by the data, shown in both titles.
    start_time = time.strftime("%Y-%m-%d", time.localtime(videos[0]["created"]))
    end_time = time.strftime("%Y-%m-%d", time.localtime(videos[-1]["created"]))

    _plot_series(x_index, plays, '播放量',
                 name + "视频播放量变化" + start_time + "至" + end_time)
    _plot_series(x_index, comments, '评论数',
                 name + "视频评论数变化" + start_time + "至" + end_time)
三、效果
从折线图我们可以看到很多的信息,李灿的视频最近不太行
四、说明
其实我们拿到数据之后可以干很多的事情的,从不同维度分析,欢迎大家一起讨论学习哟!