import pandas as pd
import numpy as np
import time
import jieba
import datetime
from pyecharts import options as opts
from pyecharts. charts import *
from pyecharts. commons. utils import JsCode
from pyecharts. components import Table
from pyecharts. options import ComponentTitleOpts
# Read the ranking workbook and remove the '头像' column in one step.
df = pd.read_excel('B站新榜_总粉丝数榜单.xlsx').drop(columns=['头像'])
# Notebook preview of the first two rows.
df.head(2)
数据更新时间 创作领域 等级 获赞数 mid up主 up主标签 播放数 性别 类型 平均获赞数 平均播放数 充电人数 总粉丝人数 作品数 0 2022-05-15 14:14:58 生活 6 6104495 321173469 哔哩哔哩大会员 哔哩哔哩大会员官方账号 2939318 男 生活 290690.2 139967.5 351 24620688 21 1 2022-05-15 14:14:58 生活 6 91718101 9824766 敬汉卿 bilibili 2020百大UP主、2019年度弹幕人气奖UP主 1588762824 男 生活 77793.1 1347551.2 20379 9272243 1179
# Distinct values of the '创作领域' column, in the order produced by value_counts().
# NOTE: this name shadows the builtin `type`; it is kept because the table-building
# loop further down refers to it by this name.
type = df['创作领域'].value_counts().index.to_list()
type
['娱乐',
'游戏',
'国创',
'动物圈',
'动画',
'影视',
'番剧',
'纪录片',
'资讯',
'鬼畜',
'数码',
'舞蹈',
'美食',
'知识',
'运动',
'科技',
'时尚',
'汽车',
'音乐',
'生活',
'电视剧',
'电影']
# Every column name except the first one; used as the header row of the tables below.
headers = df.columns[1:].tolist()
headers
['创作领域',
'等级',
'获赞数',
'mid',
'up主',
'up主标签',
'播放数',
'性别',
'类型',
'平均获赞数',
'平均播放数',
'充电人数',
'总粉丝人数',
'作品数']
# One tab per '创作领域' value, each holding a table of that category's rows.
tab = Tab()
for category in type:
    # Rows of df that belong to the current category.
    tab_category = df[df['创作领域'] == category]
    # One plain list per row, restricted to the header columns.
    rows = (
        tab_category[headers]
        .apply(lambda record: list(record), axis=1)
        .values
        .tolist()
    )

    table = Table()
    table.add(
        headers,
        rows,
        attributes={
            'class': 'fl_table',
            'style': 'margin: 0 auto',
        },
    )
    table.set_global_opts(
        title_opts=ComponentTitleOpts(
            title=f' {category} - 总粉丝排行榜单Top50',
            subtitle='更新时间:2022-05-15 充电人数为负是数据本身问题',
        )
    )

    tab.add(table, category)

tab.render_notebook()
def bar_chart(desc, title_pos, num):
    """Build a bar chart of the ``num`` largest rows of ``df`` by column ``desc``.

    Args:
        desc: Name of the ``df`` column to rank by and to plot on the y axis.
        title_pos: Indexable pair; element 0 is used as the title's left
            position and element 1 as its top position.
        num: Number of top rows to keep after sorting in descending order.

    Returns:
        The configured ``Bar`` chart, with the 'up主' column on the x axis.
    """

    def dashed_split_line():
        # A fresh options object per axis, exactly as if written out twice.
        return opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dashed'),
        )

    top_rows = df.sort_values(by=[desc], ascending=False).head(num)

    chart = Bar()
    chart.add_xaxis(top_rows['up主'].tolist())
    chart.add_yaxis('', top_rows[desc].tolist())

    x_axis = opts.AxisOpts(
        is_scale=True,
        axislabel_opts={'rotate': '90'},
        splitline_opts=dashed_split_line(),
    )
    y_axis = opts.AxisOpts(
        is_scale=True,
        name='',
        type_='value',
        splitline_opts=dashed_split_line(),
    )
    tooltip = opts.TooltipOpts(trigger='axis', axis_pointer_type='shadow')
    heading = opts.TitleOpts(
        title='up主-' + desc,
        subtitle='👇👇👇👇',
        pos_left=title_pos[0],
        pos_top=title_pos[1],
        title_textstyle_opts=opts.TextStyleOpts(color='#42B983', font_size=16),
    )

    chart.set_global_opts(
        xaxis_opts=x_axis,
        yaxis_opts=y_axis,
        tooltip_opts=tooltip,
        title_opts=heading,
    )
    return chart
def transform_fans(x):
    """Return the label of the follower-count bucket that ``x`` falls into.

    Buckets are checked in ascending order and each upper bound is inclusive.
    A value that is not ``<=`` any bound (anything above 10000000, and also
    NaN, for which every comparison is false) gets the label '>1000w'.
    """
    # (inclusive upper bound, label), in ascending order of the bound.
    upper_bounds = (
        (100000, '10w'),
        (500000, '10w~50w'),
        (1000000, '50w~100w'),
        (2000000, '100w~200w'),
        (3000000, '200w~300w'),
        (4000000, '300w~400w'),
        (5000000, '400w~500w'),
        (6000000, '500w~600w'),
        (7000000, '600w~700w'),
        (8000000, '700w~800w'),
        (9000000, '800w~900w'),
        (10000000, '900w~1000w'),
    )
    for limit, label in upper_bounds:
        if x <= limit:
            return label
    return '>1000w'
def transform_work(x):
    """Return the label of the work-count bucket that ``x`` falls into.

    Buckets are checked in ascending order and each upper bound is inclusive.
    A value that is not ``<=`` any bound (anything above 500, and also NaN,
    for which every comparison is false) gets the label '>500'.
    """
    # (inclusive upper bound, label), in ascending order of the bound.
    upper_bounds = (
        (100, '0~100'),
        (200, '100~200'),
        (300, '200~300'),
        (400, '300~400'),
        (500, '400~500'),
    )
    for limit, label in upper_bounds:
        if x <= limit:
            return label
    return '>500'
def pie_chart():
    """Build one ``Pie`` holding two rings: follower buckets and work-count buckets.

    Side effect: writes the bucket labels back onto the module-level ``df`` as
    the columns 'fans_cut' (from '总粉丝人数') and 'works_cut' (from '作品数').

    Returns:
        The configured ``Pie`` chart with the legend hidden and one title per ring.
    """

    def bucket_counts(source_column, bucket_column, to_bucket):
        # Store the bucket labels on df, then return [label, count] pairs.
        df[bucket_column] = df[source_column].apply(to_bucket)
        counts = df[bucket_column].value_counts()
        return [
            [label, total]
            for label, total in zip(counts.index.tolist(), counts.values.tolist())
        ]

    fans_pairs = bucket_counts('总粉丝人数', 'fans_cut', transform_fans)
    works_pairs = bucket_counts('作品数', 'works_cut', transform_work)

    # The titles are passed as raw option dicts, one per ring.
    fans_title = {
        'text': '总粉丝人数区间分布',
        'left': '15',
        'top': '80%',
        'textStyle': {'color': '#334B5C', 'fontSize': 16},
    }
    works_title = {
        'text': '作品数区间分布',
        'left': '60%',
        'top': '80%',
        'textStyle': {'color': '#334B5C', 'fontSize': 16},
    }

    pie = Pie()
    pie.add('', fans_pairs, radius=['55', '100'], center=['30%', '90%'])
    pie.add('', works_pairs, radius=['55', '100'], center=['75%', '90%'])
    pie.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c} {d}%'))
    pie.set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        title_opts=[fans_title, works_title],
    )
    return pie
# Single canvas that hosts the six bar charts and the pie chart.
grid = Grid(
    init_opts=opts.InitOpts(width='1000px', height='2000px', theme='light')
)


def _place_chart(chart, top, bottom, left, right):
    """Add ``chart`` to ``grid`` inside the box given by the four edge offsets."""
    grid.add(
        chart,
        is_control_axis_index=False,
        grid_opts=opts.GridOpts(
            pos_top=top,
            pos_bottom=bottom,
            pos_left=left,
            pos_right=right,
        ),
    )


# Arguments after the chart are: top, bottom, left, right.
# First row: total likes and average likes, side by side.
_place_chart(bar_chart('获赞数', ['15%', '1%'], 8), '5%', '85%', '15%', '50%')
_place_chart(bar_chart('平均获赞数', ['60%', '1%'], 8), '5%', '85%', '60%', '5%')

# Second row: total plays and average plays, side by side.
_place_chart(bar_chart('播放数', ['15%', '21%'], 8), '25%', '65%', '15%', '50%')
_place_chart(bar_chart('平均播放数', ['60%', '21%'], 8), '25%', '65%', '60%', '5%')

# Full-width rows: follower totals, then work counts.
_place_chart(bar_chart('总粉丝人数', ['15%', '40%'], 15), '44%', '46%', '15%', '5%')
_place_chart(bar_chart('作品数', ['15%', '58%'], 15), '61%', '29%', '15%', '5%')

# The pie chart is added last.
_place_chart(pie_chart(), '25%', '65%', '5%', '5%')

grid.render_notebook()