import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar

# Load the danmaku (bullet-comment) export and print pandas' summary of it.
df = pd.read_csv('弹幕.csv')
df.info()

# Character length of every comment.
# read_csv turns empty cells (and literal "NA"/"null"/"nan" text) into float
# NaN, on which len() raises TypeError. Missing comments are therefore skipped
# (their comment_length stays NaN and value_counts() ignores them), and the
# remaining values are coerced to str so non-string cells cannot break the
# length computation either.
df['comment_length'] = df['comments'].dropna().astype(str).str.len()

# Number of comments per length, ordered by ascending length.
length_series = df['comment_length'].value_counts().sort_index(ascending=True)
# Comment lengths in ascending order (x axis). The int cast also covers the
# float index that appears when some comments were missing.
length_list = length_series.index.astype(int).tolist()
# Occurrence count for each length (y axis).
count_list = length_series.values.astype(int).tolist()

# Bar chart of the comment-length distribution, written to an HTML file and
# also rendered inline for notebook use.
chart = Bar()
(
    chart.add_xaxis(length_list)
    .add_yaxis("弹幕", count_list, color='#DF0101')
    .set_global_opts(
        title_opts=opts.TitleOpts(title="弹幕长度分布"),
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
    )
    .render("弹幕长度分布.html")
)
chart.render_notebook()
import time

import numpy as np
from pyecharts.charts import Line

# Width of one time bucket, in seconds.
BUCKET_SECONDS = 10
# Exclusive upper bound for the bucket start times (the last bucket starts at
# 1430 s and is open-ended). Presumably the video's length in seconds --
# TODO confirm against the source video.
TIMELINE_END_SECONDS = 1435

# Convert the colour column to int and store its hexadecimal string form
# (e.g. 16777215 -> '0xffffff') in a new 'color' column.
df['color'] = df['颜色'].astype(int).map(hex)
# Make sure the timestamp column is numeric before bucketing.
df['出现时间点'] = df['出现时间点'].astype('float')

# Bucket every timestamp into BUCKET_SECONDS-wide intervals labelled by their
# start second. right=False makes the intervals [start, start + width): with
# pandas' default (right=True) a comment at exactly 0.0 s fell outside every
# bin and was dropped, and one at exactly 10.0 s was labelled as bucket 0.
bucket_starts = list(range(0, TIMELINE_END_SECONDS, BUCKET_SECONDS))
temp_df = pd.DataFrame({
    '出现时间点': pd.cut(
        df['出现时间点'],
        bins=bucket_starts + [np.inf],
        labels=bucket_starts,
        right=False,
    ),
})

# Comments per bucket in chronological order. The column is categorical, so
# buckets without any comment are kept with a count of 0.
count_series = temp_df['出现时间点'].value_counts().sort_index(ascending=True)
# Relabel each bucket from seconds to "MM:SS".
count_series.index = count_series.index.map(
    lambda seconds: time.strftime('%M:%S', time.gmtime(seconds))
)
time_list = count_series.index.tolist()
count_list = count_series.values.astype('int').tolist()

# Smoothed area/line chart of comment volume over the video timeline.
chart = (
    Line()
    .add_xaxis(time_list)
    .add_yaxis("弹幕", count_list, is_smooth=True)
    .set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="弹幕量与视频时间关系"),
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
        xaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
            is_scale=False,
            boundary_gap=False,
        ),
    )
)
chart.render_notebook()
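# [Relationship between danmaku length, danmaku timestamps, and video time]
# (Source page footer: latest recommended article published 2024-06-07 10:49:04)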