# ===== 导入模块 =====
# 导入模块
import pandas as pd
import numpy as np
from collections import Counter
from pyecharts.charts import Geo,Bar,Line,Pie,Timeline
from pyecharts import options as opts
from pyecharts.globals import ThemeType,ChartType
# ===== 数据准备 & 预处理 =====
# Load the review dataset. `path` is not defined in this section; it must
# already hold the location of the Excel file when this line runs.
df = pd.read_excel(path)
# Print column dtypes and non-null counts as a quick sanity check.
df.info()
# Fill missing user names with a placeholder. The result is assigned back to
# the column instead of calling fillna(inplace=True) on the selection
# `df['用户名']`: that chained in-place form triggers a warning in recent
# pandas releases and leaves `df` unchanged when copy-on-write is enabled.
df['用户名'] = df['用户名'].fillna('未知')
# Remaining null count per column (only displayed when run interactively).
df.isnull().sum()
# 数据集展示如下:
# ===== 数据可视化 =====
# 1、评分等级分布
# Rating-level distribution: horizontal bar chart of how many rows fall
# into each value of the '评分' column, ordered from the smallest count up.
df_star = df.groupby(by='评分')['id'].count().sort_values()
x_data = list(map(str, df_star.index))
y_data = df_star.tolist()

# Build the chart step by step; every builder method mutates and returns
# the same Bar instance, so this is equivalent to one chained expression.
b1 = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
b1.add_xaxis(x_data)
b1.add_yaxis('', y_data)
# Swap the axes so the bars run horizontally.
b1.reversal_axis()
# Put each value label to the right of its bar.
b1.set_series_opts(label_opts=opts.LabelOpts(position='right'))
b1.set_global_opts(
    title_opts=opts.TitleOpts(
        title='评分等级分布',
        pos_left='45%',
        pos_top='5%',
    ),
    legend_opts=opts.LegendOpts(
        type_="scroll",
        pos_left="85%",
        pos_top="28%",
        orient="vertical",
    ),
    xaxis_opts=opts.AxisOpts(name='人/次'),
    yaxis_opts=opts.AxisOpts(name='评分等级'),
)
# Donut chart of the share of each rating level. The counts are recomputed
# here without the sort used for the bar chart, so the slices follow the
# order in which groupby returns the '评分' groups.
df_star = df.groupby('评分').count()['id']
x_data = [str(i) for i in df_star.index]
y_data = df_star.values.tolist()
p1 = (
    Pie(init_opts=opts.InitOpts(width='800px', height='600px'))
    .add(
        '',
        # One [label, count] pair per rating level.
        [list(z) for z in zip(x_data, y_data)],
        # Inner/outer radius and centre place a small ring in the
        # lower-right area of the canvas.
        radius=['10%', '30%'],
        center=['65%', '60%'],
        label_opts=opts.LabelOpts(is_show=True),
    )
    # `position` is a label option, so it is passed to LabelOpts. It was
    # previously given to set_series_opts itself, where it did not
    # configure the label.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='评分{b}: {d}%', position='outside')
    )
)
# Add the pie series to the bar chart so both are drawn on one canvas.
b1.overlap(p1)
# `render` has to be called: a bare `b1.render` only looks up the method and
# produces no output. Called without arguments, pyecharts writes the chart to
# `render.html` in the working directory; in a Jupyter notebook use
# `b1.render_notebook()` instead to display it inline.
b1.render()