Pandas处理数据后echarts图表不显示数据
闲来无事掏出了好久没做的数据预处理和可视化。
数据源分为四列:第一列记录的是数据的分类,总共分为3大类:Biological Process、Molecular Function、Cellular Component;第二列是一些不同的值;第三、四列分别是上升和下降的数据。需求是把第二列数据作为X轴,Y轴分别显示上升和下降的数据,每个x轴取值对应其在三大类下的数据。对于这些数据来说,三大类就是属性,x轴对应的数据就是具体变量,上升和下降就是具体的行为。所以只需要按属性和行为分类记录对应的变量即可,一共是6组数据(3个属性 × 2种行为)。
1.先提取数据,这里使用pandas,我们将第二列数据作为X轴坐标
# Load the spreadsheet; the 'GO Term (level2)' column supplies the x-axis categories.
df = pd.read_excel('data.xlsx')
x_values = list(df['GO Term (level2)'])
2.写一个方法,用于根据属性和行为提取数据
需要注意的是,有些变量在某个属性下不存在对应的行为数据,此时查询结果为空,取第一个元素会抛出索引异常(IndexError),这时我们就以0填充。
def data_extraction(title: str, up_or_down: str) -> list:
    """Collect one value per x-axis term for a category/column pair.

    For every entry of the module-level ``x_values`` this looks up the rows of
    ``df`` whose 'GO Term (level1)' equals ``title`` and whose
    'GO Term (level2)' equals that entry, and takes the first value found in
    the ``up_or_down`` column. Terms with no matching row contribute 0.
    """
    y_values = []
    for name in x_values:
        same_category = df['GO Term (level1)'] == title
        same_term = df['GO Term (level2)'] == name
        result = df.loc[same_category & same_term, up_or_down]
        try:
            # Bug site: the value is stored exactly as pandas/numpy returns it,
            # next to the plain int 0 appended in the fallback below.
            y_values.append(result.values[0])
        except IndexError:
            # Empty selection: there is no row for this category/term pair.
            y_values.append(0)
    return y_values
3.接下来就是将数据按属性和行为查找出来
def data_preprocessing():
    """Extract the six series used by the chart.

    Returns a 6-tuple of lists: the 'number_of_out (up)' values for
    Biological Process, Molecular Function and Cellular Component, followed
    by the 'number_of_out (down)' values for the same three categories.
    """
    categories = ('Biological Process', 'Molecular Function', 'Cellular Component')
    behaviours = ('number_of_out (up)', 'number_of_out (down)')
    # Looping over the column first keeps all "up" series ahead of the "down" ones.
    return tuple(
        data_extraction(category, column)
        for column in behaviours
        for category in categories
    )
4.数据准备好后就是可视化。为了区分数据,上升的数据用柱状图显示,下降的数据用折线图显示。
def data_to_visualization():
    """Render the "up" bars and "down" lines into ``visualization.html``.

    The three ``y_left_*`` lists are drawn as bar series on the primary value
    axis and the three ``y_right_*`` lists as line series on the extra value
    axis added with ``extend_axis`` (index 1). Both charts use ``x_values`` as
    their category axis. All seven inputs are read from module-level names;
    the function takes no arguments and returns ``None``.
    """
    up_series = (
        ("Biological Process", y_left_Biological_Process),
        ("Molecular Function", y_left_Molecular_Function),
        ("Cellular Component", y_left_Cellular_Component),
    )
    down_series = (
        ("Biological_Process_of_down", y_right_Biological_Process),
        ("Molecular_Function_of_down", y_right_Molecular_Function),
        ("Cellular_Component_of_down", y_right_Cellular_Component),
    )

    bar = Bar(
        init_opts=opts.InitOpts(
            width="1200px",
            height="500px",
            animation_opts=opts.AnimationOpts(
                animation_delay=1000, animation_easing="elasticOut"
            ),
        )
    )
    bar.add_xaxis(xaxis_data=x_values)
    for series_name, counts in up_series:
        bar.add_yaxis(
            series_name=series_name,
            y_axis=list(counts),
            label_opts=opts.LabelOpts(is_show=False),
        )

    # Second value axis; the line series attach to it through yaxis_index=1.
    bar.extend_axis(
        yaxis=opts.AxisOpts(
            name="Number(Down)",
            type_="value",
            min_=0,
            max_=25,
            interval=1,
            # The 15px label size belongs in font_size; font_weight only takes
            # weights such as "bold" or 400, so "15px" was not a valid value.
            axislabel_opts=opts.LabelOpts(formatter="{value} ", color="red", font_size=15),
            name_textstyle_opts=opts.TextStyleOpts(color="red", font_size=15),
        )
    )
    bar.set_global_opts(
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        legend_opts=opts.LegendOpts(
            pos_top="1%",
            pos_left="center",
            item_width=30,
            item_height=10,
            border_color="rgba(0, 0, 0, 0)",
            textstyle_opts=opts.TextStyleOpts(font_weight="bold"),
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
            axislabel_opts=opts.LabelOpts(rotate=-90),
        ),
        yaxis_opts=opts.AxisOpts(
            name="Number(Up)",
            type_="value",
            min_=0,
            max_=25,
            interval=1,
            axislabel_opts=opts.LabelOpts(formatter="{value} ", color="blue", font_size=15),
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
            name_textstyle_opts=opts.TextStyleOpts(color="blue", font_size=15),
        ),
    )

    line = Line()
    line.add_xaxis(xaxis_data=x_values)
    for series_name, counts in down_series:
        line.add_yaxis(
            series_name=series_name,
            yaxis_index=1,
            y_axis=counts,
            label_opts=opts.LabelOpts(is_show=False),
        )

    bar.overlap(line).render("visualization.html")
5.Bug定位,在第二步中说明了问题的出处
y_values.append(result.values[0])
我们打印下data_extraction提取出来的数据和类型看看
y_left_Biological_Process = data_extraction('Biological Process', 'number_of_out (up)')
# Print the extracted values together with the type of that same list.
print(y_left_Biological_Process, type(y_left_Biological_Process))
[20, 19, 4, 5, 6, 5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] <class 'list'>
可以看到数据和类型都是符合pyecharts的用法的
但最后画出来的图却是这样,而且只有部分值为0的数据才显示
如果我们将打印出来的数据直接写到
y_axis=list(y_left_Biological_Process)
#替换为
y_axis=list([20, 19, 4, 5, 6, 5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
画出来的图就是正常显示数据的。
这时候就一脸懵了????
难道有脏东西?
这时候我们就需要研究下pandas底层逻辑了,那我们直接根据pandas读取出来的二维列表查看源代码注释看看
可以看到在DataFrame类里面的第553行代码中,官方对DataFrame value的类型进行了说明:数据的类型为int64
那我们回到刚才的列表y_left_Biological_Process遍历下里面的数据和类型
# Show every element next to its concrete type to check whether they all match.
for value in y_left_Biological_Process:
    print(value, type(value))
# 1 <class 'numpy.int64'>
#1 <class 'numpy.int64'>
#1 <class 'numpy.int64'>
#0 <class 'numpy.int64'>
#0 <class 'numpy.int64'>
#0 <class 'numpy.int64'>
#0 <class 'numpy.int64'>
#1 <class 'numpy.int64'>
#0 <class 'int'>
#0 <class 'int'>
#0 <class 'int'>
可以看到列表中的元素类型并不一致:从表格中取出的值是numpy.int64,只有触发异常时填充的0才是Python内置的int。只有类型为int的元素才会显示,这也符合pyecharts的逻辑。
那我们就简单在提取数据时强转下int试试
y_values.append(int(result.values[0]))
最后再重新跑下代码看看
这下数据就有了。
6.问题总结
一句话:多看看官方源代码注释
最后关于pyecharts也是好久没用了 很多方法属性其实看看官方源代码就好
7.全部代码
import pyecharts.options as opts
from pyecharts.charts import Bar, Line
import pandas as pd
# Load the spreadsheet; the 'GO Term (level2)' column supplies the x-axis categories.
df = pd.read_excel('data.xlsx')
x_values = list(df['GO Term (level2)'])
def data_extraction(title: str, up_or_down: str) -> list:
    """Collect one built-in ``int`` per x-axis term for a category/column pair.

    For every entry of the module-level ``x_values`` this looks up the rows of
    ``df`` whose 'GO Term (level1)' equals ``title`` and whose
    'GO Term (level2)' equals that entry, and takes the first value found in
    the ``up_or_down`` column.

    Args:
        title: Category to match in the 'GO Term (level1)' column.
        up_or_down: Name of the column holding the counts to extract.

    Returns:
        A list as long as ``x_values``. Terms with no matching row, or whose
        cell is blank (NaN), contribute 0.
    """
    # The category filter does not depend on the term, so build it only once.
    in_category = df['GO Term (level1)'] == title
    terms = df['GO Term (level2)']

    y_values = []
    for name in x_values:
        result = df.loc[in_category & (terms == name), up_or_down]
        if result.empty or pd.isna(result.values[0]):
            # No row (or an empty cell) for this pair: plot it as 0.
            y_values.append(0)
        else:
            # Cast the numpy scalar to a built-in int so every element of the
            # list has the same plain Python type before it reaches pyecharts.
            y_values.append(int(result.values[0]))
    return y_values
def data_preprocessing():
    """Extract the six series used by the chart.

    Returns a 6-tuple of lists: the 'number_of_out (up)' values for
    Biological Process, Molecular Function and Cellular Component, followed
    by the 'number_of_out (down)' values for the same three categories.
    """
    categories = ('Biological Process', 'Molecular Function', 'Cellular Component')
    behaviours = ('number_of_out (up)', 'number_of_out (down)')
    # Looping over the column first keeps all "up" series ahead of the "down" ones.
    return tuple(
        data_extraction(category, column)
        for column in behaviours
        for category in categories
    )
# Run the extraction once and bind the six series to the module-level names
# that data_to_visualization() reads.
(
    y_left_Biological_Process,
    y_left_Molecular_Function,
    y_left_Cellular_Component,
    y_right_Biological_Process,
    y_right_Molecular_Function,
    y_right_Cellular_Component,
) = data_preprocessing()
def data_to_visualization():
    """Render the "up" bars and "down" lines into ``visualization.html``.

    The three ``y_left_*`` lists are drawn as bar series on the primary value
    axis and the three ``y_right_*`` lists as line series on the extra value
    axis added with ``extend_axis`` (index 1). Both charts use ``x_values`` as
    their category axis. All seven inputs are read from module-level names;
    the function takes no arguments and returns ``None``.
    """
    up_series = (
        ("Biological Process", y_left_Biological_Process),
        ("Molecular Function", y_left_Molecular_Function),
        ("Cellular Component", y_left_Cellular_Component),
    )
    down_series = (
        ("Biological_Process_of_down", y_right_Biological_Process),
        ("Molecular_Function_of_down", y_right_Molecular_Function),
        ("Cellular_Component_of_down", y_right_Cellular_Component),
    )

    bar = Bar(
        init_opts=opts.InitOpts(
            width="1200px",
            height="500px",
            animation_opts=opts.AnimationOpts(
                animation_delay=1000, animation_easing="elasticOut"
            ),
        )
    )
    bar.add_xaxis(xaxis_data=x_values)
    for series_name, counts in up_series:
        bar.add_yaxis(
            series_name=series_name,
            y_axis=list(counts),
            label_opts=opts.LabelOpts(is_show=False),
        )

    # Second value axis; the line series attach to it through yaxis_index=1.
    bar.extend_axis(
        yaxis=opts.AxisOpts(
            name="Number(Down)",
            type_="value",
            min_=0,
            max_=25,
            interval=1,
            # The 15px label size belongs in font_size; font_weight only takes
            # weights such as "bold" or 400, so "15px" was not a valid value.
            axislabel_opts=opts.LabelOpts(formatter="{value} ", color="red", font_size=15),
            name_textstyle_opts=opts.TextStyleOpts(color="red", font_size=15),
        )
    )
    bar.set_global_opts(
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        legend_opts=opts.LegendOpts(
            pos_top="1%",
            pos_left="center",
            item_width=30,
            item_height=10,
            border_color="rgba(0, 0, 0, 0)",
            textstyle_opts=opts.TextStyleOpts(font_weight="bold"),
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
            axislabel_opts=opts.LabelOpts(rotate=-90),
        ),
        yaxis_opts=opts.AxisOpts(
            name="Number(Up)",
            type_="value",
            min_=0,
            max_=25,
            interval=1,
            axislabel_opts=opts.LabelOpts(formatter="{value} ", color="blue", font_size=15),
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
            name_textstyle_opts=opts.TextStyleOpts(color="blue", font_size=15),
        ),
    )

    line = Line()
    line.add_xaxis(xaxis_data=x_values)
    for series_name, counts in down_series:
        line.add_yaxis(
            series_name=series_name,
            yaxis_index=1,
            y_axis=counts,
            label_opts=opts.LabelOpts(is_show=False),
        )

    bar.overlap(line).render("visualization.html")
# Build the combined chart and write it to visualization.html.
data_to_visualization()