数据预处理和可视化

Pandas处理数据后echarts图表不显示数据

闲来无事掏出了好久没做的数据预处理和可视化。
在这里插入图片描述

数据源共有四列:第一列记录数据的分类,总共分为3大类:Biological Process、Molecular Function、Cellular Component;第二列是一些不同的取值(即 GO Term (level2));第三、四列分别是上升和下降的数据。需求是把第二列数据显示为X轴,Y轴分别显示上升和下降的数据,每个X轴取值对应其在三大类下的数据。换句话说,三大类是属性,X轴上的取值是具体变量,上升和下降是具体的行为。因此只需要按属性和行为分类记录各个变量的值即可,一共是3×2=6组数据。
1.先提取数据。这里使用pandas,并将第二列数据作为X轴坐标

# Load the spreadsheet once; every later step reads from this DataFrame.
df = pd.read_excel('data.xlsx')
# The 'GO Term (level2)' column supplies the X-axis labels.
x_values = list(df['GO Term (level2)'])

2.写一个方法,用于根据属性和行为提取数据
需要注意的是有些变量的属性是不存在行为的 那这时候就会抛出索引异常,这时候我们就以0填充进去

def data_extraction(title: str, up_or_down: str) -> list:
    """Collect one Y-axis series with one value per entry of ``x_values``.

    For every name in ``x_values`` this selects the ``up_or_down`` column of
    the rows of ``df`` whose 'GO Term (level1)' equals ``title`` and whose
    'GO Term (level2)' equals the name, and keeps the first matching value.

    Args:
        title: Value to match in the 'GO Term (level1)' column.
        up_or_down: Name of the column the value is read from.

    Returns:
        A list aligned with ``x_values``; 0 is used where no row matches.
    """
    y_values = []
    for name in x_values:
        result = df.loc[(df['GO Term (level1)'] == title) & (df['GO Term (level2)'] == name), up_or_down]
        try:
            y_values.append(result.values[0])  # the bug is here: the value is stored as-is, without int()
        except IndexError:
            # No row for this (title, name) pair: fill the slot with 0.
            y_values.append(0)
    return y_values

3.接下来就是将数据按属性和行为查找出来

def data_preprocessing():
    """Extract the six series: every category for 'up', then for 'down'.

    Returns:
        A 6-tuple of lists in the order (up: Biological Process, Molecular
        Function, Cellular Component; down: the same three categories).
    """
    up_column = 'number_of_out (up)'
    down_column = 'number_of_out (down)'

    # "left" series are the 'up' counts, "right" series the 'down' counts.
    y_left_Biological_Process = data_extraction('Biological Process', up_column)
    y_left_Molecular_Function = data_extraction('Molecular Function', up_column)
    y_left_Cellular_Component = data_extraction('Cellular Component', up_column)
    y_right_Biological_Process = data_extraction('Biological Process', down_column)
    y_right_Molecular_Function = data_extraction('Molecular Function', down_column)
    y_right_Cellular_Component = data_extraction('Cellular Component', down_column)

    return (
        y_left_Biological_Process,
        y_left_Molecular_Function,
        y_left_Cellular_Component,
        y_right_Biological_Process,
        y_right_Molecular_Function,
        y_right_Cellular_Component,
    )

4.数据准备好后就是可视化,为了区分数据上升的数据用柱状图显示,下降的数据用折线图显示。

def data_to_visualization():
    """Draw the combined bar/line chart and write it to ``visualization.html``.

    The "up" series (``y_left_*``) are drawn as bars against the primary Y
    axis and the "down" series (``y_right_*``) as lines against a second Y
    axis added with ``extend_axis``.  ``x_values`` and the six series are
    read from module scope; the function takes no arguments and returns
    nothing.
    """
    bar = (
        Bar(init_opts=opts.InitOpts(
            width="1200px",
            height="500px",
            animation_opts=opts.AnimationOpts(
                animation_delay=1000, animation_easing="elasticOut"
            ),
        ))
        .add_xaxis(xaxis_data=x_values)
        .add_yaxis(
            series_name="Biological Process",
            y_axis=list(y_left_Biological_Process),
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Molecular Function",
            y_axis=list(y_left_Molecular_Function),
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Cellular Component",
            y_axis=list(y_left_Cellular_Component),
            label_opts=opts.LabelOpts(is_show=False),
        )
        # Second Y axis (index 1); the line series below attach to it.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="Number(Down)",
                type_="value",
                min_=0,
                max_=25,
                interval=1,
                # "15px" is a size, not a font weight, so it is passed as font_size.
                axislabel_opts=opts.LabelOpts(formatter="{value} ", color="red", font_size=15),
                name_textstyle_opts=opts.TextStyleOpts(color="red", font_size=15),
            )
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(
                is_show=True, trigger="axis", axis_pointer_type="cross"
            ),
            legend_opts=opts.LegendOpts(
                pos_top="1%",
                pos_left="center",
                item_width=30,
                item_height=10,
                border_color="rgba(0, 0, 0, 0)",
                textstyle_opts={
                    "fontWeight": "bold"
                },
            ),
            xaxis_opts=opts.AxisOpts(
                type_="category",
                axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
                axislabel_opts=opts.LabelOpts(rotate=-90),
            ),
            yaxis_opts=opts.AxisOpts(
                name="Number(Up)",
                type_="value",
                min_=0,
                max_=25,
                interval=1,
                # Same fix as on the second axis: a size belongs in font_size.
                axislabel_opts=opts.LabelOpts(formatter="{value} ", color="blue", font_size=15),
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
                name_textstyle_opts=opts.TextStyleOpts(color="blue", font_size=15),
            ),
        )
    )

    line = (
        Line()
        .add_xaxis(xaxis_data=x_values)
        .add_yaxis(
            series_name="Biological_Process_of_down",
            yaxis_index=1,
            y_axis=y_right_Biological_Process,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Molecular_Function_of_down",
            yaxis_index=1,
            y_axis=y_right_Molecular_Function,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Cellular_Component_of_down",
            yaxis_index=1,
            y_axis=y_right_Cellular_Component,
            label_opts=opts.LabelOpts(is_show=False),
        )
    )

    # Overlay the lines on the bars and write the page.
    bar.overlap(line).render("visualization.html")

5.Bug定位,在第二步中说明了问题的出处

 y_values.append(result.values[0])

我们打印下data_extraction提取出来的数据和类型看看

 y_left_Biological_Process = data_extraction('Biological Process', 'number_of_out (up)')
 # Print the list together with the type of that same list.
 print(y_left_Biological_Process, type(y_left_Biological_Process))
[20, 19, 4, 5, 6, 5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] <class 'list'>

可以看到数据和类型都是符合pyecharts的用法的
但最后画出来的图却是这样,而且只有部分值为0的数据才显示
在这里插入图片描述
如果我们将打印出来的数据直接写到

 y_axis=list(y_left_Biological_Process)
 #替换为
  y_axis=list([20, 19, 4, 5, 6, 5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

画出来的图就是正常显示数据的。
这时候就一脸懵了????
难道有脏东西?
这时候我们就需要研究下pandas底层逻辑了,那我们直接根据pandas读取出来的二维列表查看源代码注释看看
在这里插入图片描述
可以看到在DataFrame类里面的第553行代码中,官方对DataFrame value的类型进行了说明:数据的类型为int64
那我们回到刚才的列表y_left_Biological_Process遍历下里面的数据和类型

# Print every element with its own type; the type of the list alone does not
# reveal the element types.
for meta in y_left_Biological_Process:
    print(meta,type(meta))
# Output (excerpt):
# 1 <class 'numpy.int64'>
# 1 <class 'numpy.int64'>
# 1 <class 'numpy.int64'>
# 0 <class 'numpy.int64'>
# 0 <class 'numpy.int64'>
# 0 <class 'numpy.int64'>
# 0 <class 'numpy.int64'>
# 1 <class 'numpy.int64'>
# 0 <class 'int'>
# 0 <class 'int'>
# 0 <class 'int'>

可以看到列表中的元素类型并不一致:从DataFrame里取出来的值是numpy.int64,而查不到数据时填充的0是Python内置的int。图上只有类型为int的元素才会显示,这也符合pyecharts的逻辑——pyecharts需要的是Python原生的数据类型。
那我们就简单在提取数据时强转下int试试

 y_values.append(int(result.values[0]))

最后再重新跑下代码看看
在这里插入图片描述
这下数据就有了。
6.问题总结
一句话:多看看官方源代码注释
最后关于pyecharts也是好久没用了 很多方法属性其实看看官方源代码就好
7.全部代码

import pyecharts.options as opts
from pyecharts.charts import Bar, Line
import pandas as pd

# Load the spreadsheet once; every later step reads from this DataFrame.
df = pd.read_excel('data.xlsx')
# The 'GO Term (level2)' column supplies the X-axis labels.
x_values = list(df['GO Term (level2)'])


def data_extraction(title: str, up_or_down: str) -> list:
    """Collect one Y-axis series with one built-in ``int`` per entry of ``x_values``.

    For every name in ``x_values`` this looks up the rows of ``df`` whose
    'GO Term (level1)' equals ``title`` and whose 'GO Term (level2)' equals
    the name, and takes the first value found in the ``up_or_down`` column.

    Args:
        title: Value to match in the 'GO Term (level1)' column.
        up_or_down: Name of the column the value is read from.

    Returns:
        A list aligned with ``x_values``; 0 is used where no row matches.
    """
    # The category filter does not depend on the name, so apply it once.
    in_category = df.loc[df['GO Term (level1)'] == title]

    y_values = []
    for name in x_values:
        result = in_category.loc[in_category['GO Term (level2)'] == name, up_or_down]
        # int() converts the value taken from the DataFrame to a built-in int;
        # a (title, name) pair without any row contributes 0.
        y_values.append(0 if result.empty else int(result.values[0]))
    return y_values


def data_preprocessing():
    """Extract the six series: every category for 'up', then for 'down'.

    Returns:
        A 6-tuple of lists in the order (up: Biological Process, Molecular
        Function, Cellular Component; down: the same three categories).
    """
    categories = ('Biological Process', 'Molecular Function', 'Cellular Component')
    columns = ('number_of_out (up)', 'number_of_out (down)')
    # Outer loop over the column keeps all 'up' series ahead of the 'down' ones.
    return tuple(
        data_extraction(category, column)
        for column in columns
        for category in categories
    )

# Build the six series at module level; data_to_visualization() reads these names.
(y_left_Biological_Process, y_left_Molecular_Function, y_left_Cellular_Component,
 y_right_Biological_Process, y_right_Molecular_Function, y_right_Cellular_Component) = data_preprocessing()


def data_to_visualization():
    """Draw the combined bar/line chart and write it to ``visualization.html``.

    The "up" series (``y_left_*``) are drawn as bars against the primary Y
    axis and the "down" series (``y_right_*``) as lines against a second Y
    axis added with ``extend_axis``.  ``x_values`` and the six series are
    read from module scope; the function takes no arguments and returns
    nothing.
    """
    bar = (
        Bar(init_opts=opts.InitOpts(
            width="1200px",
            height="500px",
            animation_opts=opts.AnimationOpts(
                animation_delay=1000, animation_easing="elasticOut"
            ),
        ))
        .add_xaxis(xaxis_data=x_values)
        .add_yaxis(
            series_name="Biological Process",
            y_axis=list(y_left_Biological_Process),
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Molecular Function",
            y_axis=list(y_left_Molecular_Function),
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Cellular Component",
            y_axis=list(y_left_Cellular_Component),
            label_opts=opts.LabelOpts(is_show=False),
        )
        # Second Y axis (index 1); the line series below attach to it.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="Number(Down)",
                type_="value",
                min_=0,
                max_=25,
                interval=1,
                # "15px" is a size, not a font weight, so it is passed as font_size.
                axislabel_opts=opts.LabelOpts(formatter="{value} ", color="red", font_size=15),
                name_textstyle_opts=opts.TextStyleOpts(color="red", font_size=15),
            )
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(
                is_show=True, trigger="axis", axis_pointer_type="cross"
            ),
            legend_opts=opts.LegendOpts(
                pos_top="1%",
                pos_left="center",
                item_width=30,
                item_height=10,
                border_color="rgba(0, 0, 0, 0)",
                textstyle_opts={
                    "fontWeight": "bold"
                },
            ),
            xaxis_opts=opts.AxisOpts(
                type_="category",
                axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
                axislabel_opts=opts.LabelOpts(rotate=-90),
            ),
            yaxis_opts=opts.AxisOpts(
                name="Number(Up)",
                type_="value",
                min_=0,
                max_=25,
                interval=1,
                # Same fix as on the second axis: a size belongs in font_size.
                axislabel_opts=opts.LabelOpts(formatter="{value} ", color="blue", font_size=15),
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
                name_textstyle_opts=opts.TextStyleOpts(color="blue", font_size=15),
            ),
        )
    )

    line = (
        Line()
        .add_xaxis(xaxis_data=x_values)
        .add_yaxis(
            series_name="Biological_Process_of_down",
            yaxis_index=1,
            y_axis=y_right_Biological_Process,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Molecular_Function_of_down",
            yaxis_index=1,
            y_axis=y_right_Molecular_Function,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .add_yaxis(
            series_name="Cellular_Component_of_down",
            yaxis_index=1,
            y_axis=y_right_Cellular_Component,
            label_opts=opts.LabelOpts(is_show=False),
        )
    )

    # Overlay the lines on the bars and write the page.
    bar.overlap(line).render("visualization.html")


# Script entry point: build the chart and write visualization.html.
data_to_visualization()

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值