pandas 学习笔记15 pandas样式

最新推荐文章于 2024-08-13 08:46:23 发布

木易弓虽

最新推荐文章于 2024-08-13 08:46:23 发布

阅读量2.4k

点赞数

分类专栏： pandas 文章标签： python 开发语言后端

本文链接：https://blog.csdn.net/weixin_43237709/article/details/121622736

版权

pandas 专栏收录该内容

12 篇文章 0 订阅

订阅专栏

1 内置样式

import pandas as pd
import numpy as np

# Section 1 -- built-in styles. Each bare expression below is meant to be run in a notebook
# cell, where the resulting Styler is displayed.
# df.style returns a Styler that renders the frame through CSS; it depends on jinja2.
df = pd.read_excel(r'D:\pythonproject\pythonVsc\pandas\data\team.xlsx')
df.style # the Styler object; displays all of the data
type(df.style) # inspect the type: pandas.io.formats.style.Styler
# Highlighting missing values: style.highlight_null() gives them a background colour
# df.iloc[1,1]=np.NaN # turn one value into a missing value
# df.head().style.highlight_null() # highlight missing values; red background by default
# # choose the background colour
# df.head().style.highlight_null(null_color='blue') # by colour name
# df.head().style.highlight_null(null_color='#ccc') # by colour value
# NOTE(review): newer pandas releases appear to spell this keyword `color` instead of
# `null_color`, and NumPy 2 drops the `np.NaN` alias -- confirm before re-enabling these lines.

# df.head().style.highlight_min() # highlight the minimum; yellow by default
# df.head().style.highlight_max()
# # use both together and pick a colour
# (df.head()
# .style.highlight_max(color='lime') # highlight the maximum with the given colour
# .highlight_min() # highlight the minimum
# )
# Restricting the range -- the author notes these did not pass testing, so they stay disabled
# df.style.highlight_min(subset=['Q1']) # only Q1
# df.style.highlight_min(subset=['Q1','Q2']) # Q1 and Q2
# df.style.highlight_min(subset=pd.IndexSlice[:10,['Q1','Q3']]) # minimum within the slice
# df.style.highlight_min(axis=1,subset=['Q1','Q2']) # row-wise, across these two columns only

# Background gradient (depends on matplotlib). Colormap reference: https://matplotlib.org/devdocs/gallery/color/colormap_reference.html
df.head().style.background_gradient() # numeric columns are shaded column by column
df.head().style.background_gradient(subset=['Q1'],cmap='BuGn') # restrict to a column and pick the colormap
df.style.background_gradient(low=0.6,high=0) # low/high trim the colormap at each end so the full colour range is not used
df.style.background_gradient(text_color_threshold=0.5) # threshold in 0-1 (dark to light) for the text colour, so the text stands out
df.style.background_gradient(vmin=60,vmax=100) # data values that the two ends of the colormap correspond to
# Combined example
(df.head(10)
.style
.background_gradient(subset=['Q1'],cmap='spring')# pick the colormap
.background_gradient(subset=['Q2'],vmin=60,vmax=100)
.background_gradient(subset=['Q3'],low=0.6,high=0)
.background_gradient(subset=['Q4'],text_color_threshold=0.9)
)
df.head().style.bar(subset=['Q4'],vmin=50,vmax=100) # draw in-cell bar charts for Q4
df.style.bar()# basic usage; applies to numeric data by default
df.style.bar(subset=['Q1']) # restrict to the given columns
# Choosing the colour
df.style.bar(color='green')
df.style.bar(color='#ff11bb')
df.style.bar(axis=1) # compute and draw the bars row-wise
df.style.bar(width=80) # share of the cell the bar may occupy, 0-100 (100 fills the cell)
# Alignment: left - bars start at the minimum; zero - zero sits in the centre;
# mid - (max-min)/2 sits in the centre, with zero at the right (left) for all-negative (all-positive) data
df.style.bar(align='mid')
df.style.bar(vmin=60,vmax=100) # reference values for the bar scale
# Example
(
    df.head(10)
    .assign(avg=df.mean(axis=1,numeric_only=True))# add a per-row average
    .assign(diff = lambda x:x.avg.diff())# difference from the previous student
    .style
    .bar(subset=['Q1'],color='yellow')
    .bar(subset=['avg'],
        width=90,
        align='mid',
        vmin=60,vmax=100,
        color='#5CADAD')
    .bar(subset=['diff'],
        color=['#ffe4e4','#bbf9ce'],# two colours: one for decreases, one for increases
        vmin=0,vmax=30,# scale based at 0, up to 30
        align='zero')# centre on zero
)

2 常用方法

# Section 2 -- common methods: Styler.format controls how cell values are displayed.
#
# Call shape summarised by the original notes:
#   Styler.format(formatter, subset=None, na_rep=None)
#     formatter -- one of str, callable, dict or None; usually a dict mapping a
#                  column to a format string or a function. Format strings follow
#                  the ordinary Python str.format syntax.
#     subset    -- restrict the formatting to part of the table
#     na_rep    -- text shown in place of missing values
df.head().style.format('[{}]') # wrap every value in square brackets
df.head().style.format('{:.2%}', subset=['Q1']) # percentage; numeric cells only, text cannot use it
df.style.format({'name': str.upper}) # upper-case the given column
df.style.format({'Q1': '{:0<4.0f}', 'Q2': '{:+.2f}'}) # Q1 padded to four characters, Q2 two decimals with sign
df.style.format('{:.2%}', na_rep='-', subset=['Q1']) # same percentage format, missing values shown as '-'

# Frequently used formatters. The original notes contained specs that do not parse
# ('{:.f}' and full-width ':' / ',' characters); the valid spellings are:
#   {'a': '￥{0:,.0f}',   # currency symbol with thousands separators
#    'b': '{:%Y-%m}',     # year-month
#    'c': '{:.2%}',       # percentage
#    'd': '{:,f}',        # thousands separators
#    'e': str.upper,      # upper case
#   }
(
    df.head(10)
    .assign(avg=df.mean(axis=1, numeric_only=True) / 100) # per-row average, scaled for percent display
    .assign(diff=lambda x: x.avg.diff()) # difference from the previous student
    .style
    .format({'name': str.upper, 'avg': '{:.2%}', 'diff': '￥{:.2f}'}, na_rep='-')
)

3 样式的高级操作

# Section 3 -- advanced styling
# 1. Styler configuration; .set_caption('xxx') adds a caption
df.head().style.set_caption('学生成绩表') # add a caption
df.style.format(precision=2) # display two decimals
df.round(2).style # same visual result, obtained by rounding the data
(
    # numeric_only=True skips the text columns; without it the mean raises on
    # non-numeric data in pandas 2.x
    df.assign(mean=df.mean(axis=1, numeric_only=True))
    .style
    # Styler.set_precision() was removed in pandas 2.0; format(precision=...) replaces it
    .format(precision=4) # display four decimals
)
# Displaying missing values
na = np.nan
(
    df.head()
    .eval('Q4=@na') # make Q4 an all-missing column
    .style
    # Styler.set_na_rep() was removed in pandas 2.0; format(na_rep=...) replaces it
    .format(na_rep='暂无')
)
# Styler.hide_index() / hide_columns() were removed in pandas 2.0;
# Styler.hide(axis=...) replaces both (available from pandas 1.4)
df.style.hide(axis='index') # do not render the index
df.style.hide(['Q1', 'Q2'], axis='columns') # do not render the given columns

# 2. Cell CSS through .set_properties()
df.style.set_properties(subset=['Q1'], **{'color': 'red'}) # red text in column Q1
# `align` is not a CSS property, so it had no visual effect; `text-align` is the real one
df.style.set_properties(**{'color': 'white', 'text-align': 'right'})
df.style.set_properties(**{'background-color': 'yellow'})
df.style.set_properties(**{'width': '100px', 'font-size': '18px'})
df.style.set_properties(**{'background-color': 'black',
                           'color': 'lawngreen',
                           'border-color': 'white'})
# .set_table_attributes() adds attributes to the <table> tag
df.style.set_table_attributes('class="pure-table"')
# .set_table_styles() sets table-level style rules.
# Here every row (tr tag) gets a yellow background in its :hover state,
# so the whole row turns yellow while the mouse is over it.
df.style.set_table_styles(
    [{'selector': 'tr:hover',
      'props': [('background-color', 'yellow')]}]
)
# .set_uuid() sets the identifier used as a prefix in the id attribute of each cell's td tag;
# the documented argument type is str
df.style.set_uuid('9999') # every cell id of the table shares this prefix
# 3. Applying functions
# Show the maximum value in red
def highlight_max(x):
    """Return one CSS string per element of ``x``, marking the maximum in red.

    Written for ``Styler.apply``, which calls it with one column or row at a time.

    Args:
        x: An iterable that also provides ``.max()``, e.g. a pandas Series.

    Returns:
        A list with ``'color:red'`` at every position whose value equals
        ``x.max()`` and ``''`` everywhere else. Ties are all marked.
    """
    # Nothing to compare; also avoids calling .max() on an empty input.
    if len(x) == 0:
        return []
    # Compute the maximum once instead of once per element.
    peak = x.max()
    return ['color:red' if v == peak else '' for v in x]
df.style.apply(highlight_max) # apply the function through Styler.apply
df.loc[:,'Q1':'Q4'].style.apply(highlight_max,axis=1) # apply it row by row (axis=1) on columns Q1..Q4
# Give a whole row a background colour depending on a condition
def background_color(row):
    """Return one background CSS string per cell of ``row``, chosen from ``row.pv_num``.

    ``pv_num`` of at least 10000 yields red, at least 100 yields yellow, and
    anything else yields empty strings (no styling).
    """
    views = row.pv_num
    if views >= 10000:
        css = 'background-color:red'
    elif views >= 100:
        css = 'background-color:yellow'
    else:
        css = ''
    # Same style repeated for every cell so the entire row is coloured.
    return [css] * len(row)
# Apply background_color to every row (axis=1).
# NOTE(review): background_color reads row.pv_num, while the other examples in this file
# only reference the name and Q1-Q4 columns -- presumably this fails unless df really has
# a pv_num column; confirm against the data.
df.style.apply(background_color,axis=1)
# The string literal below is never executed: it is a copy of the snippet above plus a
# lambda variant keyed on a `math` column, and its own text says it did not pass testing.
'''
# 按条件整行添加背景色
# 此段代码测试未通过
def background_color(row):
    if row.pv_num>=10000:
        return ['background-color:red']*len(row)
    elif row.pv_num>=100:
        return ['background-color:yellow']*len(row) 
    return ['']*len(row)
df.style.apply(background_color,axis=1)
df.style.apply(lambda x:['background-color:yellow']*len(x) if x.math>80 else ['']*len(x),axis=1)
'''
def bg(x):
    """Return a yellow-background CSS string for numeric cells greater than 90.

    Written for cell-wise styling (``Styler.applymap``), which passes one cell
    value at a time.

    Args:
        x: A single cell value of any type.

    Returns:
        ``'background-color:yellow'`` when ``x`` is a number above 90,
        otherwise ``''`` (no styling). Non-numeric values and NaN give ``''``.
    """
    # isinstance accepts floats and NumPy scalars as well; the former
    # type(x)==int test matched plain Python ints only, so float cells were
    # never highlighted although the helper is meant for numbers in general.
    if isinstance(x, (int, float, np.integer, np.floating)) and x > 90:
        return 'background-color:yellow'
    return ''
# NOTE(review): newer pandas releases appear to rename Styler.applymap to Styler.map --
# confirm against the installed version.
df.style.applymap(bg) # apply bg to every cell; bg itself leaves non-numeric cells unstyled
df.style.applymap(bg,subset=pd.IndexSlice[:,['Q1']]) # restrict to column Q1
# Reusable style function: upper-case the name column and enlarge all text to 200%
def my_style(styler):
    """Enlarge the font of ``styler`` to 200% and upper-case its ``name`` column.

    Returns whatever the chained ``set_properties(...).format(...)`` calls
    return, so it can be used with ``Styler.pipe``.
    """
    enlarged = styler.set_properties(**{'font-size': '200%'})
    return enlarged.format({'name': str.upper})
df.style.pipe(my_style) # apply the style function through Styler.pipe
# The string literal below is never executed. It sketches "style reuse" (section 4):
# export the styles of one Styler and load them into another with use(). Its own text
# marks it as untested, and color_negative_red / df2 are not defined in the visible code.
'''
# 4 样式复用
#此段代码未经测试
style1=df.style.applymap(color_negative_red) #将df的样式赋值给变量
style2=df2.style #df2的样式
style2.use(style1.export()) #使用style1的样式
'''
# 5. Clearing styles: Styler.clear()
dfs = df.loc[:,'Q1':'Q4'].style.apply(highlight_max) # keep the Styler in a variable
dfs.clear() # clear the styles; dfs is still a Styler object afterwards


# 6. Exporting a styled table to Excel (each call overwrites aa.xlsx)
# dfs is the Styler built from columns Q1..Q4 of df.
df.style.to_excel('aa.xlsx') # export to Excel
df.style.to_excel('aa.xlsx', engine='openpyxl') # use a specific writer engine
dfs.to_excel('aa.xlsx', sheet_name='Sheet1') # name of the worksheet
dfs.to_excel('aa.xlsx', na_rep='-') # how missing values are written
dfs.to_excel('aa.xlsx', float_format="%.2f") # float format: two decimals
# 'Q' was a typo: dfs only holds Q1..Q4, so selecting 'Q' raises KeyError
dfs.to_excel('aa.xlsx', columns=['Q1', 'Q2']) # write only these two columns
dfs.to_excel('aa.xlsx', header=False) # no header row
dfs.to_excel('aa.xlsx', index=False) # no index column
# `index` is a boolean switch; labels for the index column(s) go in `index_label`
# (a sequence is meant for a multi-level index)
dfs.to_excel('aa.xlsx', index_label=['team', 'name'])
dfs.to_excel('aa.xlsx', startrow=10, startcol=3) # upper-left cell (row, column) where the table is written
dfs.to_excel('aa.xlsx', merge_cells=False) # do not merge cells
# NOTE(review): DataFrame.to_excel dropped `encoding` and `verbose` in pandas 2.0;
# confirm that Styler.to_excel in the installed version still accepts them.
dfs.to_excel('aa.xlsx', encoding='utf-8') # encoding of the output
dfs.to_excel('aa.xlsx', inf_rep='inf') # representation of infinity (Excel has none natively)
dfs.to_excel('aa.xlsx', verbose=True) # show more information in the error log
dfs.to_excel('aa.xlsx', freeze_panes=(0, 2)) # bottommost row and rightmost column to freeze

# 7. Generating HTML
# Styler.render() was removed in pandas 2.0; Styler.to_html() is its replacement
# and, called without arguments, returns the HTML as a string.
# Template keywords listed in the original notes for render(), kept for reference:
#   head, cellstyle, body, uuid, precision, table_styles, caption, table_attributes
df.style.to_html() # build the HTML string
df.style.highlight_null().to_html().split('\n')[:10] # split on newlines and keep the first ten lines for readability
# Display the HTML inside a Jupyter notebook through IPython
from IPython.display import HTML
HTML(df.style.to_html())