# 可参考链接:
# - API reference — pandas 1.4.1 documentation:
#   https://pandas.pydata.org/pandas-docs/stable/reference/index.html
# - 系统的学会 Pandas 基本使用看这一篇就够了 - 知乎(作者:Ma Sizhou):
#   https://zhuanlan.zhihu.com/p/385494934
#   原文链接:https://blog.csdn.net/weixin_45901519/article/details/112980822
#   (页面摘要:推荐阅读:怎么自学python,大概要多久?哪些 Python 库让你相见恨晚?你们都用Python实现了哪些办公自动化?1、Pan…)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# ---- One-dimensional data: pandas Series ----
print("------------一维-------------")
# Build a Series from a plain list; np.nan stands in for a missing value.
series_values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(series_values)
print("s=pd.Series([1,3,5,np.nan,6,8]):\n", s)
'''
s=pd.Series([1,3,5,np.nan,6,8]):
0 1.0
1 3.0
2 5.0
3 NaN
4 6.0
5 8.0
dtype: float64
'''
# Look up one element by its index label.
element_at_label_2 = s.loc[2]
print("s.loc[2]:\n", element_at_label_2)
'''
s.loc[2]:
5.0
'''
# Slice by label; the sample output shows both end labels are included.
elements_1_through_2 = s.loc[1:2]
print("s.loc[1:2]:\n", elements_1_through_2)
'''
s.loc[1:2]:
1 3.0
2 5.0
'''
# ---- Two-dimensional data: pandas DataFrame ----
print("------------二维-------------")
# Six consecutive daily timestamps, used below as the row index.
dates = pd.date_range('20130101', periods=6)
print("dates=pd.date_range('20130101',periods=6):\n", dates)
'''
dates=pd.date_range('20130101',periods=6):
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06'],
dtype='datetime64[ns]', freq='D')
'''
# Create a DataFrame from a NumPy array. The values are random, so the sample
# outputs quoted in this section differ from run to run.
random_values = np.random.randn(6, 4)
df = pd.DataFrame(random_values, index=dates, columns=['A', 'B', 'C', 'D'])
print("df:\n", df)
'''
df:
A B C D
2013-01-01 -1.482591 -0.785298 -1.362526 0.337269
2013-01-02 0.149812 -0.382297 -0.924572 -1.269442
2013-01-03 1.032738 -0.200812 -0.019716 1.132798
2013-01-04 0.984295 -0.710752 0.978233 -0.050768
2013-01-05 0.236064 -0.515189 0.768145 0.332022
2013-01-06 -0.025268 -1.387040 0.413894 -1.250767
'''
# Create a DataFrame from a dict whose values can be converted to columns;
# scalar entries are repeated for every row (see the sample output).
mixed_columns = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
}
df2 = pd.DataFrame(mixed_columns)
print("df2:\n", df2)
'''
df2:
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
'''
# Each column keeps its own dtype.
column_dtypes = df2.dtypes
print("df2.dtypes:\n", column_dtypes)
'''
df2.dtypes:
A float64
B datetime64[ns]
C float32
D int32
E category
F object
dtype: object
'''
# Peek at the first three and the last two rows.
first_rows = df2.head(3)
last_rows = df2.tail(2)
print("df2.head:\n", first_rows)
print("df2.tail:\n", last_rows)
'''
df2.head:
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
df2.tail:
A B C D E F
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
'''
# Transpose: rows become columns and vice versa.
transposed = df.T
print("转置df.T:\n", transposed)
'''
转置df.T:
2013-01-01 2013-01-02 2013-01-03 2013-01-04 2013-01-05 2013-01-06
A -0.049734 -0.503263 -0.288575 1.439278 0.164564 -0.729201
B 0.438406 1.284804 -0.615318 0.333119 0.471311 0.573108
C -1.707385 -0.478150 -0.754238 -0.204792 0.604211 1.266928
D 0.838109 1.315133 -0.684641 0.639650 -0.936113 -0.375572
'''
# Sort by axis labels. Here axis=1 sorts the column labels, in descending order.
columns_descending = df.sort_index(axis=1, ascending=False)
print("按x轴排序df.sort_index(axis=1,ascending=False):\n", columns_descending)
'''
按x轴排序df.sort_index(axis=1,ascending=False):
D C B A
2013-01-01 -0.005231 1.146550 0.076886 1.738359
2013-01-02 -0.509110 0.158257 0.260332 -0.491110
2013-01-03 -0.897636 1.174557 -0.245847 -0.437432
2013-01-04 -1.170421 1.715993 -1.938287 0.111352
2013-01-05 0.068610 0.768638 -1.764390 0.535152
2013-01-06 1.161581 0.095728 0.671962 1.010305
'''
# Sort rows by the values of column B, ascending.
rows_by_b = df.sort_values(by='B', ascending=True)
print("按B列升序df.sort_values(by='B',ascending=True):\n", rows_by_b)
'''
df.sort_values(by='B'):
A B C D
2013-01-05 -0.687070 -2.419771 1.169795 0.396717
2013-01-02 0.261291 -0.907055 1.920351 0.928672
2013-01-04 1.466044 -0.190614 0.386126 -0.396150
2013-01-03 0.933971 -0.026125 -0.347587 -1.114554
2013-01-01 1.570273 0.226476 -1.853670 -0.352678
2013-01-06 2.118436 0.567099 0.460271 1.193028
'''
# Selecting a single column returns a Series; df['A'] and df.A are equivalent.
column_a = df['A']
print(column_a)
print(df.A)
'''
2013-01-01 0.785102
2013-01-02 0.941444
2013-01-03 -0.273664
2013-01-04 1.177939
2013-01-05 -0.281529
2013-01-06 0.790603
Freq: D, Name: A, dtype: float64
'''
# Slicing with [] selects rows, either by position or by index label.
rows_by_position = df[2:4]
print("df[2:4]:\n", rows_by_position)
rows_by_date = df['20130103':'20130104']
print(rows_by_date)
'''
df[2:4]:
A B C D
2013-01-03 1.111260 -0.771821 0.110644 -1.905335
2013-01-04 0.988968 0.115402 -0.563864 0.710552
'''
# Select one row by its label.
first_day = dates[0]
print("df.loc[dates[0]]:\n", df.loc[first_day])
'''
df.loc[dates[0]]:
A 0.024117
B 0.194058
C -0.531231
D 0.524026
Name: 2013-01-01 00:00:00, dtype: float64
'''
# Select on both axes by label: every row, columns A and C.
wanted_columns = ['A', 'C']
print("df.loc[:,['A','C']]:\n", df.loc[:, wanted_columns])
'''
df.loc[:,['A','C']]:
A C
2013-01-01 0.062463 -1.492969
2013-01-02 -1.217163 0.433442
2013-01-03 -0.663532 -0.352696
2013-01-04 0.327094 -0.561332
2013-01-05 -1.794773 0.535678
2013-01-06 -1.739319 0.652672
'''
# Label slicing; the sample output shows both endpoints are included.
print("df.loc['20130102':'20130104',['A','C']]:\n",
      df.loc['20130102':'20130104', wanted_columns])
'''
df.loc['20130102':'20130104',['A','C']]:
A C
2013-01-02 -1.453010 -0.390515
2013-01-03 0.279680 1.012361
2013-01-04 -1.734347 -1.097504
'''
# A single row label reduces the result to a Series.
print("df.loc['20130102',['A','C']]:\n", df.loc['20130102', wanted_columns])
'''
df.loc['20130102',['A','C']]:
A -0.255447
C -0.116441
Name: 2013-01-02 00:00:00, dtype: float64
'''
# Fetch a scalar; .loc and .at return the same value here.
scalar_via_loc = df.loc['20130102', 'C']
scalar_via_at = df.at['20130102', 'C']
print("df.loc['20130102','C']:\n", scalar_via_loc)
print("df.at['20130102','C']:\n", scalar_via_at)
'''
df.loc['20130102','C']:
-1.6785780025303105
df.at['20130102','C']:
-1.6785780025303105
'''
# Select a row by integer position. Positions are zero-based, so iloc[3] is
# the FOURTH row (2013-01-04 in the sample output); the label now says so.
print("选择第四行df.iloc[3]:\n", df.iloc[3])
'''
选择第四行df.iloc[3]:
A 1.640583
B -0.571066
C -0.094988
D -0.086394
Name: 2013-01-04 00:00:00, dtype: float64
'''
# Slice by integer position, like numpy/python slicing (end is excluded).
print("df.iloc[3:5,0:2]:\n", df.iloc[3:5, 0:2])
'''
df.iloc[3:5,0:2]:
A B
2013-01-04 0.741402 -0.380690
2013-01-05 -0.589039 -0.425458
'''
# Select by explicit lists of integer positions.
print("df.iloc[[1,2,4],[0,2]]:\n", df.iloc[[1, 2, 4], [0, 2]])
'''
df.iloc[[1,2,4],[0,2]]:
A C
2013-01-02 -0.166991 -1.862785
2013-01-03 -2.382871 -1.204376
2013-01-05 0.395470 -0.455911
'''
# Slice rows only.
print("df.iloc[1:3,:]:\n", df.iloc[1:3, :])
'''
df.iloc[1:3,:]:
A B C D
2013-01-02 0.827036 -1.654945 -0.181677 -1.008769
2013-01-03 0.972622 -0.430666 1.891469 1.046815
'''
# Slice columns only.
print("df.iloc[:,1:3]:\n", df.iloc[:, 1:3])
'''
df.iloc[:,1:3]:
B C
2013-01-01 -0.907663 0.993300
2013-01-02 0.298474 1.593479
2013-01-03 -1.058314 1.226099
2013-01-04 -0.290674 -1.156593
2013-01-05 0.150098 0.346733
2013-01-06 1.069454 -0.105687
'''
# Fetch a single value by position. The printed labels say [1,1], so the
# indexers now read [1,1] as well (they previously read [1,2]).
print("df.iloc[1,1]:\n", df.iloc[1, 1])
print("df.iat[1,1]:\n", df.iat[1, 1])
'''
df.iloc[1,1]:
1.641894272456916
df.iat[1,1]:
1.641894272456916
'''
# Boolean indexing: keep the rows where a single column satisfies a condition.
print("df[df.B>0]:\n", df[df.B > 0])
'''
df[df.B>0]:
A B C D
2013-01-01 1.551455 0.068040 -0.415197 -0.152619
2013-01-03 -0.091643 0.062380 0.681666 0.964527
2013-01-05 -0.243168 0.549515 0.537430 -0.924323
2013-01-06 -1.141784 0.769010 0.042631 1.071534
'''
# "where"-style selection on the whole frame: cells that fail the condition
# become NaN. The label previously repeated "df[df.B>0]" although the
# expression is df[df>0]; it now matches the code.
print("df[df>0]:\n", df[df > 0])
'''
df[df>0]:
A B C D
2013-01-01 1.073533 0.619155 0.897115 1.587666
2013-01-02 1.161001 0.689794 0.569217 0.682907
2013-01-03 NaN 0.651462 1.405717 0.437036
2013-01-04 NaN 0.868641 0.198289 0.484145
2013-01-05 0.979206 NaN NaN NaN
2013-01-06 0.155112 0.661658 NaN NaN
'''
# Filter with isin(): add a string column E, then keep the rows whose E value
# is one of the listed values.
df['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
print("df:\n", df)
print("df[df[E].isin(['two','four']):\n", df[df['E'].isin(['two', 'four'])])
'''
df:
A B C D E
2013-01-01 -0.883201 -0.418311 -1.359913 -0.366866 one
2013-01-02 0.405952 -1.178009 1.893223 -2.121690 one
2013-01-03 0.709221 1.017035 -0.460762 0.028415 two
2013-01-04 -0.146659 0.298716 -1.785458 1.286745 three
2013-01-05 1.746387 1.938101 -0.165001 0.289930 four
2013-01-06 -0.037218 0.848514 1.287841 -0.511670 three
df[df[E].isin(['two','four']):
A B C D E
2013-01-03 0.709221 1.017035 -0.460762 0.028415 two
2013-01-05 1.746387 1.938101 -0.165001 0.289930 four
'''
# Add a new column from a Series. Values are aligned on the index: s1 starts
# one day later than df, so (per the sample output) 2013-01-01 gets NaN and
# 2013-01-07 does not appear in df.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
print("s1:\n", s1)
df['F'] = s1
print("df:\n", df)
'''
s1:
2013-01-02 1
2013-01-03 2
2013-01-04 3
2013-01-05 4
2013-01-06 5
2013-01-07 6
Freq: D, dtype: int64
df:
A B C D E F
2013-01-01 -0.857621 0.102242 1.774687 -0.914860 one NaN
2013-01-02 0.698193 0.137528 0.684578 0.222057 one 1.0
2013-01-03 -1.286020 1.925610 0.775748 0.967149 two 2.0
2013-01-04 -0.615974 0.851330 -0.565220 -0.583578 three 3.0
2013-01-05 0.249727 -0.198400 -0.944865 -0.285109 four 4.0
2013-01-06 2.045587 3.064873 0.156069 1.018338 three 5.0
'''
# Set a single value by label.
df.at[dates[0], 'A'] = 3
print("通过标签设置新的值df.at[dates[0],'A'] = 3:\n", df)
# Set a single value by integer position. The label previously said
# "by label" (标签); iat is positional, so it now says "by position" (位置).
df.iat[0, 1] = 4
print("通过位置设置新的值df.iat[0,1] = 4:\n", df)
'''
通过标签设置新的值df.at[dates[0],'A'] = 3:
A B C D E F
2013-01-01 3.000000 -1.050261 -0.497598 -1.032525 one NaN
2013-01-02 -1.831707 -0.007977 1.563744 -0.321006 one 1.0
2013-01-03 -1.674681 -1.558105 0.165075 -0.492901 two 2.0
2013-01-04 0.146576 -0.942221 1.086702 1.146223 three 3.0
2013-01-05 -0.133029 0.033084 -0.226648 0.615227 four 4.0
2013-01-06 -0.369951 -1.439073 0.845186 0.446455 three 5.0
通过位置设置新的值df.iat[0,1] = 4:
A B C D E F
2013-01-01 3.000000 4.000000 -0.497598 -1.032525 one NaN
2013-01-02 -1.831707 -0.007977 1.563744 -0.321006 one 1.0
2013-01-03 -1.674681 -1.558105 0.165075 -0.492901 two 2.0
2013-01-04 0.146576 -0.942221 1.086702 1.146223 three 3.0
2013-01-05 -0.133029 0.033084 -0.226648 0.615227 four 4.0
2013-01-06 -0.369951 -1.439073 0.845186 0.446455 three 5.0
'''
# Overwrite a whole column with a NumPy array of the same length.
df.loc[:, 'D'] = np.array([5] * len(df))
print("df.loc[:,'D']=np.array([5]*len(df)):\n", df)
'''
df.loc[:,'D']=np.array([5]*len(df)):
A B C D E F
2013-01-01 3.000000 4.000000 -0.180801 5 one NaN
2013-01-02 -0.275247 2.382071 -1.186903 5 one 1.0
2013-01-03 0.377389 0.333934 -0.374475 5 two 2.0
2013-01-04 1.143876 0.738891 -1.089024 5 three 3.0
2013-01-05 0.437958 0.530188 0.796672 5 four 4.0
2013-01-06 1.128492 0.053748 -1.122978 5 three 5.0
'''
# "where"-style assignment: negate every positive cell. Work on a copy and
# drop the string column E first, because the comparison/negation below only
# makes sense for numeric data.
df2 = df.copy()
del df2['E']  # idiomatic spelling of the former df2.__delitem__('E')
df2[df2 > 0] = -df2
print("df2[df2>0]=-df2:\n", df2)
'''
df2[df2>0]=-df2:
A B C D F
2013-01-01 -3.000000 -4.000000 -0.547165 -5 NaN
2013-01-02 -0.485141 -0.811214 -0.987358 -5 -1.0
2013-01-03 -1.750152 -0.141212 -1.656073 -5 -2.0
2013-01-04 -0.608659 -1.108972 -2.193543 -5 -3.0
2013-01-05 -0.538733 -0.263301 -0.840352 -5 -4.0
2013-01-06 -0.245512 -2.980073 -0.096134 -5 -5.0
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd1
# Matplotlib text settings, applied in one call.
plt.rcParams.update({
    # Use the KaiTi font for both font families (presumably so the Chinese
    # labels in this script render -- confirm the font is installed).
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
    # Works around the minus sign '-' showing up as a square box in saved
    # figures.
    'axes.unicode_minus': False,
})
# A Series of strings.
sr = pd1.Series(['hello world', 'this is a series'])
print("-------字符sr--------")
print(sr)
'''return:
0 hello world
1 this is a series
'''
# Number of non-missing elements in the Series.
element_count = sr.count()
print('元素个数:', element_count)
# Words per element: number of spaces plus one.
words_per_element = sr.str.count(' ') + 1
print('每个元素的元素个数:\n', words_per_element)
print('index:\n', sr.index)
# Summing strings concatenates them (see the sample output).
concatenated = sr.sum()
print('元素求和:\n', concatenated)
'''return:
元素个数: 2
每个元素的元素个数:
0 2
1 4
dtype: int64
index:
RangeIndex(start=0, stop=2, step=1)
元素求和:
hello worldthis is a series
'''
# A small table of foods with a price and a quantity per row.
df3 = pd1.DataFrame({
    '食物': ['苹果', '橘子', '黄瓜', '番茄', '五花肉'],
    '价格': [7, 5, 4, 3, 12],
    '数量': [5, 8, 3, 4, 2],
})
print(df3)
'''
食物 价格 数量
0 苹果 7 5
1 橘子 5 8
2 黄瓜 4 3
3 番茄 3 4
4 五花肉 12 2
'''
# Reshape: quantities become the row index, foods become the columns and the
# cells hold the price; combinations that do not occur are NaN.
df3_pivot = df3.pivot(index='数量', columns='食物', values='价格')
print('df3_pivot:\n', df3_pivot)
'''
df3_pivot:
食物 五花肉 橘子 番茄 苹果 黄瓜
数量
2 12.0 NaN NaN NaN NaN
3 NaN NaN NaN NaN 4.0
4 NaN NaN 3.0 NaN NaN
5 NaN NaN NaN 7.0 NaN
8 NaN 5.0 NaN NaN NaN
'''
# Food name -> category lookup table.
map_dict = {
    '橘子': '水果',
    '番茄': '蔬菜',
    '苹果': '水果',
    '黄瓜': '蔬菜',
    '五花肉': '肉类',
}
# Map each food to its category through the dict and store it as a new column.
df3['分类'] = df3['食物'].map(map_dict)
print(df3)
'''
食物 价格 数量 分类
0 苹果 7 5 水果
1 橘子 5 8 水果
2 黄瓜 4 3 蔬菜
3 番茄 3 4 蔬菜
4 五花肉 12 2 肉类
'''


# The same mapping done through a function instead of a dict.
def f(x):
    """Return the category of food *x*; raises KeyError for unknown foods."""
    return map_dict[x]


print(df3['食物'].map(f))  # category of every food
'''
0 水果
1 水果
2 蔬菜
3 蔬菜
4 肉类
'''
# replace() returns a new frame and leaves df3 untouched, so only the printed
# copy shows the shortened names. (A preceding bare df3.replace(...) call whose
# result was discarded has been removed; it had no effect.)
print(df3.replace(['苹果', '五花肉', '番茄'], ['苹', '肉', '番']))
'''
食物 价格 数量 分类
0 苹 7 5 水果
1 橘子 5 8 水果
2 黄瓜 4 3 蔬菜
3 番 3 4 蔬菜
4 肉 12 2 肉类
'''
# Three small demo frames:
#   df1   - the integers 0..11 laid out as 4 rows x 3 columns, rows A-D
#   df1_1 - a 3x3 frame of ones whose rows/columns only partly overlap df1
#   df2   - average scores per class (rebinds the name df2)
df1 = pd1.DataFrame(
    data=np.arange(12).reshape(-1, 3),
    index=['A', 'B', 'C', 'D'],
    columns=['aa', 'ab', 'ac'],
)
df1_1 = pd1.DataFrame(
    data=np.ones((3, 3), dtype=int),
    index=['B', 'C', 'E'],
    columns=['aa', 'ab', 'ad'],
)
df2 = pd1.DataFrame(
    data=[[100, 90, 80], [80, 78, 87]],
    index=['一班', '二班'],
    columns=['数学平均分', '语文平均分', '英语平均分'],
)
print("------df1--------")
print(df1)
'''return:
aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
'''
print("------df2--------")
print(df2)
'''return:
数学平均分 语文平均分 英语平均分
一班 100 90 80
二班 80 78 87
'''
# Row labels.
print("------df1.index--------")
print(df1.index)
'''return:
Index(['A', 'B', 'C', 'D'], dtype='object')
'''
# Column labels.
print("------df1.columns--------")
print(df1.columns)
'''return:
Index(['aa', 'ab', 'ac'], dtype='object')
'''
# Row slice by label; the end label 'C' is included in the result.
print("------index访问:df1['A':'C']--------")
rows_a_to_c = df1['A':'C']
print(rows_a_to_c)
''':return
aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
'''
# Column access through attribute syntax.
print("df1.ab:\n", df1.ab)
'''
df1.ab:
A 1
B 4
C 7
D 10
'''
# Row slice by integer position; the end position 3 is excluded.
print("------index访问:df1[0:3]--------")
first_three_rows = df1[0:3]
print(first_three_rows)
'''return:
aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
'''
# Column access through [] with a column label.
print("------列访问:df1['aa']--------")
print(df1['aa'])
'''return:
A 0
B 3
C 6
D 9
Name: aa, dtype: int32
'''
# Reach one cell: chained column + label slice gives a one-element Series...
print("------访问某一个数据:df1['ab']['B':'B']--------")
cell_as_series = df1['ab']['B':'B']
print("df1['ab']['B':'B']:\n", cell_as_series)
'''return:
df1['ab']['B':'B']:
B 4
Name: ab, dtype: int32
'''
# ...while .loc with a row and a column label gives the scalar itself.
cell_value = df1.loc['B', 'ab']
print("df1.loc['B','ab']:\n", cell_value)
'''return:
df1.loc['B','ab']:
4
'''
# Filter rows with a query expression.
print("------获取ab=4的行:query('ab == 4')--------")
print(df1.query('ab == 4'))
'''
aa ab ac
B 3 4 5
'''
# Keep even cells and mask odd ones with NaN.
print("------筛选或屏蔽df1中奇数项,其它用NaN代替:where(df1%2==0)--------")
is_even = df1 % 2 == 0
print(df1.where(is_even))
'''
aa ab ac
A 0.0 NaN 2.0
B NaN 4.0 NaN
C 6.0 NaN 8.0
D NaN 10.0 NaN
'''
# Fetch a column by name with get().
print("------获取ab列:get('ab')--------")
print(df1.get('ab'))
'''
A 1
B 4
C 7
D 10
'''
# Element-wise square. The second print shows df1 itself is left unchanged.
print("------求平方:df1**2--------")
print(df1 ** 2)
print(df1)
'''return:
aa ab ac
A 0 1 4
B 9 16 25
C 36 49 64
D 81 100 121
aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
'''
# Rename selected columns; the result is a new frame.
print(df1.rename(columns={'aa': 'Aa'}))
'''
Aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
'''
# reindex() does not rename rows: it conforms the frame to the given labels.
# In the sample output row A (not requested) is gone and row E (not present
# in df1) is filled with NaN.
print(df1.reindex(list('BCDE')))
'''return:
aa ab ac
B 3.0 4.0 5.0
C 6.0 7.0 8.0
D 9.0 10.0 11.0
E NaN NaN NaN
'''
# Promote column 'aa' to be the row index.
print(df1.set_index('aa'))
'''
ab ac
aa
0 1 2
3 4 5
6 7 8
9 10 11
'''
# Move the row index back into an ordinary column named 'index'.
print(df1.reset_index())
'''
index aa ab ac
0 A 0 1 2
1 B 3 4 5
2 C 6 7 8
3 D 9 10 11
'''
# Give the row index a name; it is printed above the row labels.
df1.index.name = 'idx'
print("df1.index.name='idx':\n", df1)
'''
df1.index.name='idx':
aa ab ac
idx
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
'''
# Sort rows by index label, descending.
df1 = df1.sort_index(ascending=False)
print("df1.sort_index='idx排序':\n", df1)
# Sort rows by the values of column 'aa', ascending. The label previously said
# "idx排序" although the sort key is column 'aa'; it now names that column.
df1 = df1.sort_values(by='aa', ascending=True)
print("df1.sort_values='aa排序':\n", df1)
'''
df1.sort_index='idx排序':
aa ab ac
idx
D 9 10 11
C 6 7 8
B 3 4 5
A 0 1 2
df1.sort_values='aa排序':
aa ab ac
idx
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
'''
# apply() calls the function once per column: divide each column by its max.
print('各列归一化apply:\n', df1.apply(lambda x: x / np.max(x)))
# Element-wise function application. DataFrame.applymap is deprecated since
# pandas 2.1 in favour of DataFrame.map, so use `map` when this pandas version
# has it and fall back to `applymap` on older versions.
elementwise = df1.map if hasattr(df1, 'map') else df1.applymap
print('dataframe-applymap:\n', elementwise(lambda x: x * 2))
'''
各列归一化apply:
aa ab ac
A 0.000000 0.1 0.181818
B 0.333333 0.4 0.454545
C 0.666667 0.7 0.727273
D 1.000000 1.0 1.000000
dataframe-applymap:
aa ab ac
A 0 2 4
B 6 8 10
C 12 14 16
D 18 20 22
'''
print('df1:\n', df1)
print('df1_1:\n', df1_1)
# Combine the two frames side by side on the union of their row labels.
# concat keeps duplicate column names as they are; merge disambiguates the
# shared names with _x/_y suffixes (see the sample output).
df_concat = pd1.concat([df1, df1_1], join='outer', axis=1, sort=True)
df_merge = pd1.merge(df1, df1_1, how='outer',
                     left_index=True, right_index=True, sort=True)
# pandas marks missing cells with np.nan; fill them with the placeholder 11.
# fillna is the dedicated call for what replace(np.nan, 11) did before.
df_concat = df_concat.fillna(11)
df_merge = df_merge.fillna(11)
print('pd_concat df1和df1_1:\n', df_concat)
print('pd_merge df1和df1_1:\n', df_merge)
'''
df1:
aa ab ac
A 0 1 2
B 3 4 5
C 6 7 8
D 9 10 11
df1_1:
aa ab ad
B 1 1 1
C 1 1 1
E 1 1 1
pd_concat df1和df1_1:
aa ab ac aa ab ad
A 0.0 1.0 2.0 11.0 11.0 11.0
B 3.0 4.0 5.0 1.0 1.0 1.0
C 6.0 7.0 8.0 1.0 1.0 1.0
D 9.0 10.0 11.0 11.0 11.0 11.0
E 11.0 11.0 11.0 1.0 1.0 1.0
pd_merge df1和df1_1:
aa_x ab_x ac aa_y ab_y ad
A 0.0 1.0 2.0 11.0 11.0 11.0
B 3.0 4.0 5.0 1.0 1.0 1.0
C 6.0 7.0 8.0 1.0 1.0 1.0
D 9.0 10.0 11.0 11.0 11.0 11.0
E 11.0 11.0 11.0 1.0 1.0 1.0
'''
# Column-wise aggregation with DataFrame.agg. Note that no groupby is involved
# here, despite the name of the result variable: each statistic is computed
# over whole columns.
'''DataFrame.agg(func,axis = 0,* args,** kwargs )'''
summary_stats = ['mean', 'sum', 'count']
df_concat_broupby = df_concat.agg(summary_stats)
print(df_concat_broupby)
'''
aa ab ac aa ab ad
mean 5.8 6.6 7.4 5.0 5.0 5.0
sum 29.0 33.0 37.0 25.0 25.0 25.0
count 5.0 5.0 5.0 5.0 5.0 5.0
'''
# A second aggregation (max / min / mean) whose index is given a name.
range_stats = ['max', 'min', 'mean']
df_concat_mean = df_concat.agg(range_stats)
df_concat_mean.index.name = 'index'
print(df_concat_mean)
'''
aa ab ac aa ab ad
max 11.0 11.0 11.0 11.0 11.0 11.0
min 0.0 1.0 2.0 1.0 1.0 1.0
mean 5.8 6.6 7.4 5.0 5.0 5.0
'''
# Quick statistical summary of every column.
quick_summary = df_concat.describe()
print(quick_summary)
'''
aa ab ac aa ab ad
count 5.000000 5.000000 5.000000 5.000000 5.000000 5.000000
mean 5.800000 6.600000 7.400000 5.000000 5.000000 5.000000
std 4.438468 4.159327 3.911521 5.477226 5.477226 5.477226
min 0.000000 1.000000 2.000000 1.000000 1.000000 1.000000
25% 3.000000 4.000000 5.000000 1.000000 1.000000 1.000000
50% 6.000000 7.000000 8.000000 1.000000 1.000000 1.000000
75% 9.000000 10.000000 11.000000 11.000000 11.000000 11.000000
max 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000
'''
# ---- Plotting through the pandas .plot accessor ----
print("------plot画图---------")
# Line chart of the aggregated frame; show() displays the figure.
df_concat_mean.plot.line()
plt.show()
# Bar chart of the 'aa' selection.
# NOTE(review): if df_concat still carries two columns labelled 'aa', this
# selection is a two-column frame rather than a single Series -- confirm that
# plotting both is intended.
df_concat['aa'].plot.bar()
plt.show()
# 参考《pandas官方文档中文版.pdf》