import numpy as np
import pandas as pd
3.1 索引器
3.1.1 表的列索引
df= pd. read_csv( 'E:/DW学习/Python 学习代码/joyful-pandas-master/data/learn_pandas.csv' ,
usecols= [ 'School' , 'Grade' , 'Name' , 'Gender' , 'Weight' , 'Transfer' ] )
df[ 'Name' ] . head( )
0 Gaopeng Yang
1 Changqiang You
2 Mei Sun
3 Xiaojuan Sun
4 Gaojuan You
Name: Name, dtype: object
df[ [ 'Gender' , 'Name' ] ] . head( )
Gender Name 0 Female Gaopeng Yang 1 Male Changqiang You 2 Male Mei Sun 3 Female Xiaojuan Sun 4 Male Gaojuan You
df. Name. head( )
0 Gaopeng Yang
1 Changqiang You
2 Mei Sun
3 Xiaojuan Sun
4 Gaojuan You
Name: Name, dtype: object
3.1.2 序列的行索引
s= pd. Series( [ 1 , 2 , 3 , 4 , 5 , 6 ] , index= [ 'a' , 'b' , 'a' , 'a' , 'a' , 'c' ] )
s[ 'a' ]
a 1
a 3
a 4
a 5
dtype: int64
s[ 'b' ]
2
s[ [ 'c' , 'b' ] ]
c 6
b 2
dtype: int64
s[ 'c' : 'b' : - 2 ]
c 6
a 4
b 2
dtype: int64
s = pd. Series( [ 'a' , 'b' , 'c' , 'd' , 'e' , 'f' ] , index= [ 1 , 3 , 1 , 2 , 5 , 4 ] )
s[ 1 ]
1 a
1 c
dtype: object
s[ [ 2 , 3 ] ]
2 d
3 b
dtype: object
s[ 1 : - 1 : 2 ]
3 b
2 d
dtype: object
3.1.3 loc 索引器
'''前面讲到了对 DataFrame 的列进行选取,下面要讨论其行的选取。对于表而言,有两种索引器,一种是基于
元素的 loc 索引器,另一种是基于 位置的 iloc 索引器。
loc 索引器的一般形式是 loc[*, *] ,其中第一个 * 代表行的选择,第二个 * 代表列的选择,如果省略第二个
位置写作 loc[*] ,这个 * 是指行的筛选。其中,* 的位置一共有五类合法对象,分别是:单个元素、元素列
表、元素切片、布尔列表以及函数,下面将依次说明。
为了演示相应操作,先利用 set_index 方法把 Name 列设为索引,关于该函数的其他用法将在多级索引一章
介绍。'''
df_demo= df. set_index( 'Name' )
df_demo. head( )
School Grade Gender Weight Transfer Name Gaopeng Yang Shanghai Jiao Tong University Freshman Female 46.0 N Changqiang You Peking University Freshman Male 70.0 N Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Xiaojuan Sun Fudan University Sophomore Female 41.0 N Gaojuan You Fudan University Sophomore Male 74.0 N
df_demo. loc[ 'Qiang Sun' ]
School Grade Gender Weight Transfer Name Qiang Sun Tsinghua University Junior Female 53.0 N Qiang Sun Tsinghua University Sophomore Female 40.0 N Qiang Sun Shanghai Jiao Tong University Junior Female NaN N
df_demo. loc[ 'Quan Zhao' ]
School Shanghai Jiao Tong University
Grade Junior
Gender Female
Weight 53
Transfer N
Name: Quan Zhao, dtype: object
df_demo. loc[ 'Qiang Sun' , 'School' ]
Name
Qiang Sun Tsinghua University
Qiang Sun Tsinghua University
Qiang Sun Shanghai Jiao Tong University
Name: School, dtype: object
df_demo. loc[ 'Quan Zhao' , 'School' ]
'Shanghai Jiao Tong University'
df_demo. loc[ [ 'Qiang Sun' , 'Quan Zhao' ] , [ 'School' , 'Gender' ] ]
School Gender Name Qiang Sun Tsinghua University Female Qiang Sun Tsinghua University Female Qiang Sun Shanghai Jiao Tong University Female Quan Zhao Shanghai Jiao Tong University Female
df_demo. loc[ 'Gaojuan You' : 'Gaoqiang Qian' , 'School' : 'Gender' ]
School Grade Gender Name Gaojuan You Fudan University Sophomore Male Xiaoli Qian Tsinghua University Freshman Female Qiang Chu Shanghai Jiao Tong University Freshman Female Gaoqiang Qian Tsinghua University Junior Female
df_loc_slice_demo = df_demo. copy( )
df_loc_slice_demo. index = range ( df_demo. shape[ 0 ] , 0 , - 1 )
df_loc_slice_demo. index
RangeIndex(start=200, stop=0, step=-1)
df_loc_slice_demo. loc[ 5 : 3 ]
School Grade Gender Weight Transfer 5 Fudan University Junior Female 46.0 N 4 Tsinghua University Senior Female 50.0 N 3 Shanghai Jiao Tong University Senior Female 45.0 N
df_loc_slice_demo
School Grade Gender Weight Transfer 200 Shanghai Jiao Tong University Freshman Female 46.0 N 199 Peking University Freshman Male 70.0 N 198 Shanghai Jiao Tong University Senior Male 89.0 N 197 Fudan University Sophomore Female 41.0 N 196 Fudan University Sophomore Male 74.0 N ... ... ... ... ... ... 5 Fudan University Junior Female 46.0 N 4 Tsinghua University Senior Female 50.0 N 3 Shanghai Jiao Tong University Senior Female 45.0 N 2 Shanghai Jiao Tong University Senior Male 71.0 N 1 Tsinghua University Sophomore Male 51.0 N
200 rows × 5 columns
df_loc_slice_demo. loc[ 3 : 5 ]
School Grade Gender Weight Transfer
df_loc_slice_demo. loc[ 3 : 5 : - 1 ]
School Grade Gender Weight Transfer 3 Shanghai Jiao Tong University Senior Female 45.0 N 4 Tsinghua University Senior Female 50.0 N 5 Fudan University Junior Female 46.0 N
df_demo. loc[ df_demo. Weight> 70 ] . head( )
School Grade Gender Weight Transfer Name Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Gaojuan You Fudan University Sophomore Male 74.0 N Xiaopeng Zhou Shanghai Jiao Tong University Freshman Male 74.0 N Xiaofeng Sun Tsinghua University Senior Male 71.0 N Qiang Zheng Shanghai Jiao Tong University Senior Male 87.0 N
df_demo[ df_demo. Weight> 70 ] . head( )
School Grade Gender Weight Transfer Name Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Gaojuan You Fudan University Sophomore Male 74.0 N Xiaopeng Zhou Shanghai Jiao Tong University Freshman Male 74.0 N Xiaofeng Sun Tsinghua University Senior Male 71.0 N Qiang Zheng Shanghai Jiao Tong University Senior Male 87.0 N
df_demo. Weight> 70
Name
Gaopeng Yang False
Changqiang You False
Mei Sun True
Xiaojuan Sun False
Gaojuan You True
...
Xiaojuan Sun False
Li Zhao False
Chengqiang Chu False
Chengmei Shen True
Chunpeng Lv False
Name: Weight, Length: 200, dtype: bool
df_demo. loc[ df_demo. Grade. isin( [ 'Freshman' , 'Senior' ] ) ] . head( )
School Grade Gender Weight Transfer Name Gaopeng Yang Shanghai Jiao Tong University Freshman Female 46.0 N Changqiang You Peking University Freshman Male 70.0 N Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Xiaoli Qian Tsinghua University Freshman Female 51.0 N Qiang Chu Shanghai Jiao Tong University Freshman Female 52.0 N
df_demo. Grade. isin( [ 'Freshman' , 'Senior' ] )
Name
Gaopeng Yang True
Changqiang You True
Mei Sun True
Xiaojuan Sun False
Gaojuan You False
...
Xiaojuan Sun False
Li Zhao True
Chengqiang Chu True
Chengmei Shen True
Chunpeng Lv False
Name: Grade, Length: 200, dtype: bool
condition_1_1 = df_demo. School == 'Fudan University'
condition_1_1
Name
Gaopeng Yang False
Changqiang You False
Mei Sun False
Xiaojuan Sun True
Gaojuan You True
...
Xiaojuan Sun True
Li Zhao False
Chengqiang Chu False
Chengmei Shen False
Chunpeng Lv False
Name: School, Length: 200, dtype: bool
condition_1_2 = df_demo. Grade == 'Senior'
condition_1_3 = df_demo. Weight > 70
condition_1 = condition_1_1 & condition_1_2 & condition_1_3
condition_1
Name
Gaopeng Yang False
Changqiang You False
Mei Sun False
Xiaojuan Sun False
Gaojuan You False
...
Xiaojuan Sun False
Li Zhao False
Chengqiang Chu False
Chengmei Shen False
Chunpeng Lv False
Length: 200, dtype: bool
condition_2_1 = df_demo. School == 'Peking University'
condition_2_2 = df_demo. Grade == 'Senior'
condition_2_3 = df_demo. Weight > 80
condition_2 = condition_2_1 & ( ~ condition_2_2) & condition_2_3
condition_2
Name
Gaopeng Yang False
Changqiang You False
Mei Sun False
Xiaojuan Sun False
Gaojuan You False
...
Xiaojuan Sun False
Li Zhao False
Chengqiang Chu False
Chengmei Shen False
Chunpeng Lv False
Length: 200, dtype: bool
df_demo. loc[ condition_1 | condition_2]
School Grade Gender Weight Transfer Name Qiang Han Peking University Freshman Male 87.0 N Chengpeng Zhou Fudan University Senior Male 81.0 N Changpeng Zhao Peking University Freshman Male 83.0 N Chengpeng Qian Fudan University Senior Male 73.0 Y
def condition ( x) :
condition_1_1 = x. School == 'Fudan University'
condition_1_2 = x. Grade == 'Senior'
condition_1_3 = x. Weight > 70
condition_1 = condition_1_1 & condition_1_2 & condition_1_3
condition_2_1 = x. School == 'Peking University'
condition_2_2 = x. Grade == 'Senior'
condition_2_3 = x. Weight > 80
condition_2 = condition_2_1 & ( ~ condition_2_2) & condition_2_3
result = condition_1 | condition_2
return result
df_demo. loc[ condition]
School Grade Gender Weight Transfer Name Qiang Han Peking University Freshman Male 87.0 N Chengpeng Zhou Fudan University Senior Male 81.0 N Changpeng Zhao Peking University Freshman Male 83.0 N Chengpeng Qian Fudan University Senior Male 73.0 Y
df_demo. loc[ lambda x: 'Quan Zhao' , lambda x: 'Gender' ]
'Female'
df_demo. loc[ lambda x: slice ( 'Gaojuan You' , 'Gaoqiang Qian' ) ]
School Grade Gender Weight Transfer Name Gaojuan You Fudan University Sophomore Male 74.0 N Xiaoli Qian Tsinghua University Freshman Female 51.0 N Qiang Chu Shanghai Jiao Tong University Freshman Female 52.0 N Gaoqiang Qian Tsinghua University Junior Female 50.0 N
'''不要使用链式赋值
在对表或者序列赋值时,应当在使用一层索引器后直接进行赋值操作,这样做是由于进行多次
索引后赋值是赋在临时返回的 copy 副本上的,而没有真正修改元素从而报出 SettingWithCopyWarning 警告。例如,下面给出的例子:'''
df_chain = pd. DataFrame( [ [ 0 , 0 ] , [ 1 , 0 ] , [ - 1 , 0 ] ] , columns= list ( 'AB' ) )
df_chain
import warnings
df_chain. loc[ df_chain. A!= 0 , 'B' ] = 1
df_chain
3.1.4 iloc 索引器
'''iloc 的使用与 loc 完全类似,只不过是针对位置进行筛选,在相应的 * 位置处一共也有五类合法对象,分别
是:整数、整数列表、整数切片、布尔列表以及函数,函数的返回值必须是前面的四类合法对象中的一个,其
输入同样也为 DataFrame 本身。'''
df_demo. iloc[ 1 , 1 ]
'Freshman'
df_demo
School Grade Gender Weight Transfer Name Gaopeng Yang Shanghai Jiao Tong University Freshman Female 46.0 N Changqiang You Peking University Freshman Male 70.0 N Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Xiaojuan Sun Fudan University Sophomore Female 41.0 N Gaojuan You Fudan University Sophomore Male 74.0 N ... ... ... ... ... ... Xiaojuan Sun Fudan University Junior Female 46.0 N Li Zhao Tsinghua University Senior Female 50.0 N Chengqiang Chu Shanghai Jiao Tong University Senior Female 45.0 N Chengmei Shen Shanghai Jiao Tong University Senior Male 71.0 N Chunpeng Lv Tsinghua University Sophomore Male 51.0 N
200 rows × 5 columns
df_demo. iloc[ [ 0 , 1 ] , [ 0 , 1 ] ]
School Grade Name Gaopeng Yang Shanghai Jiao Tong University Freshman Changqiang You Peking University Freshman
df_demo. iloc[ 1 : 4 , 2 : 4 ]
Gender Weight Name Changqiang You Male 70.0 Mei Sun Male 89.0 Xiaojuan Sun Female 41.0
df_demo. iloc[ lambda x: slice ( 1 , 4 ) ]
School Grade Gender Weight Transfer Name Changqiang You Peking University Freshman Male 70.0 N Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Xiaojuan Sun Fudan University Sophomore Female 41.0 N
df_demo. iloc[ ( df_demo. Weight> 80 ) . values] . head( )
School Grade Gender Weight Transfer Name Mei Sun Shanghai Jiao Tong University Senior Male 89.0 N Qiang Zheng Shanghai Jiao Tong University Senior Male 87.0 N Qiang Han Peking University Freshman Male 87.0 N Chengpeng Zhou Fudan University Senior Male 81.0 N Feng Han Shanghai Jiao Tong University Sophomore Male 82.0 N
df_demo. School. iloc[ 1 ]
'Peking University'
df_demo. School. iloc[ 1 : 5 : 2 ]
Name
Changqiang You Peking University
Xiaojuan Sun Fudan University
Name: School, dtype: object
3.1.5 query 方法
'''在 pandas 中,支持把字符串形式的查询表达式传入 query 方法来查询数据,其表达式的执行结果必须返回
布尔列表。在进行复杂索引时,由于这种检索方式无需像普通方法一样重复使用 DataFrame 的名字来引用
列名,一般而言会使代码长度在不降低可读性的前提下有所减少。
例如,将 loc 一节中的复合条件查询例子可以如下改写:
query n.查询 询问; 疑问; 问号;
'''
df. query( '((School == "Fudan University")&'
' (Grade == "Senior")&'
' (Weight > 70))|'
'((School == "Peking University")&'
' (Grade != "Senior")&'
' (Weight > 80))' )
School Grade Name Gender Weight Transfer 38 Peking University Freshman Qiang Han Male 87.0 N 66 Fudan University Senior Chengpeng Zhou Male 81.0 N 99 Peking University Freshman Changpeng Zhao Male 83.0 N 131 Fudan University Senior Chengpeng Qian Male 73.0 Y
df. query( 'Weight > Weight.mean()' ) . head( )
School Grade Name Gender Weight Transfer 1 Peking University Freshman Changqiang You Male 70.0 N 2 Shanghai Jiao Tong University Senior Mei Sun Male 89.0 N 4 Fudan University Sophomore Gaojuan You Male 74.0 N 10 Shanghai Jiao Tong University Freshman Xiaopeng Zhou Male 74.0 N 14 Tsinghua University Senior Xiaomei Zhou Female 57.0 N
df. query( '(Grade not in ["Freshman", "Sophomore"]) and'
'(Gender == "Male")' ) . head( )
School Grade Name Gender Weight Transfer 2 Shanghai Jiao Tong University Senior Mei Sun Male 89.0 N 16 Tsinghua University Junior Xiaoqiang Qin Male 68.0 N 17 Tsinghua University Junior Peng Wang Male 65.0 N 18 Tsinghua University Senior Xiaofeng Sun Male 71.0 N 21 Shanghai Jiao Tong University Senior Xiaopeng Shen Male 62.0 NaN
df. query( 'Grade == ["Junior", "Senior"]' ) . head( )
School Grade Name Gender Weight Transfer 2 Shanghai Jiao Tong University Senior Mei Sun Male 89.0 N 7 Tsinghua University Junior Gaoqiang Qian Female 50.0 N 9 Peking University Junior Juan Xu Female NaN N 11 Tsinghua University Junior Xiaoquan Lv Female 43.0 N 12 Shanghai Jiao Tong University Senior Peng You Female 48.0 NaN
3.1.6 随机抽样
'''如果把 DataFrame 的每一行看作一个样本,或把每一列看作一个特征,再把整个 DataFrame 看作总体,想
要对样本或特征进行随机抽样就可以用 sample 函数。有时在拿到大型数据集后,想要对统计特征进行计算
来了解数据的大致分布,但是这很费时间。同时,由于许多统计特征在等概率不放回的简单随机抽样条件下,
是总体统计特征的无偏估计,比如样本均值和总体均值,那么就可以先从整张表中抽出一部分来做近似估计。
sample 函数中的主要参数为 n, axis, frac, replace, weights ,前三个分别是指抽样数量、抽样的方向(0 为
行、1 为列)和抽样比例(0.3 则为从总体中抽出 30% 的样本)。
replace 和 weights 分别是指是否放回和每个样本的抽样相对概率,当 replace = True 则表示有放回抽样。例
如,对下面构造的 df_sample 以 value 值的相对大小为抽样概率进行有放回抽样,抽样数量为 3。'''
df_sample= pd. DataFrame( { 'id' : list ( 'abcde' ) , 'value' : [ 1 , 2 , 3 , 4 , 90 ] } )
df_sample
df_sample. sample( 3 , replace = True , weights = df_sample. value)
3.2 多级索引
3.2.1 多级索引及其表的结构
np. random. seed( 0 )
multi_index = pd. MultiIndex. from_product( [ list ( 'ABCD' ) ,
df. Gender. unique( ) ] , names= ( 'School' , 'Gender' ) )
multi_column = pd. MultiIndex. from_product( [ [ 'Height' , 'Weight' ] ,
df. Grade. unique( ) ] , names= ( 'Indicator' , 'Grade' ) )
df_multi = pd. DataFrame( np. c_[ ( np. random. randn( 8 , 4 ) * 5 + 163 ) . tolist( ) ,
( np. random. randn( 8 , 4 ) * 5 + 65 ) . tolist( ) ] ,
index = multi_index,
columns = multi_column) . round ( 1 )
df_multi
Indicator Height Weight Grade Freshman Senior Sophomore Junior Freshman Senior Sophomore Junior School Gender A Female 171.8 165.0 167.9 174.2 60.6 55.1 63.3 65.8 Male 172.3 158.1 167.8 162.2 71.2 71.0 63.1 63.5 B Female 162.5 165.1 163.7 170.3 59.8 57.9 56.5 74.8 Male 166.8 163.6 165.2 164.7 62.5 62.8 58.7 68.9 C Female 170.5 162.0 164.6 158.7 56.9 63.9 60.5 66.9 Male 150.2 166.3 167.3 159.3 62.4 59.1 64.9 67.1 D Female 174.3 155.7 163.2 162.1 65.3 66.5 61.8 63.2 Male 170.7 170.3 163.8 164.9 61.6 63.2 60.9 56.4
df_multi. index. names
FrozenList(['School', 'Gender'])
df_multi. columns. names
FrozenList(['Indicator', 'Grade'])
df_multi. index. values
array([('A', 'Female'), ('A', 'Male'), ('B', 'Female'), ('B', 'Male'),
('C', 'Female'), ('C', 'Male'), ('D', 'Female'), ('D', 'Male')],
dtype=object)
df_multi. columns. values
array([('Height', 'Freshman'), ('Height', 'Senior'),
('Height', 'Sophomore'), ('Height', 'Junior'),
('Weight', 'Freshman'), ('Weight', 'Senior'),
('Weight', 'Sophomore'), ('Weight', 'Junior')], dtype=object)
df_multi. index. get_level_values( 0 )
Index(['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], dtype='object', name='School')
3.2.2 多级索引中的 loc 索引器
df_multi = df. set_index( [ 'School' , 'Grade' ] )
df_multi
Name Gender Weight Transfer School Grade Shanghai Jiao Tong University Freshman Gaopeng Yang Female 46.0 N Peking University Freshman Changqiang You Male 70.0 N Shanghai Jiao Tong University Senior Mei Sun Male 89.0 N Fudan University Sophomore Xiaojuan Sun Female 41.0 N Sophomore Gaojuan You Male 74.0 N ... ... ... ... ... Junior Xiaojuan Sun Female 46.0 N Tsinghua University Senior Li Zhao Female 50.0 N Shanghai Jiao Tong University Senior Chengqiang Chu Female 45.0 N Senior Chengmei Shen Male 71.0 N Tsinghua University Sophomore Chunpeng Lv Male 51.0 N
200 rows × 4 columns
df_multi = df_multi. sort_index( )
df_multi. loc[ ( 'Fudan University' , 'Junior' ) ] . head( )
Name Gender Weight Transfer School Grade Fudan University Junior Yanli You Female 48.0 N Junior Chunqiang Chu Male 72.0 N Junior Changfeng Lv Male 76.0 N Junior Yanjuan Lv Female 49.0 NaN Junior Gaoqiang Zhou Female 43.0 N
df_multi. loc[ [ ( 'Fudan University' , 'Senior' ) ,
( 'Shanghai Jiao Tong University' , 'Freshman' ) ] ] . head( )
Name Gender Weight Transfer School Grade Fudan University Senior Chengpeng Zheng Female 38.0 N Senior Feng Zhou Female 47.0 N Senior Gaomei Lv Female 34.0 N Senior Chunli Lv Female 56.0 N Senior Chengpeng Zhou Male 81.0 N
df_multi. loc[ df_multi. Weight > 70 ] . head( )
Name Gender Weight Transfer School Grade Fudan University Freshman Feng Wang Male 74.0 N Junior Chunqiang Chu Male 72.0 N Junior Changfeng Lv Male 76.0 N Senior Chengpeng Zhou Male 81.0 N Senior Chengpeng Qian Male 73.0 Y
df_multi. loc[ lambda x: ( 'Fudan University' , 'Junior' ) ] . head( )
Name Gender Weight Transfer School Grade Fudan University Junior Yanli You Female 48.0 N Junior Chunqiang Chu Male 72.0 N Junior Changfeng Lv Male 76.0 N Junior Yanjuan Lv Female 49.0 NaN Junior Gaoqiang Zhou Female 43.0 N
res = df_multi. loc[ ( [ 'Peking University' , 'Fudan University' ] ,
[ 'Sophomore' , 'Junior' ] ) , : ]
res. head( )
Name Gender Weight Transfer School Grade Peking University Sophomore Changmei Xu Female 43.0 N Sophomore Xiaopeng Qin Male NaN N Sophomore Mei Xu Female 39.0 N Sophomore Xiaoli Zhou Female 55.0 N Sophomore Peng Han Female 34.0 NaN
3.2.3 IndexSlice 对象
'''前面介绍的方法,即使在索引不重复的时候,也只能对元组整体进行切片,而不能对每层进行切片,也不允
许将切片和布尔列表混合使用,引入 IndexSlice 对象就能解决这个问题。Slice 对象一共有两种形式,第一种
为 loc[idx[*,*]] 型,第二种为 loc[idx[*,*],idx[*,*]] 型,下面将进行介绍。为了方便演示,下面构造一个 索引
不重复的 DataFrame :'''
np. random. seed( 0 )
L1, L2 = [ 'A' , 'B' , 'C' ] , [ 'a' , 'b' , 'c' ]
mul_index1 = pd. MultiIndex. from_product( [ L1, L2] , names= ( 'Upper' , 'Lower' ) )
L3, L4 = [ 'D' , 'E' , 'F' ] , [ 'd' , 'e' , 'f' ]
mul_index2 = pd. MultiIndex. from_product( [ L3, L4] , names= ( 'Big' , 'Small' ) )
df_ex = pd. DataFrame( np. random. randint( - 9 , 10 , ( 9 , 9 ) ) ,
index= mul_index1,
columns= mul_index2)
df_ex
Big D E F Small d e f d e f d e f Upper Lower A a 3 6 -9 -6 -6 -2 0 9 -5 b -3 3 -8 -3 -2 5 8 -4 4 c -1 0 7 -4 6 6 -9 9 -6 B a 8 5 -2 -9 -8 0 -9 1 -6 b 2 9 -7 -9 -9 -5 -4 -3 -1 c 8 6 -5 0 1 -8 -8 -2 0 C a -6 -3 2 5 9 -9 5 -6 3 b 1 2 -5 -3 -5 6 -6 3 -5 c -1 5 6 -6 6 4 7 8 -4
idx = pd. IndexSlice
df_ex. loc[ idx[ 'C' : , ( 'D' , 'f' ) : ] ]
Big D E F Small f d e f d e f Upper Lower C a 2 5 9 -9 5 -6 3 b -5 -3 -5 6 -6 3 -5 c 6 -6 6 4 7 8 -4
df_ex. loc[ idx[ : 'A' , lambda x: x. sum ( ) > 0 ] ]
Big D F Small d e e Upper Lower A a 3 6 9 b -3 3 -4 c -1 0 9
df_ex. loc[ idx[ : 'A' , 'b' : ] , idx[ 'E' : , 'e' : ] ]
Big E F Small e f e f Upper Lower A b -2 5 -4 4 c 6 6 9 -6
3.2.4 多级索引的构造
'''前面提到了多级索引表的结构和切片,那么除了使用 set_index 之外,如何自己构造多级索引呢?常用的有
from_tuples, from_arrays, from_product 三种方法,它们都是 pd.MultiIndex 对象下的函数。
from_tuples 指根据传入由元组组成的列表进行构造:'''
my_tuple = [ ( 'a' , 'cat' ) , ( 'a' , 'dog' ) , ( 'b' , 'cat' ) , ( 'b' , 'dog' ) ]
pd. MultiIndex. from_tuples( my_tuple, names= [ 'First' , 'Second' ] )
MultiIndex([('a', 'cat'),
('a', 'dog'),
('b', 'cat'),
('b', 'dog')],
names=['First', 'Second'])
my_array = [ list ( 'aabb' ) , [ 'cat' , 'dog' ] * 2 ]
pd. MultiIndex. from_arrays( my_array, names= [ 'First' , 'Second' ] )
MultiIndex([('a', 'cat'),
('a', 'dog'),
('b', 'cat'),
('b', 'dog')],
names=['First', 'Second'])
my_list1 = [ 'a' , 'b' ]
my_list2 = [ 'cat' , 'dog' ]
pd. MultiIndex. from_product( [ my_list1,
my_list2] ,
names= [ 'First' , 'Second' ] )
MultiIndex([('a', 'cat'),
('a', 'dog'),
('b', 'cat'),
('b', 'dog')],
names=['First', 'Second'])
3.3 索引的常用方法
3.3.1 索引层的交换和删除
np. random. seed( 0 )
L1, L2, L3 = [ 'A' , 'B' ] , [ 'a' , 'b' ] , [ 'alpha' , 'beta' ]
mul_index1 = pd. MultiIndex. from_product( [ L1, L2, L3] ,
names= ( 'Upper' , 'Lower' , 'Extra' ) )
L4, L5, L6 = [ 'C' , 'D' ] , [ 'c' , 'd' ] , [ 'cat' , 'dog' ]
mul_index2 = pd. MultiIndex. from_product( [ L4, L5, L6] ,
names= ( 'Big' , 'Small' , 'Other' ) )
df_ex = pd. DataFrame( np. random. randint( - 9 , 10 , ( 8 , 8 ) ) ,
index= mul_index1,
columns= mul_index2)
df_ex
Big C D Small c d c d Other cat dog cat dog cat dog cat dog Upper Lower Extra A a alpha 2 -5 -3 -5 6 -6 3 -5 beta -1 5 6 -6 6 4 7 8 b alpha -4 0 -6 -9 -4 -9 8 9 beta -5 -7 7 -6 -7 1 4 7 B a alpha -2 0 -9 1 9 2 -7 -7 beta -6 -6 9 5 -6 8 9 5 b alpha 0 -8 -5 1 2 -1 2 -7 beta 7 -9 -9 -3 5 1 -1 4
df_ex. swaplevel( 0 , 2 , axis= 1 ) . head( )
Other cat dog cat dog cat dog cat dog Small c c d d c c d d Big C C C C D D D D Upper Lower Extra A a alpha 2 -5 -3 -5 6 -6 3 -5 beta -1 5 6 -6 6 4 7 8 b alpha -4 0 -6 -9 -4 -9 8 9 beta -5 -7 7 -6 -7 1 4 7 B a alpha -2 0 -9 1 9 2 -7 -7
df_ex. reorder_levels( [ 2 , 0 , 1 ] , axis= 0 ) . head( )
Big C D Small c d c d Other cat dog cat dog cat dog cat dog Extra Upper Lower alpha A a 2 -5 -3 -5 6 -6 3 -5 beta A a -1 5 6 -6 6 4 7 8 alpha A b -4 0 -6 -9 -4 -9 8 9 beta A b -5 -7 7 -6 -7 1 4 7 alpha B a -2 0 -9 1 9 2 -7 -7
df_ex. droplevel( 1 , axis= 1 )
Big C D Other cat dog cat dog cat dog cat dog Upper Lower Extra A a alpha 2 -5 -3 -5 6 -6 3 -5 beta -1 5 6 -6 6 4 7 8 b alpha -4 0 -6 -9 -4 -9 8 9 beta -5 -7 7 -6 -7 1 4 7 B a alpha -2 0 -9 1 9 2 -7 -7 beta -6 -6 9 5 -6 8 9 5 b alpha 0 -8 -5 1 2 -1 2 -7 beta 7 -9 -9 -3 5 1 -1 4
3.3.2 索引属性的修改
df_ex. rename_axis( index= { 'Upper' : 'Changed_row' } ,
columns= { 'Other' : 'Changed_Col' } ) . head( )
Big C D Small c d c d Changed_Col cat dog cat dog cat dog cat dog Changed_row Lower Extra A a alpha 2 -5 -3 -5 6 -6 3 -5 beta -1 5 6 -6 6 4 7 8 b alpha -4 0 -6 -9 -4 -9 8 9 beta -5 -7 7 -6 -7 1 4 7 B a alpha -2 0 -9 1 9 2 -7 -7
df_ex. rename( columns= { 'cat' : 'not_cat' } , level= 2 ) . head( )
Big C D Small c d c d Other not_cat dog not_cat dog not_cat dog not_cat dog Upper Lower Extra A a alpha 2 -5 -3 -5 6 -6 3 -5 beta -1 5 6 -6 6 4 7 8 b alpha -4 0 -6 -9 -4 -9 8 9 beta -5 -7 7 -6 -7 1 4 7 B a alpha -2 0 -9 1 9 2 -7 -7