import pandas as pd
# Demo data: three columns and five rows, with the rows labelled 'a'..'e'
# so that label-based (loc) and position-based (iloc) access can be compared.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
df = pd.DataFrame(data, index=list('abcde'))
print(df)
state year pop
a Ohio 2000 1.5
b Ohio 2001 1.7
c Ohio 2002 3.6
d Nevada 2001 2.4
e Nevada 2002 2.9
loc方法
1.行切片
# Row access by label: a single label gives a Series, while a label slice
# (both end labels included) gives a DataFrame.
single_row = df.loc['b']
row_range = df.loc['b':'d']
for picked in (single_row, row_range):
    print(picked)
for picked in (single_row, row_range):
    print(type(picked))
state Ohio
year 2001
pop 1.7
Name: b, dtype: object
state year pop
b Ohio 2001 1.7
c Ohio 2002 3.6
d Nevada 2001 2.4
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
注意数据类型的不同
2.单行或多行检索
# Row access with a list of labels: the result is a DataFrame even when the
# list holds a single label.
label_lists = (['b'], ['b', 'd'])
for labels in label_lists:
    print(df.loc[labels])
for labels in label_lists:
    print(type(df.loc[labels]))
state year pop
b Ohio 2001 1.7
state year pop
b Ohio 2001 1.7
d Nevada 2001 2.4
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
3.列检索
# Column access with a list of column names (all rows kept via ':'):
# the result is a DataFrame even for a single column.
column_lists = (['year'], ['state', 'year'])
for cols in column_lists:
    print(df.loc[:, cols])
for cols in column_lists:
    print(type(df.loc[:, cols]))
year
a 2000
b 2001
c 2002
d 2001
e 2002
state year
a Ohio 2000
b Ohio 2001
c Ohio 2002
d Nevada 2001
e Nevada 2002
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
4.行列结合检索
# Combined row/column access where both selectors are lists: every result
# is a DataFrame.
row_col_pairs = (
    (['b'], ['state']),
    (['b', 'd'], ['state', 'year']),
    (slice(None), ['state', 'year']),  # slice(None) is the same as ':' (all rows)
)
for rows, cols in row_col_pairs:
    print(df.loc[rows, cols])
for rows, cols in row_col_pairs:
    print(type(df.loc[rows, cols]))
state
b Ohio
state year
b Ohio 2001
d Nevada 2001
state year
a Ohio 2000
b Ohio 2001
c Ohio 2002
d Nevada 2001
e Nevada 2002
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
(第二项缺省参数)
5.列切片
同第1项（行切片）：单个标签返回Series，标签切片返回DataFrame
# Column access by label: a single column name gives a Series, while a
# column-label slice gives a DataFrame.
year_column = df.loc[:, 'year']
year_through_pop = df.loc[:, 'year':'pop']
for picked in (year_column, year_through_pop):
    print(picked)
for picked in (year_column, year_through_pop):
    print(type(picked))
a 2000
b 2001
c 2002
d 2001
e 2002
Name: year, dtype: int64
year pop
a 2000 1.5
b 2001 1.7
c 2002 3.6
d 2001 2.4
e 2002 2.9
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
6.行列结合切片
print(df.loc['b','year'])
print(df.loc['b','state':'pop'])
print(df.loc['b':'d','year'])
print(df.loc['b':'d','year':'pop'])
print(type(df.loc['b':'year']))
print(type(df.loc['b','state':'pop']))
print(type(df.loc['b':'d','year']))
print(type(df.loc['b':'d','year':'pop']))
2001
state Ohio
year 2001
pop 1.7
Name: b, dtype: object
b 2001
c 2002
d 2001
Name: year, dtype: int64
year pop
b 2001 1.7
c 2002 3.6
d 2001 2.4
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
注意类型的不同
7.组合使用
# Mixing selector kinds: a scalar row label with a column list gives a
# Series; a row slice with a column list gives a DataFrame.
row_selectors = ('b', slice('b', 'd'))  # slice('b', 'd') is the 'b':'d' label slice
column_lists = (['state'], ['state', 'year'])
mixed_results = [
    df.loc[rows, cols]
    for rows in row_selectors
    for cols in column_lists
]
for result in mixed_results:
    print(result)
for result in mixed_results:
    print(type(result))
state Ohio
Name: b, dtype: object
state Ohio
year 2001
Name: b, dtype: object
state
b Ohio
c Ohio
d Nevada
state year
b Ohio 2001
c Ohio 2002
d Nevada 2001
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
8.返回判断内容
# Boolean-mask row selection: keep the rows whose 'pop' value exceeds 2,
# then choose columns with a scalar label, a label slice, or a label list.
print(df.loc[df['pop']>2,'pop'])
print(df.loc[df['pop']>2,'year':'pop'])
print(df.loc[df['pop']>2,['pop']])
print(df.loc[df['pop']>2,['year','pop']])
# The recorded output also lists the type of each result, so print those
# too: a scalar column label gives a Series, the other forms a DataFrame.
print(type(df.loc[df['pop']>2,'pop']))
print(type(df.loc[df['pop']>2,'year':'pop']))
print(type(df.loc[df['pop']>2,['pop']]))
print(type(df.loc[df['pop']>2,['year','pop']]))
c 3.6
d 2.4
e 2.9
Name: pop, dtype: float64
year pop
c 2002 3.6
d 2001 2.4
e 2002 2.9
pop
c 3.6
d 2.4
e 2.9
year pop
c 2002 3.6
d 2001 2.4
e 2002 2.9
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
# loc also accepts a callable: it is called with the DataFrame and its
# boolean result selects the rows. The result is a DataFrame even when
# no row matches.
row_filters = (
    lambda x: x['pop']>3,
    lambda x: x['year']>2002,
    lambda x: x['year']>2001,
)
for keep in row_filters:
    print(df.loc[keep])
for keep in row_filters:
    print(type(df.loc[keep]))
state year pop
c Ohio 2002 3.6
Empty DataFrame
Columns: [state, year, pop]
Index: []
state year pop
c Ohio 2002 3.6
e Nevada 2002 2.9
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
iloc方法
1.行切片
# Row access by integer position: a single position gives a Series, a
# position slice (stop excluded) gives a DataFrame, even for one row.
second_row = df.iloc[1]
one_row_slice = df.iloc[1:2]
two_row_slice = df.iloc[1:3]
for picked in (second_row, one_row_slice, two_row_slice):
    print(picked)
for picked in (second_row, one_row_slice, two_row_slice):
    print(type(picked))
state Ohio
year 2001
pop 1.7
Name: b, dtype: object
state year pop
b Ohio 2001 1.7
state year pop
b Ohio 2001 1.7
c Ohio 2002 3.6
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
2.列切片
# Column access by integer position: a single position gives a Series, a
# position slice (stop excluded) gives a DataFrame, even for one column.
second_column = df.iloc[:, 1]
one_column_slice = df.iloc[:, 1:2]
two_column_slice = df.iloc[:, 1:3]
for picked in (second_column, one_column_slice, two_column_slice):
    print(picked)
for picked in (second_column, one_column_slice, two_column_slice):
    print(type(picked))
a 2000
b 2001
c 2002
d 2001
e 2002
Name: year, dtype: int64
year
a 2000
b 2001
c 2002
d 2001
e 2002
year pop
a 2000 1.5
b 2001 1.7
c 2002 3.6
d 2001 2.4
e 2002 2.9
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
3.行列结合切片
# Combined row/column access by position. Two scalar positions give a
# single cell value; one scalar plus one slice gives a Series; two slices
# give a DataFrame.
positional_results = (
    df.iloc[1, 1],
    df.iloc[1, 1:3],
    df.iloc[1, 1:2],
    df.iloc[1:3, 1],
    df.iloc[1:3, 1:3],
)
for result in positional_results:
    print(result)
for result in positional_results:
    print(type(result))
2001
year 2001
pop 1.7
Name: b, dtype: object
year 2001
Name: b, dtype: object
b 2001
c 2002
Name: year, dtype: int64
year pop
b 2001 1.7
c 2002 3.6
<class 'numpy.int64'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
4.行索引
# Row access with a list of integer positions: the result is a DataFrame
# even when the list holds a single position.
position_lists = ([1], [1, 3])
for positions in position_lists:
    print(df.iloc[positions])
for positions in position_lists:
    print(type(df.iloc[positions]))
state year pop
b Ohio 2001 1.7
state year pop
b Ohio 2001 1.7
d Nevada 2001 2.4
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
5.列索引
# Column access with a list of integer positions (all rows kept via ':'):
# the result is a DataFrame even for a single column.
column_position_lists = ([0], [0, 2])
for col_positions in column_position_lists:
    print(df.iloc[:, col_positions])
for col_positions in column_position_lists:
    print(type(df.iloc[:, col_positions]))
state
a Ohio
b Ohio
c Ohio
d Nevada
e Nevada
state pop
a Ohio 1.5
b Ohio 1.7
c Ohio 3.6
d Nevada 2.4
e Nevada 2.9
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
6.行列结合索引
# Combined access where both selectors are position lists: every result is
# a DataFrame, including the single-cell case.
row_position_lists = ([1], [1, 3])
column_position_lists = ([0], [0, 2])
list_results = [
    df.iloc[row_positions, col_positions]
    for row_positions in row_position_lists
    for col_positions in column_position_lists
]
for result in list_results:
    print(result)
for result in list_results:
    print(type(result))
state
b Ohio
state pop
b Ohio 1.7
state
b Ohio
d Nevada
state pop
b Ohio 1.7
d Nevada 2.4
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
7.返回判断内容
def has_even_label(frame):
    """Return a boolean array marking the rows whose index label is even."""
    return frame.index % 2 == 0


# Relabel the rows with the integers 22..26, then let iloc call the function
# with the DataFrame and keep the rows it marks as True.
df.index = list(range(22, 27))
print(df.iloc[has_even_label])
state year pop
22 Ohio 2000 1.5
24 Ohio 2002 3.6
26 Nevada 2002 2.9