转载请注明出处:https://blog.csdn.net/weixin_40027906/article/details/90322541
Dataframe选择行与列,切片,布尔索引
1、选择行
import pandas as pd
import numpy as np
# Build a 3x4 frame of random floats in [0, 100) with default integer labels.
df = pd.DataFrame(np.random.rand(3, 4) * 100)
print(df)

# Passing a single row label to .loc returns that row as a Series.
print('选择一行')
cl = df.loc[1]
print(cl)
print(type(cl))

# Passing a list of row labels to .loc returns a DataFrame.
print('选择多行')
row_labels = [0, 1]
cl1 = df.loc[row_labels]
print(cl1)
print(type(cl1))
-----------------------------结果---------------------------
0 1 2 3
0 26.563019 78.730693 71.734976 69.140773
1 80.097571 11.763913 72.013929 95.420896
2 5.816948 80.052821 44.601889 18.137680
选择一行
0 80.097571
1 11.763913
2 72.013929
3 95.420896
Name: 1, dtype: float64
<class 'pandas.core.series.Series'>
选择多行
0 1 2 3
0 26.563019 78.730693 71.734976 69.140773
1 80.097571 11.763913 72.013929 95.420896
<class 'pandas.core.frame.DataFrame'>
2、选择列
# Indexing the frame with a single column label returns a Series.
data1 = df[0]
print(data1)
print(type(data1))

# Indexing the frame with a list of column labels returns a DataFrame.
column_labels = [0, 1, 2]
data2 = df[column_labels]
print(data2)
print(type(data2))
-----------------------结果-----------------------
0 26.563019
1 80.097571
2 5.816948
Name: 0, dtype: float64
<class 'pandas.core.series.Series'>
0 1 2
0 26.563019 78.730693 71.734976
1 80.097571 11.763913 72.013929
2 5.816948 80.052821 44.601889
<class 'pandas.core.frame.DataFrame'>
3、切片
import pandas as pd
import numpy as np
# Two 3x4 frames of random floats in [0, 100): df1 keeps the default integer
# labels, df2 uses string row labels 'a'..'c' and column labels 'w'..'z'.
df1 = pd.DataFrame(np.random.rand(12).reshape(3,4)*100)
df2 = pd.DataFrame(np.random.rand(12).reshape(3,4)*100,index=list('abc'),columns=list('wxyz'))
print(df1)
print(df2)
# Label-based slicing with .loc: both endpoints are included.
data1 = df1.loc[0:2]
data2 = df2.loc['a':'b']
print(data1)
print(data2)
print('-------------')
# Position-based slicing with .iloc: the stop position is excluded, and a stop
# beyond the last row (4 here, with only 3 rows) is simply truncated.
data3 = df1.iloc[0:4]
print(data3)
# .iloc ignores the labels entirely, so it works on a string index as well;
# positions 0:2 select rows 'a' and 'b'. Only passing string labels to .iloc
# (e.g. df2.iloc['a':'b']) raises a TypeError.
data4 = df2.iloc[0:2]
print(data4)
-----------------------------结果-----------------------------
0 1 2 3
0 1.991945 72.873862 99.795669 35.488814
1 84.619386 12.016335 46.075905 90.073915
2 8.224029 79.544446 91.706230 93.593594
w x y z
a 47.279544 51.302030 1.189867 12.461972
b 1.805559 67.717741 60.762379 37.243638
c 93.600639 76.565372 14.544118 22.152201
0 1 2 3
0 1.991945 72.873862 99.795669 35.488814
1 84.619386 12.016335 46.075905 90.073915
2 8.224029 79.544446 91.706230 93.593594
w x y z
a 47.279544 51.302030 1.189867 12.461972
b 1.805559 67.717741 60.762379 37.243638
-------------
0 1 2 3
0 1.991945 72.873862 99.795669 35.488814
1 84.619386 12.016335 46.075905 90.073915
2 8.224029 79.544446 91.706230 93.593594
w x y z
a 47.279544 51.302030 1.189867 12.461972
b 1.805559 67.717741 60.762379 37.243638
.loc[]与.iloc[]的区别:.loc按标签切片,末端包含;.iloc按整数位置切片,末端不包含。index为string时,.iloc仍可按位置使用(如df2.iloc[0:2]),只有向.iloc传入字符串标签(如df2.iloc['a':'b'])才会报错
4、Dataframe布尔索引
# Dataframe布尔索引
import pandas as pd
import numpy as np
# Random 3x4 frame of floats in [0, 100) with default integer labels.
df = pd.DataFrame(np.random.rand(3, 4) * 100)

# Comparing the whole frame yields a boolean DataFrame of the same shape.
b1 = df < 50
print(b1)
# Indexing with that mask keeps the matching cells and shows NaN elsewhere.
print(df[b1])
print('---------------')

# Mask built from selected rows only (labels 0 and 1).
b2 = df.loc[[0, 1]] < 50
print(b2)
print(df[b2])
# The same selection written as a single expression.
print(df[df.loc[[0, 1]] < 50])
print('=============')

# Mask built from selected columns only (labels 0, 1 and 2).
b3 = df[[0, 1, 2]] > 50
print(b3)
print(df[b3])
# The same selection written as a single expression.
print(df[df[[0, 1, 2]] > 50])
------------------------------结果------------------------------
0 1 2 3
0 False True False True
1 True False True True
2 True False False False
0 1 2 3
0 NaN 11.599345 NaN 7.074271
1 41.753027 NaN 8.381155 21.169034
2 45.363676 NaN NaN NaN
---------------
0 1 2 3
0 False True False True
1 True False True True
0 1 2 3
0 NaN 11.599345 NaN 7.074271
1 41.753027 NaN 8.381155 21.169034
2 NaN NaN NaN NaN
0 1 2 3
0 NaN 11.599345 NaN 7.074271
1 41.753027 NaN 8.381155 21.169034
2 NaN NaN NaN NaN
=============
0 1 2
0 True False True
1 False True False
2 False True True
0 1 2 3
0 58.355777 NaN 83.006753 NaN
1 NaN 50.136687 NaN NaN
2 NaN 94.653528 59.639213 NaN
0 1 2 3
0 58.355777 NaN 83.006753 NaN
1 NaN 50.136687 NaN NaN
2 NaN 94.653528 59.639213 NaN
5、多重索引(链式选择:先选列再选行,或先布尔筛选再选列)
# 多重索引
import pandas as pd
import numpy as np
# Random 3x4 frame of floats in [0, 100) with string row and column labels.
df = pd.DataFrame(
    np.random.rand(3, 4) * 100,
    index=list('abc'),
    columns=list('wxyz'),
)
print(df)

# Chain a column selection with a row selection: columns w/x, then rows a/b.
wx_columns = df[['w', 'x']]
print(wx_columns.loc[['a', 'b']])

# Chain a boolean mask with a column selection: keep cells below 50 (others
# become NaN), then take columns x/y.
below_fifty = df[df < 50]
print(below_fifty[['x', 'y']])
---------------------------结果---------------------------
w x y z
a 33.028597 36.888760 37.080400 72.748026
b 62.586162 50.126715 88.284479 91.140025
c 41.807285 94.652663 16.173990 17.088718
w x
a 33.028597 36.888760
b 62.586162 50.126715
x y
a 36.88876 37.08040
b NaN NaN
c NaN 16.17399