一、不先选取行或列(直接对整个 DataFrame 做比较),则会对数据中的每个值进行判断
import numpy as np
import pandas as pd
# Boolean indexing on a DataFrame.
# It works on the same principle as boolean indexing on a Series.
values = np.random.rand(4, 4) * 100  # 4x4 grid of random values scaled by 100
df = pd.DataFrame(
    values,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print("df = \n", df)
print('-' * 100)

# Comparing the whole frame (no row/column selection first) tests every
# single value and produces a boolean DataFrame.
b1 = df < 20
report = "b1 = \n{0} \ntype(b1) = {1}"
print(report.format(b1, type(b1)))
print('-' * 50)

# Indexing with that mask keeps every row and column: cells where the mask
# is True keep their original value, cells where it is False become NaN.
# The one-liner df[df < 20] is equivalent.
masked = df[b1]
print("df[b1] = \n", masked)
print('-' * 100)
打印结果:
df =
a b c d
one 87.291813 52.283368 63.663946 51.061549
two 63.136263 3.771473 16.311522 49.151494
three 51.227805 94.406828 68.841388 38.832457
four 71.877744 24.139374 14.785827 40.320004
----------------------------------------------------------------------------------------------------
b1 =
a b c d
one False False False False
two False True True False
three False False False False
four False False True False
type(b1) = <class 'pandas.core.frame.DataFrame'>
--------------------------------------------------
df[b1] =
a b c d
one NaN NaN NaN NaN
two NaN 3.771473 16.311522 NaN
three NaN NaN NaN NaN
four NaN NaN 14.785827 NaN
----------------------------------------------------------------------------------------------------
Process finished with exit code 0
二、单列做判断
import numpy as np
import pandas as pd
# Boolean indexing on a DataFrame.
# It works on the same principle as boolean indexing on a Series.
values = np.random.rand(4, 4) * 100  # 4x4 grid of random values scaled by 100
df = pd.DataFrame(
    values,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print("df = \n", df)
print('-' * 100)

# Condition on a single column: the comparison yields a boolean Series
# with one entry per row.
b2 = df['a'] > 50
report = "b2 = \n{0} \ntype(b2) = {1}"
print(report.format(b2, type(b2)))
print('-' * 50)

# Indexing with the Series keeps only the rows where it is True, and those
# rows keep all of their columns (not just column 'a').
# The one-liner df[df['a'] > 50] is equivalent.
selected = df[b2]
print("df[b2] = \n", selected)
print('-' * 100)
打印结果:
df =
a b c d
one 58.278853 52.279956 26.786818 58.326127
two 2.962522 15.493893 61.898196 68.399135
three 3.041504 58.567052 72.174175 40.690512
four 26.269447 56.494043 24.593274 6.181143
----------------------------------------------------------------------------------------------------
b2 =
one True
two False
three False
four False
Name: a, dtype: bool
type(b2) = <class 'pandas.core.series.Series'>
--------------------------------------------------
df[b2] =
a b c d
one 58.278853 52.279956 26.786818 58.326127
----------------------------------------------------------------------------------------------------
Process finished with exit code 0
三、多列做判断
import numpy as np
import pandas as pd
# Boolean indexing on a DataFrame.
# It works on the same principle as boolean indexing on a Series.
values = np.random.rand(4, 4) * 100  # 4x4 grid of random values scaled by 100
df = pd.DataFrame(
    values,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print("df = \n", df)
print('-' * 100)

# Condition on several columns: the comparison yields a boolean DataFrame
# that contains only the selected columns.
b3 = df[['a', 'b']] > 50
report = "b3 = \n{0} \ntype(b3) = {1}"
print(report.format(b3, type(b3)))
print('-' * 50)

# Indexing with that mask keeps every row and column: True cells keep their
# value, False cells become NaN, and columns absent from the mask ('c' and
# 'd' here) come back entirely as NaN.
# The one-liner df[df[['a', 'b']] > 50] is equivalent.
masked = df[b3]
print("df[b3] = \n", masked)
print('-' * 100)
打印结果:
df =
a b c d
one 83.682549 43.806675 18.337635 4.737157
two 26.310925 44.744914 88.206433 78.928505
three 17.707802 15.605249 42.536194 33.818267
four 56.771041 76.339754 6.616507 6.201109
----------------------------------------------------------------------------------------------------
b3 =
a b
one True False
two False False
three False False
four True True
type(b3) = <class 'pandas.core.frame.DataFrame'>
--------------------------------------------------
df[b3] =
a b c d
one 83.682549 NaN NaN NaN
two NaN NaN NaN NaN
three NaN NaN NaN NaN
four 56.771041 76.339754 NaN NaN
----------------------------------------------------------------------------------------------------
Process finished with exit code 0
四、多行做判断
import numpy as np
import pandas as pd
# Boolean indexing on a DataFrame.
# It works on the same principle as boolean indexing on a Series.
values = np.random.rand(4, 4) * 100  # 4x4 grid of random values scaled by 100
df = pd.DataFrame(
    values,
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
print("df = \n", df)
print('-' * 100)

# Condition on several rows: the comparison yields a boolean DataFrame
# that contains only the selected rows.
b4 = df.loc[['one', 'three']] < 50
report = "b4 = \n{0} \ntype(b4) = {1}"
print(report.format(b4, type(b4)))
print('-' * 50)

# Indexing with that mask keeps every row and column: True cells keep their
# value, False cells become NaN, and rows absent from the mask ('two' and
# 'four' here) come back entirely as NaN.
# The one-liner df[df.loc[['one', 'three']] < 50] is equivalent.
masked = df[b4]
print("df[b4] = \n", masked)
print('-' * 100)
打印结果:
df =
a b c d
one 85.557233 63.095288 82.460026 11.354019
two 31.769011 54.641329 80.713415 59.087436
three 9.233315 11.215880 26.345829 40.227091
four 8.940842 76.884994 57.372274 78.014795
----------------------------------------------------------------------------------------------------
b4 =
a b c d
one False False False True
three True True True True
type(b4) = <class 'pandas.core.frame.DataFrame'>
--------------------------------------------------
df[b4] =
a b c d
one NaN NaN NaN 11.354019
two NaN NaN NaN NaN
three 9.233315 11.21588 26.345829 40.227091
four NaN NaN NaN NaN
----------------------------------------------------------------------------------------------------
Process finished with exit code 0