布尔索引
这里需要先了解 DataFrame 中的布尔索引:可以用"某一列的值是否满足布尔条件"来过滤数据,如下:
import numpy as np
import pandas as pd
from pandas import *
from numpy import *
# Boolean indexing demo: build a 4x4 frame of standard-normal samples, then
# keep only the rows whose value in column "A" is greater than 1.
data_1 = pd.DataFrame(np.random.randn(4, 4), columns=["A", "B", "C", "D"])
print(data_1)
a_above_one = data_1["A"] > 1  # boolean Series, one flag per row
print(a_above_one)
print(data_1[a_above_one])
# Sample output from one run (the data is random, so values differ per run):
# =============================================================================
# A B C D
# 0 -1.222857 -1.043994 -0.442975 1.827680
# 1 0.726999 0.563181 2.319214 0.802055
# 2 0.065099 -0.008755 1.202573 0.180843
# 3 1.344852 -1.061955 -0.333201 -0.584720
# 0 False
# 1 False
# 2 False
# 3 True
# Name: A, dtype: bool
# A B C D
# 3 1.344852 -1.061955 -0.333201 -0.58472
# =============================================================================
isin()接受一个列表,判断该列中元素是否在列表中。
import numpy as np
import pandas as pd
from pandas import *
from numpy import *
# Fresh 4x4 frame of standard-normal samples; the same "A > 1" row filter is
# shown again before the membership examples that follow.
data_1 = pd.DataFrame(np.random.randn(4, 4), columns=["A", "B", "C", "D"])
print(data_1)
rows_with_large_a = data_1["A"] > 1  # True where column "A" exceeds 1
print(rows_with_large_a)
print(data_1.loc[rows_with_large_a])
# Sample output from one run (the data is random, so values differ per run):
# =============================================================================
# A B C D
# 0 0.169127 0.040550 0.559088 0.017715
# 1 1.994837 0.631279 0.094372 0.614455
# 2 0.682492 -1.601469 -0.569590 0.638142
# 3 1.690618 0.935999 -0.334878 -0.167669
# 0 False
# 1 True
# 2 False
# 3 True
# Name: A, dtype: bool
# A B C D
# 1 1.994837 0.631279 0.094372 0.614455
# 3 1.690618 0.935999 -0.334878 -0.167669
# =============================================================================
# Add two string columns to the existing frame, then filter rows by
# membership: isin() takes a list and flags each element found in it.
data_1["E"] = ["a", "a", "b", "c"]
data_1["F"] = ["a", "d", "f", "d"]
print(data_1)
e_is_a_or_c = data_1["E"].isin(["a", "c"])  # boolean Series named "E"
print(e_is_a_or_c)
print(data_1[e_is_a_or_c])
# Sample output (numeric columns come from the random frame built earlier):
# =============================================================================
# A B C D E F
# 0 0.169127 0.040550 0.559088 0.017715 a a
# 1 1.994837 0.631279 0.094372 0.614455 a d
# 2 0.682492 -1.601469 -0.569590 0.638142 b f
# 3 1.690618 0.935999 -0.334878 -0.167669 c d
# 0 True
# 1 True
# 2 False
# 3 True
# Name: E, dtype: bool
# A B C D E F
# 0 0.169127 0.040550 0.559088 0.017715 a a
# 1 1.994837 0.631279 0.094372 0.614455 a d
# 3 1.690618 0.935999 -0.334878 -0.167669 c d
# =============================================================================
# Several columns can be filtered at once by AND-ing their boolean conditions.
a_is_positive = data_1["A"] > 0
e_is_a = data_1["E"].isin(["a"])
f_is_d_or_f = data_1["F"].isin(["d", "f"])
data_2 = data_1[a_is_positive & e_is_a & f_is_d_or_f]
print(data_2)
# Sample output (numeric columns come from the random frame built earlier):
# =============================================================================
# A B C D E F
# 1 1.994837 0.631279 0.094372 0.614455 a d
# =============================================================================
isin 的取反用法(用 ~ 对布尔条件取反,选出"不满足条件"的行):
# Negated isin(): rebuild the frame, then drop the rows where E is "a" AND
# F is "d" or "f" by inverting the combined mask with ~.
data_1 = pd.DataFrame(np.random.randn(4, 4), columns=["A", "B", "C", "D"])
data_1["E"] = ["a", "a", "b", "c"]
data_1["F"] = ["a", "d", "f", "d"]
print(data_1)
matches_both = data_1["E"].isin(["a"]) & data_1["F"].isin(["d", "f"])
data_2 = data_1[~matches_both]  # keep every row that does NOT match both
print(data_2)
# Sample output from one run (numeric values are random and differ per run):
# =============================================================================
# A B C D E F
# 0 -1.367389 -0.010230 -1.129077 1.710639 a a
# 1 1.685804 2.060111 1.262265 0.729453 a d
# 2 0.277453 -1.108263 0.150806 -1.038848 b f
# 3 -1.364977 -0.343693 0.450186 1.236273 c d
# A B C D E F
# 0 -1.367389 -0.010230 -1.129077 1.710639 a a
# 2 0.277453 -1.108263 0.150806 -1.038848 b f
# 3 -1.364977 -0.343693 0.450186 1.236273 c d
# =============================================================================
参考:https://blog.csdn.net/lzw2016/article/details/80472649
以上,记录本人学习过程。