-
原文链接: https://blog.csdn.net/hnanxihotmail/article/details/81625854
-
>>> import pandas as pd
-
>>> import numpy as np
-
>>> #今天还是用到了DataFrame,如果你用一下它的筛选数据的功能,你会大吃一惊,它非常擅长筛选数据,可以极大提高你的工作效率,废话不多说,下面看看几个进行复杂数据筛选的例子。
-
>>> #首先我们创建一个DataFrame,该DataFrame包含的数据如下
-
>>> df=pd.DataFrame(np.random.randn(6,4),columns=list('ABCD'))
-
>>> df
-
A B C D
-
0 -1.108935 1.187163 1.546778 0.246329
-
1 -0.015045 1.367264 -0.617322 -1.068358
-
2 0.502788 0.305497 -0.819171 -0.331027
-
3 2.585354 -0.043285 1.056259 -0.079882
-
4 0.316549 -1.464567 1.504431 0.803362
-
5 -1.097251 -0.706594 -1.393058 -0.251690
-
>>> #假如我们想要筛选D列数据中大于0的行
-
>>> df[df.D>0]
-
A B C D
-
0 -1.108935 1.187163 1.546778 0.246329
-
4 0.316549 -1.464567 1.504431 0.803362
-
>>> #使用&符号可以实现多条件筛选,当然使用"|"符号也可以实现多条件筛选,只不过它表示的是"或"的关系。
-
>>> df[(df.D>0)&(df.C<0)]
-
Empty DataFrame
-
Columns: [A, B, C, D]
-
Index: []
-
>>> df[(df.D<0)&(df.C>0)]
-
A B C D
-
3 2.585354 -0.043285 1.056259 -0.079882
-
>>> df[(df.D<0.5)&(df.C>1.5)]
-
A B C D
-
0 -1.108935 1.187163 1.546778 0.246329
-
>>> df[(df.D<0.5)|(df.C>1.5)]
-
A B C D
-
0 -1.108935 1.187163 1.546778 0.246329
-
1 -0.015045 1.367264 -0.617322 -1.068358
-
2 0.502788 0.305497 -0.819171 -0.331027
-
3 2.585354 -0.043285 1.056259 -0.079882
-
4 0.316549 -1.464567 1.504431 0.803362
-
5 -1.097251 -0.706594 -1.393058 -0.251690
-
>>> df[(df.D<0.5)|(df.C>1.52)]
-
A B C D
-
0 -1.108935 1.187163 1.546778 0.246329
-
1 -0.015045 1.367264 -0.617322 -1.068358
-
2 0.502788 0.305497 -0.819171 -0.331027
-
3 2.585354 -0.043285 1.056259 -0.079882
-
5 -1.097251 -0.706594 -1.393058 -0.251690
-
>>> #假如我们只需要A和B两列数据,而C和D两列数据只用于筛选,可以像下面这样写,结果只返回A、B两列数据
-
>>> df[['A','B']][(df.D>0)&(df.C<0)]
-
Empty DataFrame
-
Columns: [A, B]
-
Index: []
-
>>> df[['A','B']][(df.D<0)&(df.C>0)]
-
A B
-
3 2.585354 -0.043285
-
>>> index = (df.D<0)&(df.C>0)
-
>>> index
-
0 False
-
1 False
-
2 False
-
3 True
-
4 False
-
5 False
-
dtype: bool
-
>>> df(index)
-
Traceback (most recent call last):
-
File "<pyshell#19>", line 1, in <module>
-
df(index)
-
TypeError: 'DataFrame' object is not callable
-
>>> df[index]
-
A B C D
-
3 2.585354 -0.043285 1.056259 -0.079882
-
>>> #我们还可以使用isin方法来筛选特定的值,把要筛选的值写到一个列表里,如alist。注意:屏幕上显示的浮点数是四舍五入后的结果,直接拿显示值去匹配通常匹配不到(见下面全为False的结果),isin更适合整数、字符串等可以精确比较的值
-
>>> alist=[-0.079882,0.687050,0.3685412]
-
>>> df['D'].isin(alist)
-
0 False
-
1 False
-
2 False
-
3 False
-
4 False
-
5 False
-
Name: D, dtype: bool
-
>>> alist=[0.246329]
-
>>> df['D'].isin(alist)
-
0 False
-
1 False
-
2 False
-
3 False
-
4 False
-
5 False
-
Name: D, dtype: bool
-
>>> df[df['D'].isin(alist)]
-
Empty DataFrame
-
Columns: [A, B, C, D]
-
Index: []
-
>>> df=pd.DataFrame(np.random.normal(6,4),columns=list('ABCD'))
-
Traceback (most recent call last):
-
File "<pyshell#27>", line 1, in <module>
-
df=pd.DataFrame(np.random.normal(6,4),columns=list('ABCD'))
-
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas\core\frame.py", line 422, in __init__
-
raise ValueError('DataFrame constructor not properly called!')
-
ValueError: DataFrame constructor not properly called!
-
>>> df=pd.DataFrame(np.arange(16).reshape(4,4),columns=list('ABCD'))
-
>>> df
-
A B C D
-
0 0 1 2 3
-
1 4 5 6 7
-
2 8 9 10 11
-
3 12 13 14 15
-
>>> alist=[11]
-
>>> df['D'].isin(alist)
-
0 False
-
1 False
-
2 True
-
3 False
-
Name: D, dtype: bool
-
>>> df[df['D'].isin(alist)]
-
A B C D
-
2 8 9 10 11
-
>>>