@创建于:2022.07.02
@修改于:2022.07.02
1、构建样例数据
import pandas as pd
import numpy as np
# Sample frame without missing values; each tuple is one record
# laid out as (books, price, num).
df = pd.DataFrame(
    [
        ('book_1', 10, 4),
        ('book_2', 20, 6),
        ('book_2', 30, 2),
        ('book_1', 30, 4),
        ('book_2', 20, 2),
        ('book_3', 10, 8),
    ],
    columns=['books', 'price', 'num'],
)
books price num
0 book_1 10 4
1 book_2 20 6
2 book_2 30 2
3 book_1 30 4
4 book_2 20 2
5 book_3 10 8
# Sample frame with missing values. Both None and np.nan are stored as NaN,
# so the 'price' and 'num' columns become float64.
# np.nan is used instead of np.NaN: the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
df2 = pd.DataFrame({
    'books': ['book_1', 'book_1', 'book_2', 'book_2', 'book_2', 'book_3', 'book_4'],
    'price': [10, 20, 10, 30, None, 10, np.nan],
    'num': [2, 4, np.nan, 2, 4, 8, 2],
})
books price num
0 book_1 10.0 2.0
1 book_1 20.0 4.0
2 book_2 10.0 NaN
3 book_2 30.0 2.0
4 book_2 NaN 4.0
5 book_3 10.0 8.0
6 book_4 NaN 2.0
2、filter,可以对分组进行操作
# Filter by group size: keep every row whose 'books' group has more than one row.
df.groupby('books').filter(lambda group: group.shape[0] > 1)
books price num
0 book_1 10 4
1 book_2 20 6
2 book_2 30 2
3 book_1 30 4
4 book_2 20 2
# Filter by a group aggregate: keep every row whose 'books' group has a
# total 'num' greater than 8.
df.groupby('books').filter(
    lambda group: group['num'].sum() > 8
)
books price num
1 book_2 20 6
2 book_2 30 2
4 book_2 20 2
3、query,不能对分组操作
# Row-wise filter: keep rows where both conditions hold. Comparisons against
# NaN evaluate to False, so rows with a missing price or num are dropped.
df2.query(expr='price > 10 and num < 5')
books price num
1 book_1 20.0 4.0
3 book_2 30.0 2.0