import pandas as pd
# Load the CSV with every column read as a string (dtype=str).
pdd = pd.read_csv(r'C:\Users\pt0531\Desktop\2019-01.csv', dtype=str)
print(pdd.columns)
len(pdd)  # 144538
# Original note: 4 columns — ['so_date', 'so_voucher_no', 'series_code']
# (only three names were listed in the note).

# Row count per voucher, as a two-column frame: so_voucher_no, counts.
grouped = pdd.groupby('so_voucher_no').size().reset_index(name='counts')

# Voucher numbers to delete: those appearing on fewer than 3 rows.
# A boolean mask replaces the manual index loop and yields the same list.
voucher_number = grouped.loc[grouped['counts'] < 3, 'so_voucher_no'].tolist()

# Keep only the rows whose voucher is not in the delete list.
new_pdd = pdd[~pdd['so_voucher_no'].isin(voucher_number)]
len(new_pdd)  # 117693
# Small worked example of the same "drop rare groups" filter.
df = pd.DataFrame({
    'vochure': ['US', 'UK', 'Germany', 'UK'],
    'sc': ['1001', '1002', '1004', '1003'],
})
df

# Row count per 'vochure' value, as a two-column frame: vochure, count.
grouped = df.groupby('vochure').size().reset_index(name='count')

# Values to delete: those appearing on fewer than 2 rows.
# A boolean mask replaces the manual index loop and yields the same list.
voucher_number = grouped.loc[grouped['count'] < 2, 'vochure'].tolist()
voucher_number

# Keep only the rows whose 'vochure' value is not in the delete list.
new_df = df[~df['vochure'].isin(voucher_number)]
new_df
# Related article: python df.groupby objects — support iteration, count, and other operations
# (Latest recommended article published 2025-03-03 22:23:36)