import pandas as pd
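# Normal distribution
# 3-sigma rule: values outside [mean - 3 * std, mean + 3 * std] are treated as outliers
# mean() - 3 * std() --- lower bound
# mean() + 3 * std() --- upper bound
# Hand-written implementation of the 3-sigma rule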
def three_sigma(ser, n_sigma=3):
    """
    Return the index labels of the values that satisfy the 3-sigma rule.

    A value is kept when it lies inside the closed interval
    ``[mean - n_sigma * std, mean + n_sigma * std]``, where the mean and the
    standard deviation come from ``ser.mean()`` and ``ser.std()``.

    Missing values are never kept, because a comparison against NaN is False.
    For the same reason nothing is kept when ``ser.std()`` itself is NaN
    (e.g. a Series with fewer than two non-missing values).

    :param ser: pandas Series of numeric data to screen
    :param n_sigma: half-width of the accepted interval, in standard
        deviations (defaults to 3, the classic 3-sigma rule)
    :return: ``ser.index`` restricted to the labels whose values are in range
    """
    # Compute each statistic once instead of once per bound.
    center = ser.mean()
    spread = n_sigma * ser.std()
    # Boolean mask: True where the value lies within both bounds (inclusive).
    in_range = (center - spread <= ser) & (ser <= center + spread)
    # Return labels rather than values so callers can select rows with .loc.
    return ser.index[in_range]
# Validate the filter against the order-detail spreadsheet
deatil = pd.read_excel("./meal_order_detail.xlsx")
print(deatil.shape)
# Apply the 3-sigma rule to the 'amounts' column and keep only the rows
# whose labels it returns; the shape is printed before and after to compare
index_name_list = three_sigma(deatil["amounts"])
deatil = deatil.loc[index_name_list]
print(deatil.shape)
# percentile() computes quantiles
# IQR rule using np.percentile(): lower bound = QL - 1.5 * IQR, upper bound = QU + 1.5 * IQR