# 1. 使用箱型图法去除异常值:
import numpy as np
import pandas as pd
import matplotlib as plt
import os
# Load the worksheet; header=0 takes the column names from the first row.
data = pd.read_excel('try.xls', header=0)
# Quick-look helpers, left disabled:
# print(data.shape)
# print(data.head(10))
# print(data.describe())

# Columns that the checks below iterate over.
neg_list = ['位移']

# (1) Report how many rows were loaded.
print("(1)数据的行数为:")
R = len(data)
print(R)
# (2) For each screened column, count the entries that fall below the fixed
# threshold of 2000.
print("(2)小于或大于阈值的数据提取:")
for item in neg_list:
    # Boolean mask: True where the value is below the threshold.
    neg_item = data[item].lt(2000)
    # True counts as 1, so summing the mask gives the number of matches.
    print(item, '小于2000的有', neg_item.sum(), '个', sep='')
# (3) For each screened column, count the values lying outside the
# 1.5 * IQR fences (below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR).
print("(3)异常值的个数:")
for item in neg_list:
    column = data[item]
    q1 = column.quantile(0.25)
    q3 = column.quantile(0.75)
    iqr = q3 - q1
    # Masks for values below the lower fence and above the upper fence.
    q_abnormal_L = column < q1 - 1.5 * iqr
    q_abnormal_U = column > q3 + 1.5 * iqr
    outlier_total = q_abnormal_L.sum() + q_abnormal_U.sum()
    print(item, '中有', outlier_total, '个异常值', sep='')
# (4) Print the box-plot fences of each screened column:
#     lower limit = Q1 - 1.5 * IQR, upper limit = Q3 + 1.5 * IQR.
print("(4)箱型图确定上下限:")
for item in neg_list:
    q1 = data[item].quantile(0.25)
    q3 = data[item].quantile(0.75)
    iqr = q3 - q1
    Too_small = q1 - 1.5 * iqr
    # The upper limit is measured from the third quartile (Q3), the same
    # fence the outlier count in step (3) uses; measuring it from Q1 would
    # place the limit one IQR too low.
    Too_big = q3 + 1.5 * iqr
    print("下限是", Too_small)
    print("上限是", Too_big)
print("(5)所有数据为:")
a = []
for i in neg_list:
a.append(data<