Python Data Visualization Cookbook 2.9.2

 1 import numpy as np
 2 import matplotlib.pyplot as plt
 3 
 4 
 5 def is_outlier(points, threshold=3.5):
 6     if len(points.shape) == 1:
 7         points = points[:, None]
 8 
 9     # Find the median number of points
10     median = np.median(points, axis=0)
11 
12     diff = np.sum((points - median)**2, axis=-1)
13     diff = np.sqrt(diff)
14     MAD = np.median(diff)
15 
16     MZS = 0.6745 * diff / MAD
17 
18     return MZS > threshold
19 
# Number of bins used by both histograms
buckets = 50

# 100 uniform random samples with four extreme values appended at the end
x = np.concatenate((np.random.random(100), [-49, 95, 100, -100]))

# is_outlier() yields a boolean mask with one entry per sample.
# Indexing with the inverted mask keeps only the samples that are NOT
# flagged, e.g. np.array([1, 2, 3, 4])[np.array([False, True, True, False])]
# gives array([2, 3]).
outlier_mask = is_outlier(x)
filtered = x[np.logical_not(outlier_mask)]

# Start a fresh figure holding two stacked panels (2 rows, 1 column)
plt.figure()

# Top panel: histogram of the raw data, outliers included
plt.subplot(2, 1, 1)
plt.hist(x, buckets)
plt.xlabel('Raw')

# Bottom panel: histogram of the data after the outliers were removed
plt.subplot(2, 1, 2)
plt.hist(filtered, buckets)
plt.xlabel('Cleaned')

# Display the figure
plt.show()

 

转载于:https://www.cnblogs.com/barrier/p/6062481.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值