孤立森林(Isolation Forest)-python实例


两个实例:1. 来自 sklearn 官网;2. 自己构造的数据。在第二个例子中发现,DBSCAN 认为的异常点包含在 iTree(孤立森林)认为的异常点之中,
也就是说,iTree 会给出一个异常程度的排序,需要进一步查看这个排序(后面再做)。
例子1:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

# Seeded generator so that every run draws exactly the same samples.
rng = np.random.RandomState(42)

# Training set: 100 Gaussian points (std 0.3), shifted once by +2 and once
# by -2 on both axes, giving two clusters of 100 points each.
X = rng.randn(100, 2) * 0.3
X_train = np.concatenate((X + 2, X - 2), axis=0)
print("len X_train", len(X_train))

# Regular novel observations: built the same way from 20 fresh draws.
X = rng.randn(20, 2) * 0.3
X_test = np.concatenate((X + 2, X - 2), axis=0)
print("len X_test", len(X_test))

# Abnormal novel observations: 20 points drawn uniformly between -4 and 4
# on each axis.
X_outliers = rng.uniform(-4, 4, (20, 2))
# Fit the model on the training clusters.
# NOTE: the former `behaviour='new'` keyword is intentionally not passed.
# It was deprecated in scikit-learn 0.22 and removed in 0.24, where passing
# it raises TypeError; the "new" behaviour is the only one since 0.22.
clf = IsolationForest(max_samples=100,
                      random_state=rng, contamination='auto')
clf.fit(X_train)

# predict() labels each sample 1 (inlier) or -1 (outlier); the boolean masks
# below split every data set into those two groups for printing.
y_pred_train = clf.predict(X_train)
print("y_pred_train:",y_pred_train)
print("1",X_train[y_pred_train==1])
print("-1",X_train[y_pred_train==-1])
print("###########################")
y_pred_test = clf.predict(X_test)
print("y_pred_test:",y_pred_test)
print("1",X_test[y_pred_test==1])
print("-1",X_test[y_pred_test==-1])
print("###########################")
y_pred_outliers = clf.predict(X_outliers)
print("y_pred_outliers:",y_pred_outliers)
print("1",X_outliers[y_pred_outliers==1])
print("-1",X_outliers[y_pred_outliers==-1])


# Disabled plotting code: it is wrapped in a bare triple-quoted string, so it
# is evaluated as a string expression and never executed. If enabled, it would
# evaluate clf.decision_function on a 50x50 grid spanning -5..5 on both axes,
# draw it as filled contours, and scatter the training, test and outlier
# samples on top.
"""
# plot the line, the samples, and the nearest vectors to the plane
xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.title("IsolationForest")
plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)

b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',
                 s=20, edgecolor='k')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',
                 s=20, edgecolor='k')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',
                s=20, edgecolor='k')
plt.axis('tight')
plt.xlim((-5, 5))
plt.ylim((-5, 5))
plt.legend([b1, b2, c],
           ["training observations",
            "new regular observations", "new abnormal observations"],
           loc="upper left")
plt.show()
"""

例子2:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

# Hand-built data set: 65 rows of three values each, one row per line.
data = [
    # Rows 0-19: first column between roughly -3.2 and -2.2.
    [-2.68420713, 1.469732895, 2],
    [-2.71539062, -0.763005825, 2],
    [-2.88981954, -0.618055245, 2],
    [-2.7464372, -1.40005944, 2],
    [-2.72859298, 1.50266052, 2],
    [-2.27989736, 3.365022195, 2],
    [-2.82089068, -0.369470295, 2],
    [-2.62648199, 0.766824075, 2],
    [-2.88795857, -2.568591135, 2],
    [-2.67384469, -0.48011265, 2],
    [-2.50652679, 2.933707545, 2],
    [-2.61314272, 0.096842835, 2],
    [-2.78743398, -1.024830855, 2],
    [-3.22520045, -2.264759595, 2],
    [-2.64354322, 5.33787705, 2],
    [-2.38386932, 6.05139453, 2],
    [-2.6225262, 3.681403515, 2],
    [-2.64832273, 1.436115015, 2],
    [-2.19907796, 3.956598405, 2],
    [-2.58734619, 2.34213138, 2],
    # Rows 20-39: first column between roughly -0.8 and 1.5.
    [1.28479459, 3.084476355, 2],
    [0.93241075, 1.436391405, 2],
    [1.46406132, 2.268854235, 2],
    [0.18096721, -3.71521773, 2],
    [1.08713449, 0.339256755, 2],
    [0.64043675, -1.87795566, 2],
    [1.09522371, 1.277510445, 2],
    [-0.75146714, -4.504983795, 2],
    [1.04329778, 1.030306095, 2],
    [-0.01019007, -3.242586915, 2],
    [-0.5110862, -5.681213775, 2],
    [0.51109806, -0.460278495, 2],
    [0.26233576, -2.46551985, 2],
    [0.98404455, -0.55962189, 2],
    [-0.174864, -1.133170065, 2],
    [0.92757294, 2.107062945, 2],
    [0.65959279, -1.583893305, 2],
    [0.23454059, -1.493648235, 2],
    [0.94236171, -2.43820017, 2],
    [0.0432464, -2.616702525, 2],
    # Rows 40-59: first column mostly between 2.5 and 5.8, except for the
    # much larger values 30.41407223, 111 and 59.48876538 (the last of these
    # rows is also the only one whose third value is 13 instead of 2).
    [4.53172698, -0.05329008, 2],
    [30.41407223, -2.58716277, 2],
    [4.61648461, 1.538708805, 2],
    [3.97081495, -0.815065605, 2],
    [4.34975798, -0.188471475, 2],
    [5.39687992, 2.462256225, 2],
    [2.51938325, -5.361082605, 2],
    [4.9320051, 1.585696545, 2],
    [4.31967279, -1.104966765, 2],
    [4.91813423, 3.511712835, 2],
    [3.66193495, 1.0891728, 2],
    [111, -0.972695745, 2],
    [4.16537886, 0.96876126, 2],
    [3.34459422, -3.493869435, 2],
    [3.5852673, -2.426881725, 2],
    [3.90474358, 0.534685455, 2],
    [3.94924878, 0.18328617, 2],
    [59.48876538, 5.27195043, 13],
    [5.79468686, 1.139695065, 2],
    [3.29832982, -3.42456273, 2],
    # Rows 60-64: exact repeats of rows 0-4.
    [-2.68420713, 1.469732895, 2],
    [-2.71539062, -0.763005825, 2],
    [-2.88981954, -0.618055245, 2],
    [-2.7464372, -1.40005944, 2],
    [-2.72859298, 1.50266052, 2],
]

# Convert the nested list into a 2-D array and show it.
X_train = np.asarray(data)
print("X_train:", X_train)
# Fit the model on the hand-built data set.
# NOTE: the former `behaviour='new'` keyword is intentionally not passed.
# It was deprecated in scikit-learn 0.22 and removed in 0.24, where passing
# it raises TypeError.
# max_samples='auto' replaces the previous value of 100: the data set has
# only 65 rows, so 100 made scikit-learn emit a warning and fall back to the
# number of rows, which is what 'auto' (min(256, n_samples)) selects as well.
clf = IsolationForest(max_samples='auto',
                      contamination='auto')
clf.fit(X_train)

# predict() labels each row 1 (inlier) or -1 (outlier); the boolean masks
# split the rows into those two groups for printing.
y_pred_train = clf.predict(X_train)
print("y_pred_train:",y_pred_train)
print("1",X_train[y_pred_train==1])
print("-1",X_train[y_pred_train==-1])
print("###########################")
  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小金子的夏天

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值