实例 — 异常检测（1）：用 PyOD 工具检测人工生成数据中的异常值

最新推荐文章于 2022-03-14 16:15:50 发布

Fffffeifei

最新推荐文章于 2022-03-14 16:15:50 发布

阅读量423

点赞数 1

本文链接：https://blog.csdn.net/qq_40253214/article/details/105536259

版权

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager
# 导入想要用来检测数据集中异常值的模型。使用ABOD和KNN：
from pyod.models.abod import ABOD
from pyod.models.knn import KNN
# 创建一个带有异常值的随机数据集并绘制它
from pyod.utils.data import generate_data, get_outliers_inliers
# Generate a synthetic 2-feature training set of 200 samples (training split only).
X_train, Y_train = generate_data(n_train=200, train_only=True, n_features=2)

# Outlier fraction handed to every detector below and reused for the score
# threshold; 0.1 is the default used by the data generator.
outlier_fraction = 0.1

# Split the samples into outliers and inliers using the ground-truth labels,
# then record how many of each there are.
x_outliers, x_inliers = get_outliers_inliers(X_train, Y_train)
n_outliers = len(x_outliers)
n_inliers = len(x_inliers)

# Pull the two features apart as column vectors for the raw scatter plot.
F1 = X_train[:, 0].reshape(-1, 1)
F2 = X_train[:, 1].reshape(-1, 1)

# Square evaluation grid over [-10, 10] x [-10, 10] with 200 points per axis;
# the detectors are scored on it later to draw their decision regions.
grid_axis = np.linspace(-10, 10, 200)
xx, yy = np.meshgrid(grid_axis, grid_axis)

# Scatter plot of the raw data, one axis per feature.
plt.scatter(F1, F2)
plt.xlabel('F1')
plt.ylabel('F2')
# plt.show() is left disabled here, so this figure is not displayed at this point.

# Detectors to compare, keyed by the title used for their subplot.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(contamination=outlier_fraction),
    'K Nearest Neighbors (KNN)': KNN(contamination=outlier_fraction),
}
# Fit every detector in `classifiers` on the training data, print how many
# points it mislabels, and draw its decision regions in its own subplot.
plt.figure(figsize=(10, 10))

# Select true inliers/outliers by label rather than by position, so the plot
# does not depend on outliers being stored last and still behaves when there
# are no outliers at all (X[:-0] would be empty and X[-0:] would be everything).
inlier_mask = Y_train == 0
outlier_mask = Y_train == 1

for i, (clf_name, clf) in enumerate(classifiers.items()):
    # Fit the detector on the training set.
    clf.fit(X_train)

    # Raw anomaly scores, negated so that larger values mean "more normal"
    # (PyOD's decision_function is higher for more abnormal points).
    scores_pred = clf.decision_function(X_train) * -1

    # Binary prediction for every training point (outlier vs. inlier).
    y_pred = clf.predict(X_train)

    # Number of points whose predicted label differs from the ground truth.
    n_errors = (y_pred != Y_train).sum()
    print('No of Errors : ', clf_name, n_errors)

    # Everything below only builds the visualization.
    # Cut-off on the negated scores: the outlier_fraction-percentile, so that
    # roughly that share of training points falls below it.
    threshold = stats.scoreatpercentile(scores_pred, 100 * outlier_fraction)

    # Negated anomaly score at every grid node, reshaped back to the grid.
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
    Z = Z.reshape(xx.shape)

    # One column per detector (two columns for the two models defined above).
    subplot = plt.subplot(1, len(classifiers), i + 1)

    # Blue gradient from the minimum score up to the threshold.
    subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 10),
                     cmap=plt.cm.Blues_r)
    # Red contour line where the score equals the threshold.
    boundary = subplot.contour(xx, yy, Z, levels=[threshold],
                               linewidths=2, colors='red')
    # Orange fill from the threshold up to the maximum score.
    subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()], colors='orange')

    # True inliers as white dots, true outliers as black dots.
    inlier_pts = subplot.scatter(X_train[inlier_mask, 0], X_train[inlier_mask, 1],
                                 c='white', s=20, edgecolor='k')
    outlier_pts = subplot.scatter(X_train[outlier_mask, 0], X_train[outlier_mask, 1],
                                  c='black', s=20, edgecolor='k')

    # ContourSet.collections is deprecated/removed in recent Matplotlib;
    # legend_elements() returns ready-made proxy artists for the contour levels.
    boundary_handle = boundary.legend_elements()[0][0]

    subplot.axis('tight')
    subplot.legend(
        [boundary_handle, inlier_pts, outlier_pts],
        ['learned decision function', 'true inliers', 'true outliers'],
        prop=matplotlib.font_manager.FontProperties(size=10),
        loc='lower right')
    subplot.set_title(clf_name)
    # Fix the view to the grid extent on every subplot, not only the last one.
    subplot.set_xlim((-10, 10))
    subplot.set_ylim((-10, 10))

plt.show()

Fffffeifei

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
实例 — 异常检测（1）：用 PyOD 工具检测人工生成数据中的异常值

import numpy as np; from scipy import stats; import matplotlib.pyplot as plt; import matplotlib.font_manager; # 导入想要用来检测数据集中异常值的模型。使用ABOD和KNN：; from pyod.models.abod import ABOD; from pyod.models.knn import...
复制链接

扫一扫