异常检测 Task3

#coding:utf-8
# Install a blanket "ignore" filter so that no warning of any category is
# shown for the rest of the script.
import warnings
warnings.simplefilter('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pyod.utils.data import generate_data

# Load the CSV data set.
# NOTE(review): Train_data is not referenced again in the code visible here;
# the detectors below are fitted on the synthetic data generated next.
Train_data = pd.read_csv('breast-cancer-unsupervised-ad.csv')

# Settings for the synthetic data
contamination = 0.1  # outlier fraction passed to generate_data
n_train = 1000       # number of training samples
n_test = 300         # number of test samples

# With behaviour='new', generate_data returns the splits in scikit-learn
# order: X_train, X_test, y_train, y_test. Requesting that behaviour
# explicitly and unpacking in the same order keeps the feature matrices and
# label vectors from being swapped (the deprecated 'old' order was
# X_train, y_train, X_test, y_test).
X_train, X_test, y_train, y_test = generate_data(n_train=n_train,
                                                 n_test=n_test,
                                                 contamination=contamination,
                                                 behaviour='new')

# PCA-based detector
from pyod.models.pca import PCA

clf_name = 'PCA'

# Build the detector with its default settings and fit it on the training
# features only; no labels are passed to fit().
clf = PCA()
clf.fit(X_train)

# Results for the training data are read from the fitted detector.
y_train_scores = clf.decision_scores_  # raw outlier scores
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)

# Results for the test data are computed from X_test.
y_test_scores = clf.decision_function(X_test)  # outlier scores
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_pred_proba = clf.predict_proba(X_test)  # the probability of being an outlier

# Visualize the results
from pyod.utils.example import visualize

# visualize() draws the figure itself and has no return value, so it is
# called as a plain statement rather than being passed to print().
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
          y_test_pred, show_figure=True)

[图:PCA 模型在训练集与测试集上的检测结果可视化(原图缺失)]

# 使用HBOS
# HBOS: Histogram-based Outlier Score
from pyod.models.hbos import HBOS
from pyod.utils.data import evaluate_print

# Settings for the synthetic data used by the HBOS example
contamination = 0.1  # outlier fraction passed to generate_data
n_train = 1000       # number of training samples
n_test = 300         # number of test samples

# With behaviour='new', generate_data returns the splits in scikit-learn
# order: X_train, X_test, y_train, y_test. Requesting that behaviour
# explicitly and unpacking in the same order keeps the feature matrices and
# label vectors from being swapped (the deprecated 'old' order was
# X_train, y_train, X_test, y_test).
X_train, X_test, y_train, y_test = generate_data(n_train=n_train,
                                                 n_test=n_test,
                                                 contamination=contamination,
                                                 behaviour='new')
clf_name = 'HBOS'

# Build the detector with its default settings.
clf = HBOS()
# Note: fitting takes only the features; no y argument is passed.
clf.fit(X_train)

# Scores and labels for the training data, read from the fitted detector
y_train_scores = clf.decision_scores_  # the larger the value, the more anomalous
y_train_pred = clf.labels_  # 0 = normal, 1 = anomalous

# Use the fitted model to score and label the test data
y_test_scores = clf.decision_function(X_test)
y_test_pred = clf.predict(X_test)

# Evaluate the outlier scores against the ground-truth labels and print the
# result, first for the training split and then for the test split.
print("\nOn Training Data:")
evaluate_print(clf_name, y=y_train, y_pred=y_train_scores)

print("\nOn Test Data:")
evaluate_print(clf_name, y=y_test, y_pred=y_test_scores)

# Plot ground truth against predictions for both splits; the figure is shown
# on screen and also saved (save_figure=True).
visualize(
    clf_name,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    y_train_pred=y_train_pred,
    y_test_pred=y_test_pred,
    show_figure=True,
    save_figure=True,
)

[图:HBOS 模型在训练集与测试集上的检测结果可视化(原图缺失)]

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值