PCA实例
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
import os
import sys
import unittest
# noinspection PyProtectedMember
from numpy.testing import assert_allclose
from numpy.testing import assert_array_less
from numpy.testing import assert_equal
from numpy.testing import assert_raises
from sklearn.metrics import roc_auc_score
from scipy.stats import rankdata
# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
#sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from pyod.models.pca import PCA
from pyod.utils.data import generate_data
class TestPCA(unittest.TestCase):
def setUp(self):
self.n_train = 200
self.n_test = 100
self.contamination = 0.1
self.roc_floor = 0.8
self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
n_train=self.n_train, n_test=self.n_test, n_features=10,
contamination=self.contamination, random_state=42)
self.clf = PCA(contamination=self.contamination,
random_state=42)
self.clf.fit(self.X_train)
def test_parameters(self):
assert (hasattr(self.clf, 'decision_scores_') and
self.clf.decision_scores_ is not None)
assert (hasattr(self.clf, 'labels_') and
self.clf.labels_ is not None)
assert (hasattr(self.clf, 'threshold_') and
self.clf.threshold_ is not None)
assert (hasattr(self.clf, '_mu') and
self.clf._mu is not None)
assert (hasattr(self.clf, '_sigma') and
self.clf._sigma is not None)
assert (hasattr(self.clf, 'selected_components_') and
self.clf.selected_components_ is not None)
assert (hasattr(self.clf, 'selected_w_components_') and
self.clf.selected_w_components_ is not None)
def test_train_scores(self):
assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
def test_prediction_scores(self):
pred_scores = self.clf.decision_function(self.X_test)
# check score shapes
assert_equal(pred_scores.shape[0], self.X_test.shape[0])
# check performance
assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
def test_prediction_labels(self):
pred_labels = self.clf.predict(self.X_test)
assert_equal(pred_labels.shape, self.y_test.shape)
def test_prediction_proba(self):
pred_proba = self.clf.predict_proba(self.X_test)
assert (pred_proba.min() >= 0)
assert (pred_proba.max() <= 1)
def test_prediction_proba_linear(self):
pred_proba = self.clf.predict_proba(self.X_test, method='linear')
assert (pred_proba.min() >= 0)
assert (pred_proba.max() <= 1)
def test_prediction_proba_unify(self):
pred_proba = self.clf.predict_proba(self.X_test, method='unify')
assert (pred_proba.min() >= 0)
assert (pred_proba.max() <= 1)
def test_prediction_proba_parameter(self):
with assert_raises(ValueError):
self.clf.predict_proba(self.X_test, method='something')
def test_fit_predict(self):
pred_labels = self.clf.fit_predict(self.X_train)
assert_equal(pred_labels.shape, self.y_train.shape)
def test_fit_predict_score(self):
self.clf.fit_predict_score(self.X_test, self.y_test)
self.clf.fit_predict_score(self.X_test, self.y_test,
scoring='roc_auc_score')
self.clf.fit_predict_score(self.X_test, self.y_test,
scoring='prc_n_score')
with assert_raises(NotImplementedError):
self.clf.fit_predict_score(self.X_test, self.y_test,
scoring='something')
def test_predict_rank(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test)
# assert the order is reserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
assert_array_less(-0.1, pred_ranks)
def test_predict_rank_normalized(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
# assert the order is reserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
assert_array_less(pred_ranks, 1.01)
assert_array_less(-0.1, pred_ranks)
def tearDown(self):
pass
if __name__ == '__main__':
unittest.main()