Simple Deep-Learning-Based Anomaly Detection for NASA Bearing Time Series Data (Python)


Autoencoder
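
The idea: train an autoencoder to reconstruct the normalized, healthy-condition training data, then use the mean absolute reconstruction error on new samples as an anomaly score. Samples whose score exceeds a fixed threshold (0.3 below, read off the training-loss distribution) are flagged as anomalous.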

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow
from dataset import load_data, filter_dataset, index_timestamps, normalize_data
from keras import regularizers
from keras.layers import Dense
from keras.models import Sequential
from numpy.random import seed


def create_model(X_train):
    # Fix the random seeds for reproducibility
    seed(10)
    tensorflow.random.set_seed(10)
    act_func = 'elu'


    model = Sequential()
    # Encoder: first hidden layer, connected to the input vector X
    model.add(Dense(10, activation=act_func,
                    kernel_initializer='glorot_uniform',
                    kernel_regularizer=regularizers.l2(0.0),
                    input_shape=(X_train.shape[1],)))


    # Bottleneck layer: 2-dimensional compressed representation
    model.add(Dense(2, activation=act_func,
                    kernel_initializer='glorot_uniform'))

    # Decoder: hidden layer mirroring the encoder
    model.add(Dense(10, activation=act_func,
                    kernel_initializer='glorot_uniform'))

    # Output layer reconstructs the input vector (linear activation)
    model.add(Dense(X_train.shape[1],
                    kernel_initializer='glorot_uniform'))


    model.compile(loss='mse', optimizer='adam')


    # Train model for 100 epochs, batch size of 10: 
    NUM_EPOCHS=100
    BATCH_SIZE=10


    history = model.fit(np.array(X_train), np.array(X_train),
                        batch_size=BATCH_SIZE,
                        epochs=NUM_EPOCHS,
                        validation_split=0.05,
                        verbose=1)


    plt.plot(history.history['loss'],
            'b',
            label='Training loss')
    plt.plot(history.history['val_loss'],
            'r',
            label='Validation loss')
    plt.legend(loc='upper right')
    plt.xlabel('Epochs')
    plt.ylabel('Loss, [mse]')
    plt.ylim([0,.1])
    plt.show()
    return model 


def view_anomalies(X_train, X_test, model):
    # Plot the distribution of the reconstruction loss on the training set
    # to identify a suitable anomaly threshold
    X_pred = model.predict(np.array(X_train))
    X_pred = pd.DataFrame(X_pred, 
                        columns=X_train.columns)
    X_pred.index = X_train.index


    scored = pd.DataFrame(index=X_train.index)
    scored['Loss_mae'] = np.mean(np.abs(X_pred-X_train), axis = 1)
    plt.figure()
    sns.histplot(scored['Loss_mae'],
                 bins=10,
                 kde=True,
                 color='blue')
    plt.xlim([0.0,.5])
    plt.show()


    # Compare the reconstruction loss on the test set to the threshold
    X_pred = model.predict(np.array(X_test))
    X_pred = pd.DataFrame(X_pred, 
                        columns=X_test.columns)
    X_pred.index = X_test.index


    scored = pd.DataFrame(index=X_test.index)
    scored['Loss_mae'] = np.mean(np.abs(X_pred-X_test), axis = 1)
    # Fixed threshold read off the training-loss distribution above
    scored['Threshold'] = 0.3
    scored['Anomaly'] = scored['Loss_mae'] > scored['Threshold']
    print(scored.head())


    # Merge train and test results into a single dataframe and plot
    X_pred_train = model.predict(np.array(X_train))
    X_pred_train = pd.DataFrame(X_pred_train, 
                        columns=X_train.columns)
    X_pred_train.index = X_train.index


    scored_train = pd.DataFrame(index=X_train.index)
    scored_train['Loss_mae'] = np.mean(np.abs(X_pred_train-X_train), axis = 1)
    scored_train['Threshold'] = 0.3
    scored_train['Anomaly'] = scored_train['Loss_mae'] > scored_train['Threshold']
    scored = pd.concat([scored_train, scored])


    scored.plot(logy=True,  figsize = (10,6), ylim = [1e-2,1e2], color = ['blue','red'])
    plt.show()


df = load_data()
dataset_train, dataset_test = filter_dataset(df)
dataset_train, dataset_test  = index_timestamps(dataset_train, dataset_test)
X_train, X_test = normalize_data(dataset_train, dataset_test)
model = create_model(X_train)
view_anomalies(X_train, X_test, model)
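
The data-handling helpers above are imported from a local dataset module that is not shown in this post. A minimal sketch of what such a module might look like, assuming the merged NASA IMS bearing data has been saved to a CSV with a timestamp column and one aggregated vibration feature per bearing (the file name merged_dataset.csv and the 50/50 train/test split are illustrative assumptions):

import pandas as pd
from sklearn import preprocessing


def load_data():
    # Hypothetical file: timestamp column plus one aggregated
    # vibration feature per bearing
    return pd.read_csv('merged_dataset.csv')


def filter_dataset(df):
    # Assumed split: the early, healthy part of the run for training,
    # the remainder (running towards failure) for testing
    split = int(len(df) * 0.5)
    return df.iloc[:split].copy(), df.iloc[split:].copy()


def index_timestamps(dataset_train, dataset_test):
    for d in (dataset_train, dataset_test):
        d['timestamp'] = pd.to_datetime(d['timestamp'])
        d.set_index('timestamp', inplace=True)
    return dataset_train, dataset_test


def normalize_data(dataset_train, dataset_test):
    # Fit the scaler on the training data only, then apply it to both sets
    scaler = preprocessing.MinMaxScaler()
    X_train = pd.DataFrame(scaler.fit_transform(dataset_train),
                           columns=dataset_train.columns,
                           index=dataset_train.index)
    X_test = pd.DataFrame(scaler.transform(dataset_test),
                          columns=dataset_test.columns,
                          index=dataset_test.index)
    return X_train, X_test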

Mahalanobis Distance
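
For a sample $x$, the Mahalanobis distance to a distribution with mean $\mu$ and covariance $\Sigma$ is

$$D_M(x) = \sqrt{(x - \mu)^\top \, \Sigma^{-1} \, (x - \mu)}$$

The script below estimates $\mu$ and $\Sigma$ from the PCA-transformed training data, scores both the training and test sets, and flags samples whose distance exceeds the heuristic threshold $k \cdot \operatorname{mean}(D_M)$, with $k = 3$ in the "extreme" setting and $k = 2$ otherwise.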

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dataset import load_data, filter_dataset, index_timestamps, normalize_data, do_PCA


def covariance_matrix(data, verbose=False):
    cov_matrix = np.cov(data, rowvar=False)
    if not is_pos_def(cov_matrix):
        raise ValueError("Covariance matrix is not positive definite!")
    inv_covariance_matrix = np.linalg.inv(cov_matrix)
    if not is_pos_def(inv_covariance_matrix):
        raise ValueError("Inverse of covariance matrix is not positive definite!")
    return cov_matrix, inv_covariance_matrix


def MahalanobisDist(inv_cov_matrix, mean_distr, data, verbose=False):
    # Distance of each sample from the training mean:
    # sqrt((x - mu)^T * Sigma^-1 * (x - mu))
    diff = data - mean_distr
    md = []
    for i in range(len(diff)):
        md.append(np.sqrt(diff[i].dot(inv_cov_matrix).dot(diff[i])))
    return md


def MD_detectOutliers(dist, extreme=False, verbose=False):
    # Heuristic threshold: k times the mean distance (k = 3 for extreme)
    k = 3. if extreme else 2.
    threshold = np.mean(dist) * k
    outliers = []
    for i in range(len(dist)):
        if dist[i] >= threshold:
            outliers.append(i)  # index of the outlier
    return np.array(outliers)


def MD_threshold(dist, extreme=False, verbose=False):
    k = 3. if extreme else 2.
    threshold = np.mean(dist) * k
    return threshold


def is_pos_def(A):
    if np.allclose(A, A.T):
        try:
            np.linalg.cholesky(A)
            return True
        except np.linalg.LinAlgError:
            return False
    else:
        return False
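
The Cholesky-based check works because np.linalg.cholesky raises LinAlgError unless its argument is positive definite, so a successful factorization of a symmetric matrix certifies positive definiteness without computing eigenvalues.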


def view_anomalies(X_train_PCA, X_test_PCA):
    # Estimate the covariance from the PCA-transformed training set,
    # then score both the training and test sets
    data_train = np.array(X_train_PCA.values)
    data_test = np.array(X_test_PCA.values)
    cov_matrix, inv_cov_matrix = covariance_matrix(data_train)
    mean_distr = data_train.mean(axis=0)
    dist_test = MahalanobisDist(inv_cov_matrix, mean_distr, data_test, verbose=False)
    dist_train = MahalanobisDist(inv_cov_matrix, mean_distr, data_train, verbose=False)
    threshold = MD_threshold(dist_train, extreme=True)


    # The squared Mahalanobis distance of Gaussian data follows a
    # chi-squared distribution; plot it to sanity-check the training set
    plt.figure()
    sns.histplot(np.square(dist_train),
                 bins=10,
                 kde=False)
    plt.xlim([0.0, 15])


    # Plot the distribution of the Mahalanobis distance itself
    plt.figure()
    sns.histplot(dist_train,
                 bins=10,
                 kde=True,
                 color='green')
    plt.xlim([0.0, 5])
    plt.xlabel('Mahalanobis dist')
    plt.show()


    return dist_test, dist_train, threshold


def validate_data(dist_test, dist_train, threshold, X_train_PCA, X_test_PCA):
    anomaly_train = pd.DataFrame()
    anomaly_train['Mob dist']= dist_train
    anomaly_train['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly_train['Anomaly'] = anomaly_train['Mob dist'] > anomaly_train['Thresh']
    anomaly_train.index = X_train_PCA.index
    anomaly = pd.DataFrame()
    anomaly['Mob dist']= dist_test
    anomaly['Thresh'] = threshold
    # If Mob dist above threshold: Flag as anomaly
    anomaly['Anomaly'] = anomaly['Mob dist'] > anomaly['Thresh']
    anomaly.index = X_test_PCA.index
    print(anomaly.head())
    anomaly_alldata = pd.concat([anomaly_train, anomaly])
    anomaly_alldata.plot(logy=True, figsize = (10,6), ylim = [1e-1,1e3], color = ['green','red'])
    plt.show()


df = load_data()
dataset_train, dataset_test = filter_dataset(df)
dataset_train, dataset_test  = index_timestamps(dataset_train, dataset_test)
X_train, X_test = normalize_data(dataset_train, dataset_test)
X_train_PCA, X_test_PCA = do_PCA(X_train, X_test)
dist_test, dist_train, threshold = view_anomalies(X_train_PCA, X_test_PCA)
validate_data(dist_test, dist_train, threshold, X_train_PCA, X_test_PCA)
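
As with the first script, do_PCA is imported from the local dataset module. A minimal sketch, assuming two principal components (consistent with the axis limits used in the plots above) and fitting on the training set only:

import pandas as pd
from sklearn.decomposition import PCA


def do_PCA(X_train, X_test, n_components=2):
    # Fit PCA on the training set, then project both sets
    pca = PCA(n_components=n_components, svd_solver='full')
    X_train_PCA = pd.DataFrame(pca.fit_transform(X_train),
                               index=X_train.index)
    X_test_PCA = pd.DataFrame(pca.transform(X_test),
                              index=X_test.index)
    return X_train_PCA, X_test_PCA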

Zhihu academic consulting: https://www.zhihu.com/consult/people/792359672131756032?isMe=1

Reviewer for journals such as Mechanical Systems and Signal Processing. Areas of expertise: signal filtering/denoising, machine learning/deep learning, time-series analysis/forecasting, and equipment fault diagnosis/defect detection/anomaly detection.

Divider

Minimax-concave total-variation denoising for non-stationary signals (Python)

Complete code:

mbd.pub/o/bread/Y5qXl5dr
