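# Following GDN's data preprocessing: average every 10 records; the label of
# each window is the majority value within that window.
# Dataset: WADI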
# Every statement below goes through `pd`; with the import commented out this
# section raised NameError whenever it was run on its own.
import pandas as pd

# Load the WADI A2 (19 Nov 2019) release. Judging by the file names, the first
# file is the 14-day training log and the second is the labelled attack log --
# TODO confirm against the dataset documentation.
# skiprows=1 skips the first line of the labelled file, so the header is read
# from the line after it.
train_new = pd.read_csv('D:/anomalydata/wadi/WADI.A2_19 Nov 2019/WADI_14days_new.csv')
test_new = pd.read_csv('D:/anomalydata/wadi/WADI.A2_19 Nov 2019/WADI_attackdataLABLE.csv', skiprows=1)
# Earlier A1 (9 Oct 2017) release, kept for reference:
# test = pd.read_csv('D:/anomalydata/wadi/WADI.A1_9 Oct 2017/WADI_attackdata.csv')
# train = pd.read_csv('D:/anomalydata/wadi/WADI.A1_9 Oct 2017/WADI_14days.csv', skiprows=4)

# These columns are entirely NaN, so they are simply overwritten with 0 in
# both the training and the test frame.
ncolumns = ['2_LS_001_AL', '2_LS_002_AL', '2_P_001_STATUS', '2_P_002_STATUS']
train_new[ncolumns] = 0
test_new[ncolumns] = 0
# test_new.columns

# The raw label column encodes 1 = no attack and -1 = attack (as its own name
# states). Rename it to 'label' and move towards the 0 = normal / 1 = anomaly
# convention, which is easier to work with downstream.
test_new.rename(columns={'Attack LABLE (1:No Attack, -1:Attack)': 'label'}, inplace=True)
# Map the normal rows (1) to 0 first. This has to happen before any -1 -> 1
# remapping, otherwise the freshly marked anomalies would be zeroed as well.
normal_rows = test_new['label'] == 1
test_new.loc[normal_rows, 'label'] = 0
test_new.loc[test_new['label'] =&