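# Outline:
# - Data preprocessing
# - Feature engineering
# - Time-series feature construction
# - Feature selection
# - Time-series feature processing with tsfresh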
import pandas as pd
import numpy as np
import tsfresh as tsf
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
# Number of leading rows loaded from each CSV (presumably a small sample to
# keep the feature extraction further down fast while experimenting).
SAMPLE_ROWS = 1000

# Let the parser stop after SAMPLE_ROWS rows instead of reading the whole
# file and slicing afterwards; each frame still holds the first SAMPLE_ROWS
# rows (or fewer if the file is shorter).
data_train = pd.read_csv(r'D:\Chrome_download\train.csv', nrows=SAMPLE_ROWS)
data_test_A = pd.read_csv(r'D:\Chrome_download\testA.csv', nrows=SAMPLE_ROWS)
# Notebook-style inspection of the test frame's dimensions; the value is
# discarded when the file runs as a plain script.
data_test_A.shape
# Turn each comma-separated "heartbeat_signals" string into long format:
# one row per (sample, position-in-signal) pair, indexed by the sample's
# original row label.
train_heartbeat_df = (
    data_train["heartbeat_signals"]
    .str.split(",", expand=True)  # one column per position in the signal
    .stack()                      # Series keyed by (row label, position)
    .reset_index()                # -> columns "level_0", "level_1" and 0
    .set_index("level_0")         # back to one index level: the row label
    .rename(columns={"level_1": "time", 0: "heartbeat_signals"})
    .astype({"heartbeat_signals": float})  # split pieces are still strings
)
# Drop the leftover "level_0" index name.
train_heartbeat_df.index.name = None
# Set the target aside, keyed by sample id rather than by row position.
# The feature matrix built from this frame is grouped on the "id" column, and
# tsfresh's feature selection expects the target's index to line up with the
# feature matrix's index; a positional index only matches when the ids
# happen to equal the row numbers.
data_train_label = data_train.set_index("id")["label"]
# Remove the target and the raw signal string, then attach the long-format
# signal. The join is on the row index, so each sample's remaining columns
# are repeated once per row of train_heartbeat_df carrying that index label.
data_train = (
    data_train
    .drop(columns=["label", "heartbeat_signals"])
    .join(train_heartbeat_df)
)
# Build the tsfresh feature matrix from the long-format frame: rows are
# grouped by the "id" column and ordered by the "time" column.
train_features = extract_features(data_train, column_id='id', column_sort='time')
# Notebook-style display of the raw features; no effect in a plain script.
train_features

# Clean up the feature matrix with tsfresh's impute (per its documentation it
# replaces NaN/inf values) so that feature selection can run on it. The
# returned frame is rebound explicitly instead of relying on the call's
# in-place side effect. `impute` and `select_features` are already imported
# at the top of the file, so they are not re-imported here.
train_features = impute(train_features)

# Filter the feature columns against the training labels.
train_features_filtered = select_features(train_features, data_train_label)
# Notebook-style display of the selected features; no effect in a script.
train_features_filtered