使用LSTM模型对心跳时间序列数据预测(Python代码,ipynb环境)

173 篇文章 1 订阅
138 篇文章 5 订阅
本文介绍了如何使用Python库如matplotlib、numpy和深度学习框架如PyTorch中的LSTM和RNN对ECG信号进行预处理、时间序列预测,并评估了两种模型的性能,包括RMSE、MAE和AIC指标。
摘要由CSDN通过智能技术生成

所用模块版本:

matplotlib==3.7.1
numpy==1.24.4
pandas==1.5.3
scikit_learn==1.2.2
scipy==1.10.1
seaborn==0.12.2
statsmodels==0.14.0
torch==1.13.1
torch==2.0.1
wfdb==4.1.2

主代码:

import itertools
import pandas as pd
import matplotlib.pyplot as plt
#完整代码:mbd.pub/o/bread/mbd-ZpWUmZ1x


import wfdb
import matplotlib.pyplot as plt

# Specify the path to your downloaded data
path_to_data = 'file_resource/ecg-id-database-1.0.0/Person_03/'

# The record name is the filename without the extension
record_name = 'rec_1'

# Use the 'rdrecord' function to read the ECG data
record = wfdb.rdrecord(f'{path_to_data}/{record_name}')
# Plot the ECG data
plt.figure(figsize=(10, 4))
plt.plot(record.p_signal[:,1])
plt.title('ECG Signal')
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.show()
pd.DataFrame(record.p_signal[:,1],columns=["hr"]).to_csv("./P3_rec_1.csv")
hr2 = pd.DataFrame(record.p_signal[:,1],columns=["hr"])[0:10000]
# hr2["index"] = hr2.index


plt.figure(figsize=(10, 4))


from torch import nn
import pandas as pd
import numpy as np
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
df = hr2.copy()
df_train = df.loc[:8000].copy()
df_test = df.loc[8000:10000].copy()

target_sensor = "hr"
# features = list(df.columns.difference([target_sensor]))
features = ["hr"]
batch_size =32
forecast_lead = 5
forcast_step = 1
target = f"{target_sensor}_lead{forecast_lead}"

df[target] = df[target_sensor].shift(-forecast_lead)
df = df.iloc[:-forecast_lead]

df_train = df.loc[:8000].copy()
df_test = df.loc[8000-forecast_lead:].copy()

print("Test set fraction:", len(df_train) / len(df_test))

target_mean = df_train[target].mean()
target_stdev = df_train[target].std()
for c in df_train.columns:
    mean = df_train[c].mean()
    stdev = df_train[c].std()

    df_train[c] = (df_train[c] - mean) / stdev
    df_test[c] = (df_test[c] - mean) / stdev
    
    
    
import torch
from torch.utils.data import Dataset

# parse the data into sliding windows
class SequenceDataset(Dataset):
    def __init__(self, dataframe, target, features,sequence_head=10, sequence_length=5):
        self.features = features
        self.target = target
        self.sequence_head = sequence_head
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[target].values).float()
        self.X = torch.tensor(dataframe[features].values).float()

    def __len__(self):
        return self.X.shape[0]-self.sequence_length +1

    def __getitem__(self, i):
        if i >= self.sequence_head - 1:
            i_start = i - self.sequence_head + 1
            x = self.X[i_start:(i + 1), :]

        else:
            padding = self.X[0].repeat(self.sequence_head - i - 1, 1)
            x = self.X[0:(i + 1), :]
            x = torch.cat((padding, x), 0)


        return x.to(device), self.y[i:i+self.sequence_length].to(device)



train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
    sequence_head=forecast_lead,
    sequence_length=forcast_step,

)
    
    
from torch.utils.data import DataLoader
torch.manual_seed(99)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

for i, (X, y) in enumerate(train_loader):
#     print(X)
    # print(y)
#     print()
    if i >3:break   
    
    from torch.utils.data import DataLoader
torch.manual_seed(99)

    
    
torch.manual_seed(101)

# define the dataset
train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
       sequence_head=forecast_lead,
    sequence_length=forcast_step,
)

test_dataset = SequenceDataset(
    df_test,
    target=target,
    features=["hr"],
    sequence_head=forecast_lead,
    sequence_length=forcast_step,
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

X, y = next(iter(train_loader))

print("Features shape:", X.shape)
print("Target shape:", y.shape)  
    
    
from torch import nn

# define the model
class ShallowRegressionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.num_directions = 1 # 单向LSTM
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input_seq):
        batch_size, seq_len = input_seq.shape[0], input_seq.shape[1]
        h_0 = torch.randn(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
        c_0 = torch.randn(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
        output, _ = self.lstm(input_seq, (h_0, c_0))
        pred = self.linear(output)
        pred = pred[:, -1, :]
        return pred



class ShallowRegressionRNN(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.rnn = nn.RNN(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_().to(device)

        _, hn = self.rnn(x, h0)
        out = self.linear(hn).flatten()

        return out    
    
    
# instantiated the model
num_hidden_units = 512
# loss_function = nn.MSELoss()
model_lstm = ShallowRegressionLSTM(input_size=len(features), hidden_size=num_hidden_units, num_layers=1,output_size=forcast_step,batch_size=batch_size)
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

loss_function = RMSELoss()
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.to(device)
    model.train()

    for X, y in data_loader:
        # print(X.shape)
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(data_loader, model, loss_function):

    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")
    return avg_loss


print("Untrained test\n--------")
# test_model(test_loader, model, loss_function)

avg_loss = 1
model_lstm.to(device)    
    
    
learning_rate = 5e-4
# train the model
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=learning_rate)
for ix_epoch in range(150):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model_lstm, loss_function, optimizer=optimizer)
    temp = test_model(test_loader, model_lstm, loss_function)
    if temp < avg_loss:
        avg_loss = temp
        torch.save(model_lstm.state_dict(), "model_lstm_%s_%s.pt"% (forecast_lead,forcast_step))
    # if ix_epoch % 20 == 0:
    #     learning_rate = learning_rate * 0.6
    #     optimizer = torch.optim.Adam(model_lstm.parameters(), lr=learning_rate)
    #     print(learning_rate)
    print()    
    
    
    
# save the model
# torch.save(model_lstm.state_dict(), "model_lstm.pt")

model_lstm.load_state_dict(torch.load("model_lstm_%s_%s.pt"% (forecast_lead,forcast_step)))    
    
# predict the model
def predict(data_loader, model):

    output = torch.tensor([]).to(device)
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)

    return output


train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

ystar_col = "Model forecast"
pre = predict(train_eval_loader, model_lstm).cpu().numpy()
print(pre.shape)
df_train[ystar_col] = predict(train_eval_loader, model_lstm).cpu().numpy()
df_test[ystar_col] = predict(test_loader, model_lstm).cpu().numpy()

df_out = pd.concat((df_train, df_test))[[target, ystar_col]]



for c in df_out.columns:
    df_out[c] = df_out[c] * target_stdev + target_mean

print(df_out)
    
    
# use last predict data to be the next input
def predict_window(data_loader, model, forecast_step=2000):
    output = torch.tensor([]).to(device)
    model.eval()
    count = 0
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)
            count +=1
            # print(X)
            if count > forecast_lead:
                break
        for i in range(forecast_step-1):
            y_star = model(output[output.shape[0]-forecast_lead:].reshape(1,forecast_lead,1))
            print(output)
            print(output[output.shape[0]-forecast_lead:])
            # y_star = model(output.reshape(1,forecast_lead,1))
            output = torch.cat((output, y_star), 0)
            if i > 10:
                break

    return output
res = predict_window(test_loader, model_lstm).cpu().numpy()
print(res)
plt.plot(res)



import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score,mean_absolute_percentage_error
fig, ax = plt.subplots(figsize=(12, 6))
df_out[8000:].plot(ax=ax)
ax.set_title("LSTM model forecast")
ax.set_ylabel("ECG")
ax.set_xlabel("Time")
plt.show()

# calculate the  error
# calculate the  AIC
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score,mean_absolute_percentage_error

def calculate_aic(y_true, y_pred, num_params):
    mse = mean_squared_error(y_true, y_pred)
    aic = len(y_true) * np.log(mse) + 2 * num_params
    return aic

mse = mean_squared_error(df_out[target], df_out[ystar_col])
mae = mean_absolute_error(df_out[target], df_out[ystar_col])
r2 = r2_score(df_out[target], df_out[ystar_col])
mape = mean_absolute_percentage_error(df_out[target], df_out[ystar_col])
print(f"R2: {r2:.6f}")
print(f"MAPE: {mape:.6f}")
print(f"MAE: {mae:.6f} ")
print(f"RMSE: {np.sqrt(mse):.6f}")
print(f"mse: {mse:.6f}")
print(f"AIC: {calculate_aic(df_out[target], df_out[ystar_col], 1):.6f}")


num_hidden_units = 128
# use sdg loss
loss_function = nn.MSELoss()
model = ShallowRegressionRNN(num_sensors=len(features), hidden_units=num_hidden_units)
model.to(device)
avg_loss = 1
# loss_function = RMSELoss()

learning_rate = 1e-4

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for ix_epoch in range(500):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    temp = test_model(test_loader, model, loss_function)
    if temp < avg_loss:
        avg_loss = temp
        torch.save(model.state_dict(), "model_RNN.pt")
    print()

# save the model
# torch.save(model.state_dict(), "model_RNN.pt")

# load the model
model.load_state_dict(torch.load("model_RNN.pt"))
print(avg_loss)


res = predict_window(test_loader, model).cpu().numpy()
print(len(res))
plt.plot(res)

def predict(data_loader, model):

    output = torch.tensor([]).to(device)
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)

    return output


train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

ystar_col = "Model forecast"
df_train[ystar_col] = predict(train_eval_loader, model).cpu().numpy()
df_test[ystar_col] = predict(test_loader, model).cpu().numpy()

df_out = pd.concat((df_train, df_test))[[target, ystar_col]]

# for c in df_out.columns:
#     df_out[c] = df_out[c] * target_stdev + target_mean

print(df_out)

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 6))
df_out[8000:].plot(ax=ax)
ax.set_title("RNN model forecast")
ax.set_ylabel("ECG")
ax.set_xlabel("Time")
plt.show()


from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score,mean_absolute_percentage_error
mse = mean_squared_error(df_out[target], df_out[ystar_col])
mae = mean_absolute_error(df_out[target], df_out[ystar_col])
r2 = r2_score(df_out[target], df_out[ystar_col])
mape = mean_absolute_percentage_error(df_out[target], df_out[ystar_col])
print(f"R2: {r2:.6f}")
print(f"MAPE: {mape:.6f}")
print(f"MAE: {mae:.6f} ")
print(f"RMSE: {np.sqrt(mse):.6f}")
print(f"mse: {mse:.6f}")
print(f"AIC: {calculate_aic(df_out[target], df_out[ystar_col], 1):.6f}")
# aic_res = calaic(df_out[target], df_out[ystar_col], df_out.shape[1])
# print(f"AIC: {aic_res:.6f}")

工学博士,担任《Mechanical System and Signal Processing》等期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
使用LSTM进行时间序列预测的步骤如下: 1. 准备数据集并进行预处理 2. 划分训练集和测试集 3. 构建LSTM模型 4. 训练模型并进行预测 5. 评估模型性能并进行可视化展示 以下是使用Python实现LSTM时间序列预测的示例代码: ```python import pandas as pd import numpy as np import matplotlib.pyplot as plt from keras.models import Sequential from keras.layers import Dense, LSTM # 准备数据集并进行预处理 data = pd.read_csv('data.csv') data = data.dropna() # 删除空值 data = data['value'].values.reshape(-1, 1) # 转换为二维数组 data = data.astype('float32') # 转换为浮点型 # 划分训练集和测试集 train_size = int(len(data) * 0.8) # 训练集占80% test_size = len(data) - train_size train_data, test_data = data[0:train_size,:], data[train_size:len(data),:] # 构建LSTM模型 look_back = 3 # 每个样本包含3个时间步的特征 model = Sequential() model.add(LSTM(4, input_shape=(1, look_back))) model.add(Dense(1)) model.compile(loss='mean_squared_error', optimizer='adam') # 训练模型并进行预测 X_train, y_train = [], [] for i in range(look_back, len(train_data)): X_train.append(train_data[i-look_back:i, 0]) y_train.append(train_data[i, 0]) X_train, y_train = np.array(X_train), np.array(y_train) X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1])) model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=2) inputs = data[len(data) - len(test_data) - look_back:] inputs = inputs.reshape(-1, 1) X_test = [] for i in range(look_back, len(test_data)+look_back): X_test.append(inputs[i-look_back:i, 0]) X_test = np.array(X_test) X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1])) predicted = model.predict(X_test) # 评估模型性能并进行可视化展示 mse = np.mean((predicted - test_data)**2) print('MSE:', mse) plt.plot(test_data, label='true') plt.plot(predicted, label='predicted') plt.legend() plt.show() ``` 其中,`data.csv`为时间序列数据集文件,`value`为时间序列数值。代码使用了3个时间步的特征,即每个样本包含连续的3个时间点的数值,可以根据实际情况进行修改。模型训练过程中使用Adam优化器和均方误差损失函数,训练100个epochs。最后,输出模型的MSE评估指标,并可视化展示预测结果。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

哥廷根数学学派

码字不易,且行且珍惜

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值