PSG机器学习

最新推荐文章于 2024-10-15 21:19:38 发布
qq_45779564
最新推荐文章于 2024-10-15 21:19:38 发布
阅读量146
点赞数 1
文章标签：机器学习人工智能
本文链接：https://blog.csdn.net/qq_45779564/article/details/137637983
版权
使用Python-EEG工具库MNE-机器学习算法随机森林、SVM、逻辑回归、KNN、朴素贝叶斯、多层感知机、LSTM等方法判断睡眠类型

import numpy as np
import matplotlib.pyplot as plt

import mne

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
"""
可以通过
mne.datasets.sleep_physionet.age.fetch_data(subjects,recording,path)
来获取PhysioNet多导睡眠图数据集文件。
subjects：表示想要使用哪些受试者对象，可供选择的受试者对象范围为0-19。
recording：表示夜间记录的编号(索引)，有效值为：[1]、[2]或[1、2]。

path:PhysioNet数据的存放地址，如果没有给定，则加载默认存放数据的地址；
如果默认存放数据集的地址不存在数据，则从网络中下载相关数据。
"""

# 选择两个受试者实验对象ALICE, BOB(该名字并非实验中的真实名，这里是为了方便才临时取的名字)
ALICE, BOB = 0, 1
# 加载ALICE, BOB的实验数据文件
[alice_files, bob_files] = mne.datasets.sleep_physionet.age.fetch_data(subjects=[ALICE, BOB], recording=[1])

# 通道名称映射
mapping = {'EOG horizontal': 'eog',
           'Resp oro-nasal': 'misc',
           'EMG submental': 'misc',
           'Temp rectal': 'misc',
           'Event marker': 'misc'}

#读取ALICE的edf文件，和其对应的注释文件
raw_train = mne.io.read_raw_edf(alice_files[0])
annot_train = mne.read_annotations(alice_files[1])

raw_train.set_annotations(annot_train, emit_warning=False)
raw_train.set_channel_types(mapping)

# 绘制空0s开始，时间窗口长度为40s的连续通道数据波形图
raw_train.plot(duration=40, scalings='auto')
plt.show()

"""
睡眠表示与事件映射
"""
annotation_desc_2_event_id = {'Sleep stage W': 1,
                              'Sleep stage 1': 2,
                              'Sleep stage 2': 3,
                              'Sleep stage 3': 4,
                              'Sleep stage 4': 4,
                              'Sleep stage R': 5}

events_train, _ = mne.events_from_annotations(
    raw_train, event_id=annotation_desc_2_event_id, chunk_duration=30.)

# 创建一个新的event_id以统一 阶段3和4
event_id = {'Sleep stage W': 1,
            'Sleep stage 1': 2,
            'Sleep stage 2': 3,
            'Sleep stage 3/4': 4,
            'Sleep stage R': 5}

# 绘制事件数据
mne.viz.plot_events(events_train, event_id=event_id,
                    sfreq=raw_train.info['sfreq'])

# 保留颜色代码以便进一步绘制
stage_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
Used Annotations descriptions: ['Sleep stage 1', 'Sleep stage 2', 'Sleep stage 3', 'Sleep stage 4', 'Sleep stage R', 'Sleep stage W']

tmax = 30. - 1. / raw_train.info['sfreq']  # tmax in included
"""
所创建的是时间从tmin=0开始，到tmax为止的epochs
"""
epochs_train = mne.Epochs(raw=raw_train, events=events_train,
                          event_id=event_id, tmin=0., tmax=tmax, baseline=None)

print(epochs_train)


epochs_test = mne.Epochs(raw=raw_test, events=events_test, event_id=event_id,
                         tmin=0., tmax=tmax, baseline=None)

print(epochs_test)

fig, (ax1, ax2) = plt.subplots(ncols=2)

# iterate over the subjects
stages = sorted(event_id.keys())
for ax, title, epochs in zip([ax1, ax2],
                             ['Alice', 'Bob'],
                             [epochs_train, epochs_test]):

    for stage, color in zip(stages, stage_colors):
        epochs[stage].plot_psd(area_mode=None, color=color, ax=ax,
                               fmin=0.1, fmax=20., show=False,
                               average=True, spatial_colors=False)
    ax.set(title=title, xlabel='Frequency (Hz)')
ax2.set(ylabel='uV^2/hz (dB)')
ax2.legend(ax2.lines[2::3], stages)
plt.tight_layout()
plt.show()


from mne.time_frequency import psd_array_welch

def eeg_power_band(epochs):
    """脑电相对功率带特征提取
    该函数接受一个 'mne.Epochs' 对象，
    并基于与 scikit-learn 兼容的特定频带中的相对功率创建 EEG 特征。
    Parameters
    ----------
    epochs : mne.Epochs
        The data.
    Returns
    -------
    X : numpy array of shape [n_samples, 5]
        Transformed data.
    """
    # 特定频带
    FREQ_BANDS = {"delta": [0.5, 4.5],
                  "theta": [4.5, 8.5],
                  "alpha": [8.5, 11.5],
                  "sigma": [11.5, 15.5],
                  "beta": [15.5, 30]}

    # 提取 EEG 数据的 numpy 表示
    epochs_data = epochs.get_data()

    psds, freqs = psd_array_welch(epochs_data, sfreq=epochs.info['sfreq'], fmin=0.5, fmax=30.)
    psds /= np.sum(psds, axis=-1, keepdims=True)

    X = []
    for fmin, fmax in FREQ_BANDS.values():
        psds_band = psds[:, :, (freqs >= fmin) & (freqs < fmax)].mean(axis=-1)
        X.append(psds_band.reshape(len(psds), -1))

    return np.concatenate(X, axis=1)
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     RandomForestClassifier(n_estimators=100, random_state=100))
# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Loading data for 2650 events and 3000 original #time points ...
#0 bad epochs dropped
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#0 bad epochs dropped
#Effective window size : 2.560 (s)
#Accuracy score: 0.8344039971448965

print(classification_report(y_test, y_pred, target_names=event_id.keys()))
#                 precision    recall  f1-score   #support
#
#  Sleep stage W       0.83      1.00      0.91      #1856
#  Sleep stage 1       0.00      0.00      0.00       109
#  Sleep stage 2       0.94      0.62      0.74       562
#Sleep stage 3/4       0.74      0.95      0.83       105
#  Sleep stage R       0.56      0.21      0.31       170

#       accuracy                           0.83      2802
#      macro avg       0.61      0.56      0.56      2802
#   weighted avg       0.80      0.83      0.80      2802

from sklearn.svm import SVC
# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     SVC(kernel='linear'))  # 使用线性核函数，你也可以尝试其他核函数

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original #time #points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.662384011420414

from sklearn.svm import SVC
# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     SVC(kernel='poly'))  # 使用线性核函数，你也可以尝试其他核函数

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time #points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.7894361170592434

from sklearn.svm import SVC
# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     SVC(kernel='rbf'))  # 使用线性核函数，你也可以尝试其他核函数

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.7623126338329764

from sklearn.svm import SVC
# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     SVC(kernel='sigmoid'))  # 使用线性核函数，你也可以尝试其他核函数

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.6627408993576017

from sklearn.neighbors import KNeighborsClassifier
# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     KNeighborsClassifier(n_neighbors=5))  # 设置邻居数，这里设为5，你也可以根据需求调整

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.8194147037830122

from sklearn.linear_model import LogisticRegression

# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     LogisticRegression(max_iter=1000))  # 设置最大迭代次数，确保模型收敛

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.662384011420414

from sklearn.neural_network import MLPClassifier

# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     MLPClassifier(hidden_layer_sizes=(100, ), max_iter=1000))  # 设置隐藏层大小和最大迭代次数

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.8465381870092791

from sklearn.naive_bayes import GaussianNB

# 创建流水线
pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
                     GaussianNB())

# 训练
epochs_train.load_data()

y_train = epochs_train.events[:, 2]
pipe.fit(epochs_train, y_train)

# 预测
y_pred = pipe.predict(epochs_test)

# 评估准确率
y_test = epochs_test.events[:, 2]
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))
#Effective window size : 2.560 (s)
#Loading data for 2802 events and 3000 original time points ...
#Effective window size : 2.560 (s)
#Accuracy score: 0.8201284796573876

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

# 从 Epochs 对象中提取数据
X_train_tensor = torch.Tensor(epochs_train.get_data())  # 假设 epochs_train 是 MNE Epochs 对象
y_train_tensor = torch.LongTensor(epochs_train.events[:, 2])
X_test_tensor = torch.Tensor(epochs_test.get_data())    # 假设 epochs_test 是 MNE Epochs 对象
y_test_tensor = torch.LongTensor(epochs_test.events[:, 2])

# 创建数据加载器
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# 定义神经网络模型
class EEGClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(EEGClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, 256, batch_first=True)
        self.dropout = nn.Dropout(0.5)  # 添加 dropout 层
        self.fc = nn.Linear(256, num_classes)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.dropout(out)  # 在 LSTM 层输出之后应用 dropout
        out = self.fc(out[:, -1, :])  # 获取最后一个时间步的输出
        return out


# 实例化模型
hidden_size1 = 200  # 第一个隐藏层大小
hidden_size2 = 100  # 第二个隐藏层大小
output_size = len(set(y_train_tensor.numpy()))  # 输出大小，类别数量
# 计算特征数
num_samples, num_channels, num_times = epochs_train.get_data().shape
num_features = num_channels * num_times

# 实例化神经网络模型
model = EEGClassifier(3000, output_size)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
# 将标签值减去 1
y_train_tensor -= 1
y_test_tensor -= 1

num_epochs = 10
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# 测试模型
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    acc = accuracy_score(y_test_tensor.numpy(), predicted.numpy())

print("准确率: {}".format(acc))