通过 pyEDFlib 官方提供的高级接口(highlevel),将医院的 24 小时原始信号数据逐小时切分,并只保存所需通道的信号。
import os
from pyedflib import highlevel
def split_edf(input_file, output_dir):
    """Split an EDF recording into one-hour EDF files with only the target channels.

    The whole recording is loaded into memory with ``highlevel.read_edf``,
    the target EEG channels are selected by exact label match, and every
    hour is written to ``<output_dir>/hour_<n>.edf`` (``n`` starts at 0).
    The last file holds the remaining partial hour, if there is one.

    Args:
        input_file: Path of the source ``.edf`` file.
        output_dir: Directory that receives the hourly files; it is created
            when it does not exist yet.

    Raises:
        ValueError: If none of the target channel labels occur in the file.
    """
    # Channels to keep (a set gives O(1) membership tests).
    target_channels = {
        "Fp1", "Fp2", "F3", "F4", "C3", "C4", "P3", "P4",
        "O1", "O2", "F7", "F8", "T3", "T4", "T5", "T6",
    }
    seconds_per_hour = 3600
    # Rate the original script assumed; used only if a header has no rate.
    default_sample_rate = 512

    # Read all signals plus the per-signal and file-level headers.
    signals, signal_headers, header = highlevel.read_edf(input_file)

    # Positions of the target channels inside the source file.
    target_channel_indices = [
        i for i, sig_header in enumerate(signal_headers)
        if sig_header['label'] in target_channels
    ]
    if not target_channel_indices:
        raise ValueError(f"no target channels found in {input_file!r}")

    # Select signals and headers with the SAME indices so they stay aligned;
    # write_edf requires exactly one header per signal.
    target_signals = [signals[i] for i in target_channel_indices]
    target_signal_headers = [signal_headers[i] for i in target_channel_indices]

    # One hour expressed in samples, per channel, taken from the file itself.
    # Newer pyedflib versions use 'sample_frequency', older ones 'sample_rate'.
    samples_per_hour = []
    for sig_header in target_signal_headers:
        rate = (
            sig_header.get('sample_frequency')
            or sig_header.get('sample_rate')
            or default_sample_rate
        )
        samples_per_hour.append(int(round(rate * seconds_per_hour)))

    # Ceiling division: a partial last hour still gets a file, but a recording
    # that ends exactly on the hour does not produce an empty trailing file.
    n_hours = max(
        -(-len(sig) // per_hour)
        for sig, per_hour in zip(target_signals, samples_per_hour)
    )

    os.makedirs(output_dir, exist_ok=True)

    for hour in range(n_hours):
        # Slice the current hour out of every target channel (slicing clamps
        # at the end of the signal, so the last hour may be shorter).
        hour_signals = [
            sig[hour * per_hour:(hour + 1) * per_hour]
            for sig, per_hour in zip(target_signals, samples_per_hour)
        ]
        # Build the output file name.
        output_file_path = os.path.join(output_dir, f"hour_{hour}.edf")
        # The file-level header is passed through unchanged.
        highlevel.write_edf(output_file_path, hour_signals, target_signal_headers, header)
def main():
    """Split the hard-coded hospital recording into hourly EDF files."""
    # Source recording and the directory that receives the hourly files.
    source_edf = r"F:\2024test\癫痫数据集\EEG.edf"
    hourly_dir = r"F:\2024test\癫痫数据集\hospitals_pit"

    split_edf(source_edf, hourly_dir)


# Run the split only when this file is executed as a script.
if __name__ == "__main__":
    main()
后续:由于保存时间较长、通道数较多的文件在切分时占用内存较大,故改用分块方式,每次只读取并保存一小时的数据。
import os
import numpy as np
import pyedflib
from pyedflib import highlevel
def split_edf(input_file, output_dir):
    """Split an EDF recording into one-hour EDF files, reading one hour at a time.

    Only the target EEG channels (matched by exact label) are kept. The
    recording is read hour by hour and channel by channel through a single
    ``EdfReader``, so at most one hour of the target channels is held in
    memory. Each hour is written to ``<output_dir>/hour_<n>.edf`` (``n``
    starts at 0); the last file holds the remaining partial hour, if any.

    Args:
        input_file: Path of the source ``.edf`` file.
        output_dir: Directory that receives the hourly files; it is created
            when it does not exist yet.

    Raises:
        ValueError: If none of the target channel labels occur in the file.
    """
    # Channels to keep (a set gives O(1) membership tests).
    target_channels = {
        "Fp1", "Fp2", "F3", "F4", "C3", "C4", "P3", "P4",
        "O1", "O2", "F7", "F8", "T3", "T4", "T5", "T6",
    }
    seconds_per_hour = 3600

    os.makedirs(output_dir, exist_ok=True)

    # Keep ONE reader open for the whole run instead of reopening the file
    # for every channel of every chunk.
    with pyedflib.EdfReader(input_file) as f:
        signal_headers = f.getSignalHeaders()  # per-signal headers
        header = f.getHeader()                 # file-level header
        samples_per_channel = f.getNSamples()  # sample count of every channel

        # Positions of the target channels inside the source file.
        target_channel_indices = [
            i for i, sig_header in enumerate(signal_headers)
            if sig_header['label'] in target_channels
        ]
        if not target_channel_indices:
            raise ValueError(f"no target channels found in {input_file!r}")

        # Headers of the target channels, in the same order as the indices.
        target_signal_headers = [signal_headers[i] for i in target_channel_indices]

        # One hour expressed in samples, and the total length, per target
        # channel; both come from the file rather than an assumed 512 Hz.
        samples_per_hour = [
            int(round(f.getSampleFrequency(i) * seconds_per_hour))
            for i in target_channel_indices
        ]
        total_samples = [int(samples_per_channel[i]) for i in target_channel_indices]

        # Ceiling division so a partial last hour still gets its own file.
        n_hours = max(
            -(-total // per_hour)
            for total, per_hour in zip(total_samples, samples_per_hour)
        )

        for hour in range(n_hours):
            chunk_signals = []
            for channel, per_hour, total in zip(
                target_channel_indices, samples_per_hour, total_samples
            ):
                start = hour * per_hour
                count = min(per_hour, total - start)
                # Read by the channel's index in the SOURCE file; the position
                # in chunk_signals follows target_signal_headers.
                chunk_signals.append(f.readSignal(channel, start=start, n=count))

            # Build the output file name.
            output_file_path = os.path.join(output_dir, f"hour_{hour}.edf")
            # Write this hour with the target signal headers; the file-level
            # header is passed through unchanged.
            highlevel.write_edf(output_file_path, chunk_signals, target_signal_headers, header)
def main():
    """Split the hard-coded test recording into hourly EDF files."""
    # Source recording and the directory that receives the hourly files.
    source_edf = r"F:\2024test\癫痫数据集\EEG\test.edf"
    hourly_dir = r"F:\2024test\癫痫数据集\切分"

    split_edf(source_edf, hourly_dir)


# Run the split only when this file is executed as a script.
if __name__ == "__main__":
    main()