CHB-MIT数据预处理（三）

最新推荐文章于 2023-11-11 16:17:50 发布

y1n_o

最新推荐文章于 2023-11-11 16:17:50 发布

阅读量635

点赞数 1

文章标签：数据分析

本文链接：https://blog.csdn.net/weixin_66157944/article/details/134167136

版权

把数据集中每个实验对象的 summary 文件整合到一个 txt 文件里，同时删除其中包含指定关键词（例如 Channel）的行：

import os

# Root folder that contains the per-subject sub-folders
main_folder = '.../CHB-MIT'  # replace with the actual root folder path

# File that receives the merged content
output_file = '.../seizure_summary.txt'  # replace with the output file name you want

# Lines containing any of these keywords are dropped from the merged file
keywords_to_remove = ['Channel']  # replace with your own keyword list


def merge_txt_files(source_root, merged_path, drop_keywords):
    """Merge every ``.txt`` file found under *source_root* into *merged_path*.

    Lines that contain at least one entry of *drop_keywords* are left out.

    Args:
        source_root: Folder that is walked recursively for ``.txt`` files.
        merged_path: File to (over)write with the merged, filtered lines.
        drop_keywords: Substrings; a line containing any of them is skipped.
    """
    merged_real = os.path.realpath(merged_path)
    with open(merged_path, 'w', encoding='utf-8') as output:
        for root, dirs, files in os.walk(source_root):
            # Sorting in place makes os.walk visit sub-folders (and therefore
            # write the merged file) in a reproducible order.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith('.txt'):
                    continue
                file_path = os.path.join(root, file_name)
                # The merged file may itself live under source_root; reading
                # it back while it is being written would corrupt the result.
                if os.path.realpath(file_path) == merged_real:
                    continue
                with open(file_path, 'r', encoding='utf-8') as txt_file:
                    kept_lines = [
                        line for line in txt_file
                        if not any(keyword in line for keyword in drop_keywords)
                    ]
                # A source file without a final newline would otherwise glue
                # its last line onto the first line of the next file.
                if kept_lines and not kept_lines[-1].endswith('\n'):
                    kept_lines[-1] += '\n'
                output.writelines(kept_lines)


if __name__ == '__main__':
    merge_txt_files(main_folder, output_file, keywords_to_remove)
    print("文本文件合并并删除关键词所在行完成。")

然后根据整合得到的 seizure_summary.txt 文件（文件名是自己取的），把各个 MAT 文件中癫痫发作时段的数据分割出来并单独保存：

import os
from scipy.io import loadmat, savemat

# Paths of the merged summary file and of the MAT data folders
text_file_path = '.../seizure_summary.txt'  # merged summary text file
data_folder = '.../CHB-MAT-Filter'  # folder that contains the MAT files
output_folder = '.../CHB-MAT-Filter/seizure_segment'  # where the seizure segments are saved

# Samples per second of the MAT recordings. The summary gives seizure times in
# seconds, so they have to be scaled before they can be used as column indices.
# 256 is the sampling rate documented for the CHB-MIT recordings.
# NOTE(review): assumes the MAT files keep that rate - change this value if
# they were resampled.
sampling_rate = 256


def parse_seconds(line):
    """Return the integer in a ``<key>: <n> seconds`` line.

    Raises:
        ValueError: If the text after the first ``:`` is not an integer
            (this includes lines that have no ``:`` at all).
    """
    return int(line.partition(':')[2].strip().split('seconds')[0])


def parse_seizure_summary(lines):
    """Parse the merged summary into one record per ``File_Name`` entry.

    Args:
        lines: Iterable of text lines (e.g. an open text file).

    Returns:
        A list of dicts ``{'file_name': str, 'time_ranges': [...]}`` where each
        time range is ``{'start_time': int, 'end_time': int or None}``, both
        expressed in seconds as written in the summary.
    """
    records = []
    current = None
    for line in lines:
        if line.startswith('File_Name'):
            if current is not None:
                records.append(current)
            current = {
                'file_name': line.partition(':')[2].strip(),
                'time_ranges': [],
            }
        elif line.startswith('Seizure_Start_Time'):
            if current is None:
                # A seizure line before any File_Name line has no owner.
                print(f"Seizure time without a preceding File_Name: {line}")
                continue
            try:
                start_time = parse_seconds(line)
            except ValueError:
                print(f"Error parsing start time in line: {line}")
                continue
            current['time_ranges'].append(
                {'start_time': start_time, 'end_time': None}
            )
        elif line.startswith('Seizure_End_Time'):
            if current is None:
                print(f"Seizure time without a preceding File_Name: {line}")
                continue
            try:
                end_time = parse_seconds(line)
            except ValueError:
                print(f"Error parsing end time in line: {line}")
                continue
            ranges = current['time_ranges']
            # Only complete a range that is still open, so a stray end line
            # cannot overwrite the end of an already finished range.
            if ranges and ranges[-1]['end_time'] is None:
                ranges[-1]['end_time'] = end_time
    # The last record has no following File_Name line to trigger the append.
    if current is not None:
        records.append(current)
    return records


def find_mat_files(data_folder, output_folder):
    """Return the paths of all ``.mat`` files under *data_folder*.

    *output_folder* is pruned from the walk: the segments saved there also
    contain the recording name and would otherwise be sliced a second time.
    """
    excluded = os.path.realpath(output_folder)
    mat_paths = []
    for root, dirs, files in os.walk(data_folder):
        # Assigning to dirs[:] tells os.walk which sub-folders to descend into.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.realpath(os.path.join(root, d)) != excluded
        )
        for name in sorted(files):
            if name.endswith('.mat'):
                mat_paths.append(os.path.join(root, name))
    return mat_paths


def slice_seizure_segments(records, data_folder, output_folder, sampling_rate=256):
    """Cut every seizure time range out of the matching MAT files.

    A MAT file matches a record when the record's ``file_name`` is a substring
    of the MAT file name. Every non-metadata variable is sliced along its
    second axis (presumably channels x samples - verify against the MAT files)
    and saved as ``sliced_<file_name>_<start>_<end>.mat`` in *output_folder*,
    where ``<start>``/``<end>`` are the times in seconds.

    Args:
        records: Output of :func:`parse_seizure_summary`.
        data_folder: Folder searched recursively for ``.mat`` files.
        output_folder: Folder that receives the segments (created if missing).
        sampling_rate: Samples per second used to turn seconds into indices.
    """
    os.makedirs(output_folder, exist_ok=True)
    # Index the folder once instead of walking it for every time range.
    mat_paths = find_mat_files(data_folder, output_folder)

    for record in records:
        file_name = record['file_name']
        if not file_name:
            # An empty name is a substring of every file name.
            continue

        complete_ranges = []
        for time_range in record['time_ranges']:
            if time_range['start_time'] is None or time_range['end_time'] is None:
                print(f"Start_time or end_time missing for record: {file_name}")
                continue
            complete_ranges.append(time_range)
        if not complete_ranges:
            continue

        for file_path in mat_paths:
            if file_name not in os.path.basename(file_path):
                continue
            try:
                # Load once per file and reuse it for all of its time ranges.
                mat_data = loadmat(file_path)
            except FileNotFoundError:
                print(f"MAT file not found: {file_path}")
                continue

            for time_range in complete_ranges:
                start_time = time_range['start_time']
                end_time = time_range['end_time']
                start_index = int(round(start_time * sampling_rate))
                end_index = int(round(end_time * sampling_rate))

                sliced_data = {
                    key: value[:, start_index:end_index]
                    for key, value in mat_data.items()
                    # Skip loadmat's metadata entries such as __header__.
                    if not (key.startswith('__') and key.endswith('__'))
                }

                output_file_name = f"sliced_{file_name}_{start_time}_{end_time}.mat"
                output_file_path = os.path.join(output_folder, output_file_name)
                savemat(output_file_path, sliced_data)


if __name__ == '__main__':
    # The merged summary is written as UTF-8, so read it back the same way.
    with open(text_file_path, 'r', encoding='utf-8') as file:
        data = parse_seizure_summary(file)
    slice_seizure_segments(data, data_folder, output_folder, sampling_rate)
    print('分割完成')