首届中国心电智能大赛初赛实现代码pytorch

1. 题目

目标是利用常规静息心电图的电压信号,预测“正常心电图”和“异常心电图”。根据心血管医生对心电图的诊断结果作为金标准,我们将那些没有明显异常的心电图作为“正常心电图”,并将包含一种或更多异常或疾病的心电图作为“异常心电图”。各团队需要在初赛规定时间内,利用训练集中常规心电图的电压信号,设计并实现可预测正常和异常等两类心电图的算法。

2. 数据

完整的训练集和测试集,共1000例常规心电图,其中训练集中包含600例,测试集中共400例。该数据是从多个公开数据集中获取。参赛团队需要利用有正常/异常两类标签的训练集数据设计和实现算法,并在没有标签的测试集上做出预测。

该心电数据的采样率为500 Hz。为了方便参赛团队用不同编程语言都能读取数据,所有心电数据的存储格式为MAT格式。该文件中存储了12个导联的电压信号。训练数据对应的标签存储在txt文件中,其中0代表正常,1代表异常。

3. 分析

  • 数据集共有1000个样本,其中训练集包括600例,测试集400例。训练集是具有label的,用于训练模型;测试集没有label,需要我们用训练好的模型进行预测。
  • 其实就是一个二分类问题
  • 流程包括:数据加载与预处理,模型搭建,模型训练,模型测试

4. pytorch实现

(1)数据加载与处理 (dataset.py)

from scipy.io import loadmat
import os
from torch.utils import data
import pandas as pd
import numpy as np


# Convert a class index into a one-hot vector (convenient for computing the loss).
def convert2oneHot(index, Lens):
    """Return a length-``Lens`` vector that is 1 at ``index`` and 0 elsewhere.

    Args:
        index: Position to set to 1.
        Lens: Number of classes, i.e. the length of the returned vector.

    Returns:
        A 1-D float64 ``numpy.ndarray``.
    """
    one_hot = np.zeros(shape=(Lens,), dtype=np.float64)
    one_hot[index] = 1
    return one_hot


# Normalize each row of the signal matrix.
def normalize(v):
    """Center every row of ``v`` on its mean, then scale by the row maximum.

    Args:
        v: 2-D ``numpy.ndarray``; each row is processed independently.

    Returns:
        Array of the same shape holding ``(v - row_mean) / (row_max + 2e-12)``.
    """
    row_mean = v.mean(axis=1, keepdims=True)
    # The maximum is taken over the raw (un-centered) rows; the tiny constant
    # keeps the division defined when a row's maximum is exactly zero.
    row_scale = v.max(axis=1, keepdims=True) + 2e-12
    return (v - row_mean) / row_scale


# Custom dataset that serves the labelled recordings.
class MyDataset(data.Dataset):
    """Dataset over the recordings listed in ``<data_path>/reference.csv``.

    Every CSV row is read as ``(file name, label)``.  The first ``train_size``
    rows form the training split and the remaining rows the validation split.

    Args:
        mode: ``"train"`` or ``"valid"`` (case-insensitive).
        data_path: Directory containing ``reference.csv`` and the ``TRAIN``
            folder that holds the ``.mat`` files.
        train_size: Number of leading CSV rows assigned to the training
            split.  Defaults to 500, the value that used to be hard-coded.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"valid"``.
    """

    def __init__(self, mode, data_path, train_size=500):
        super().__init__()

        # Validate first so a bad mode fails before any file is read.
        self.mode = mode.lower()
        if self.mode not in ('train', 'valid'):
            raise ValueError('mode must be "train" or "valid"!')

        self.csv_path = os.path.join(data_path, "reference.csv")
        self.data_path = os.path.join(data_path, "TRAIN")
        self.temp_list = []  # rows of (mat file name, label)
        self._parse_dataset()

        if self.mode == 'train':
            self.temp_list = self.temp_list[:train_size]
        else:
            self.temp_list = self.temp_list[train_size:]

    def __getitem__(self, item):
        """Return ``(feature, one-hot label)`` for the row at position ``item``."""
        name = self.temp_list[item, 0]
        label = self.temp_list[item, 1]
        # Two classes: 0 = normal, 1 = abnormal.
        return self.get_feature(name), convert2oneHot(label, 2)

    def __len__(self):
        """Return the number of rows in this split."""
        return len(self.temp_list)

    def get_feature(self, name):
        """Load ``name`` from the ``TRAIN`` folder and return its normalized signal.

        Reads the ``'data'`` variable of the MAT file and keeps its first 12
        rows (presumably one row per lead, e.g. shape (12, 5000) — confirm
        against the data files).
        """
        mat = loadmat(os.path.join(self.data_path, name))
        leads = mat['data'][0:12]
        return normalize(leads)

    def _parse_dataset(self):
        """Read the CSV into ``self.temp_list`` as a 2-D numpy array."""
        self.temp_list = np.array(pd.read_csv(self.csv_path))

(2)网络模型搭建(model.py)

from torch import nn
# input: (20, 12, 5000)

class Net(nn.Module):
    """1-D convolutional classifier for 12-channel signals.

    Four convolutional stages (two Conv1d+ReLU pairs followed by max
    pooling each) are followed by adaptive average pooling to a fixed length
    of 2, flattening, and a linear layer producing 2 class scores that are
    turned into probabilities by a softmax.

    Input:  float tensor of shape ``(batch, 12, length)``.
    Output: float tensor of shape ``(batch, 2)`` whose rows sum to 1.

    NOTE(review): the output is a probability distribution, not raw logits,
    so it should be paired with a loss that expects (log-)probabilities.
    NOTE(review): ``Dropout`` sits after the final ``Linear`` and therefore
    acts directly on the class scores while training; the layer order is kept
    unchanged so that existing checkpoints keep loading.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels=12, out_channels=16, kernel_size=16, stride=2, padding=8),
            nn.ReLU(),
            nn.Conv1d(in_channels=16, out_channels=16, kernel_size=16, stride=2, padding=8),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv1d(in_channels=16, out_channels=64, kernel_size=8, stride=2, padding=4),
            nn.ReLU(),
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=8, stride=2, padding=4),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=4, stride=2, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv1d(in_channels=256, out_channels=256, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )
        # Collapse the time axis to a fixed length so the classifier head
        # does not depend on the input length.
        self.layer5 = nn.Sequential(
            nn.AdaptiveAvgPool1d(2),
            nn.Flatten()
        )

        self.layer6 = nn.Sequential(
            nn.Linear(in_features=256 * 2, out_features=2),
            nn.Dropout(0.3),
            # Explicit class axis: omitting ``dim`` is deprecated, emits a
            # warning on every call and guesses the axis from the input rank.
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Run ``x`` through all six stages and return class probabilities."""
        stages = (self.layer1, self.layer2, self.layer3,
                  self.layer4, self.layer5, self.layer6)
        for stage in stages:
            x = stage(x)
        return x

(3)模型训练与测试(train.py)

import time
from torch import optim, nn
from torch.utils import data
from dataset import MyDataset
from model import Net
from function import *

# Explicit imports: `os` and `torch` used to reach this script only as a side
# effect of `from function import *`.
import os
import torch

# Load data
cur_path = os.getcwd()
# Join the components separately so the path also works outside Windows.
data_path = os.path.join(cur_path, "data", "preliminary")
train_dataset = MyDataset(mode='train', data_path=data_path)
train_dataloader = data.DataLoader(train_dataset, batch_size=20, shuffle=True)
valid_dataset = MyDataset(mode='valid', data_path=data_path)
valid_dataloader = data.DataLoader(valid_dataset, batch_size=20)

# Instantiate the model
model = Net()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.0003)

# Loss function.  Net ends in a Softmax, so its outputs are probabilities;
# nn.CrossEntropyLoss expects raw logits and would apply a second softmax.
# NLLLoss on the log of those probabilities is the matching criterion.
criterion = nn.NLLLoss()

# Number of training epochs
epochs = 50

# Directory where checkpoints are saved
save_path = 'checkpoint/'
os.makedirs(save_path, exist_ok=True)


def _run_epoch(dataloader, training):
    """Run one pass over ``dataloader`` and return ``(mean loss, accuracy)``.

    When ``training`` is true the model is put in train mode and the
    optimizer is stepped after every batch; otherwise the model is put in
    eval mode and gradients are disabled.  Both results are averaged over
    samples, so a smaller final batch is weighted correctly.
    """
    model.train(training)
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in dataloader:
            # The DataLoader already yields tensors; only the dtype changes.
            inputs = inputs.to(torch.float)
            # One-hot rows -> class indices, as NLLLoss requires.
            targets = labels.argmax(dim=1)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            # Clamp before the log so a probability of exactly 0 cannot
            # produce -inf.
            loss = criterion(torch.log(outputs.clamp_min(1e-12)), targets)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = inputs.shape[0]
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            seen += batch_size
    seen = max(seen, 1)  # avoid dividing by zero on an empty loader
    return loss_sum / seen, correct / seen


# Training & validation loop
train_acc, valid_acc, train_losses, valid_losses = [], [], [], []
best_acc = 0.0
for epoch in range(epochs):
    epoch_start = time.time()

    train_loss, train_a = _run_epoch(train_dataloader, training=True)
    valid_loss, valid_a = _run_epoch(valid_dataloader, training=False)

    train_acc.append(train_a)
    valid_acc.append(valid_a)
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    epoch_end = time.time()

    # Save the best model so far and, on the last epoch, the final model
    if valid_a > best_acc:
        torch.save(model.state_dict(), save_path + 'best_model')
        best_acc = valid_a
    if epoch == epochs - 1:
        torch.save(model.state_dict(), save_path + 'final_model')

    print("Epoch: {}/{}, Training:\tLoss: {:.4f}, Accuracy: {:.2f}%, "
          "\t\tValidation:\tLoss: {:.4f}, Accuracy: {:.2f}%, Time: {:.4f}s".format(
        epoch + 1, epochs, train_loss, train_a * 100, valid_loss, valid_a * 100,
        epoch_end - epoch_start))

print("training end.best_model save to checkpoint.")

plot_acc(train_acc, valid_acc)
plot_loss(train_losses, valid_losses)
# The last argument is the validation loss; it used to be train_losses, which
# drew the training curve twice.
plot_results(epochs, train_acc, train_losses, valid_acc, valid_losses)

(4)模型预测 (demo.py)

from torch.autograd import Variable
from glob import glob
from model import Net
from function import *
from scipy.io import loadmat


def normalize(v):
    """Subtract each row's mean from ``v`` and divide by that row's maximum.

    Args:
        v: 2-D ``numpy.ndarray``; rows are normalized independently.

    Returns:
        Array shaped like ``v`` holding ``(v - row_mean) / (row_max + 2e-12)``.
    """
    n_rows = v.shape[0]
    centered = v - v.mean(axis=1).reshape(n_rows, 1)
    # Maximum of the raw rows; the small offset guards against a zero divisor.
    denominator = v.max(axis=1).reshape(n_rows, 1) + 2e-12
    return centered / denominator


def load_data(path):
    """Load one MAT file and return its normalized signal.

    Reads the ``'data'`` variable stored at ``path``, keeps its first 12
    rows (presumably one per lead, e.g. shape (12, 5000) — confirm against
    the data files) and passes them through ``normalize``.
    """
    leads = loadmat(path)['data'][:12]
    return normalize(leads)


def save_txt(files, predictions, txt_path):
    """Write one ``"<file> <prediction>"`` line per entry of ``files``.

    Args:
        files: Sequence of file names.
        predictions: Sequence parallel to ``files``; each element must expose
            ``.item()`` (for example a one-element tensor).  Entries beyond
            ``len(files)`` are ignored.
        txt_path: Destination text file; it is overwritten and encoded as
            UTF-8.

    Raises:
        IndexError: If ``predictions`` has fewer entries than ``files``.
    """
    # Fail before touching the disk instead of leaving a half-written file.
    if len(predictions) < len(files):
        raise IndexError('predictions has fewer entries than files')

    # The context manager closes the file even if formatting a line fails.
    with open(txt_path, "w", encoding='UTF-8') as out:
        out.writelines(
            f'{name} {pred.item()}\n' for name, pred in zip(files, predictions)
        )


if __name__ == '__main__':
    # Locate the test recordings.  The path components are joined one by one
    # so the script also works outside Windows.
    cur_path = os.getcwd()
    data_path = os.path.join(cur_path, "data", "preliminary", "TEST")
    # glob returns matches in arbitrary order; sort for a stable output file.
    files = sorted(glob(os.path.join(data_path, '*.mat')))

    # Load the trained model
    model = Net()
    checkpoint = os.path.join(cur_path, 'checkpoint', 'best_model')
    # map_location lets the weights load on a CPU-only machine as well.
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
    model.eval()

    predictions = []
    file_list = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for file in files:
            # Add a leading batch axis of size 1 for the model.
            inputs = torch.tensor(load_data(file), dtype=torch.float).unsqueeze(0)
            pred = model(inputs)
            pred_ = pred.argmax(dim=1)  # index of the most probable class
            name = os.path.basename(file)
            file_list.append(name)
            predictions.append(pred_)

            print(f'{name} \t {pred_.item()}')
    save_txt(file_list, predictions, 'pred_result.txt')

(5)功能函数(function.py)

# 解决中文显示问题
import os
import pandas as pd
import numpy as np
import torch
from matplotlib import pyplot as plt

# Matplotlib settings used to make Chinese titles display correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})


# Plotting helpers
def plot_loss(train_loss, val_loss):
    """Plot training and validation loss per epoch.

    The figure is saved to ``results/loss.png`` and then shown.

    Args:
        train_loss: Sequence of training-loss values, one per epoch.
        val_loss: Sequence of validation-loss values, one per epoch.
    """
    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)
    # Start on a fresh figure: with a non-interactive backend plt.show() does
    # not clear anything, so curves from an earlier plot would be drawn again.
    plt.figure()
    plt.plot(train_loss, label='train_loss')
    plt.plot(val_loss, label='val_loss')
    plt.legend(loc='best')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.title("训练集和验证集loss值得对比图")
    plt.savefig('results/loss.png')
    plt.show()
    plt.close()


def plot_acc(train_acc, val_acc):
    """Plot training and validation accuracy per epoch.

    The figure is saved to ``results/acc.png`` and then shown.

    Args:
        train_acc: Sequence of training-accuracy values, one per epoch.
        val_acc: Sequence of validation-accuracy values, one per epoch.
    """
    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)
    # Start on a fresh figure: with a non-interactive backend plt.show() does
    # not clear anything, so curves from an earlier plot would be drawn again.
    plt.figure()
    plt.plot(train_acc, label='train_acc')
    plt.plot(val_acc, label='val_acc')
    plt.legend(loc='best')
    plt.ylabel('acc')
    plt.xlabel('epoch')
    plt.title("训练集和验证集acc值得对比图")
    plt.savefig('results/acc.png')
    plt.show()
    plt.close()


def plot_results(epochs, train_acc, train_loss, test_acc, test_loss):
    """Plot accuracy and loss curves for both splits on one set of axes.

    The figure is saved to ``results/result.png`` and then shown.

    Args:
        epochs: Number of epochs; every sequence below must have this length
            because they are all plotted against ``np.arange(epochs)``.
        train_acc: Training accuracy per epoch.
        train_loss: Training loss per epoch.
        test_acc: Validation/test accuracy per epoch.
        test_loss: Validation/test loss per epoch.
    """
    # savefig does not create missing directories.
    os.makedirs('results', exist_ok=True)
    # Start on a fresh figure: with a non-interactive backend plt.show() does
    # not clear anything, so curves from an earlier plot would be drawn again.
    plt.figure()

    x = np.arange(epochs)
    curves = (
        (train_acc, 'train_acc'),
        (train_loss, 'train_loss'),
        (test_acc, 'test_acc'),
        (test_loss, 'test_loss'),
    )
    for values, label in curves:
        plt.plot(x, values, label=label)

    plt.title("Results", fontsize=15)
    plt.xlabel("X", fontsize=13)
    plt.ylabel("Y", fontsize=13)
    plt.legend()
    plt.savefig('results/result.png')
    plt.show()
    plt.close()


# Read reference.txt, shuffle its rows and save them to reference.csv.
def create_csv(txt_path, csv_path, random_state=None):
    """Shuffle a tab-separated, header-less text file and save it as CSV.

    Args:
        txt_path: Tab-separated input file without a header row.
        csv_path: Destination CSV.  It is written with a header row of
            column numbers and without the index column.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced.  ``None`` (the default) keeps the
            previous unseeded behaviour.
    """
    # A literal tab keeps pandas on its fast C parser; the former two-character
    # pattern r"\t" was treated as a regular expression and forced the python
    # engine with a ParserWarning.
    records = pd.read_csv(txt_path, sep="\t", header=None)
    shuffled = records.sample(frac=1, random_state=random_state)
    shuffled.to_csv(csv_path, index=False)
    print("Finish save csv")
  • 3
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 7
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

An_37

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值