rppg信号预测血压

时间刻沙

已于 2023-10-12 09:14:34 修改

阅读量816

点赞数

文章标签： python 深度学习

于 2023-10-12 09:08:18 首次发布

本文链接：https://blog.csdn.net/etchtime/article/details/133782848

版权

1.数据集介绍

MIMIC-III_ppg_dataset为MIMIC-III数据库中提取出来的包含ppg信号和血压的数据集

ppg频率为125hz（https://www.physionet.org/content/mimic3wdb/1.0/），长度为875，

7s一条ppg信号记录（875个采样点 ÷ 125hz = 7s）

MIMIC-III_ppg_dataset.h5数据集以及处理的代码来源于

https://github.com/Fabian-Sc85/non-invasive-bp-estimation-using-deep-learning

non-invasive-bp-estimation-using-deep-learning-main为上述链接的代码

h5_to_txt.py为将数据提取出来代码，接受两个参数

input 需要处理的h5文件的路径

output处理之后输出的路径

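数据集的真值会存放在三个csv文件中。注意代码中csv的路径是由output参数和文件名直接拼接得到的（没有路径分隔符），例如output为"output"时，会在当前路径下生成outputMIMIC-III_BP_trainset.csv、outputMIMIC-III_BP_valset.csv和outputMIMIC-III_BP_testset.csv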

输出的文件夹分为train,test,val。每个文件夹中每1000条数据又划分为一个小的文件夹，每个文件的文件名对应csv中的Index

h5_to_txt.py代码如下，需要tensorflow

import matplotlib.pyplot as plt
import argparse
import pandas as pd
import h5py
import tensorflow as tf
# ks.enable_eager_execution()
import numpy as np
from sklearn.model_selection import train_test_split

from datetime import datetime
from os.path import expanduser, isdir, join
from os import mkdir
from sys import argv

def _float_feature(value):
    """Wrap ``value`` in a ``tf.train.Feature`` carrying a ``FloatList``.

    ``value`` is forwarded unchanged as the ``value`` argument of
    ``tf.train.FloatList``; it is not wrapped in a list here.
    NOTE(review): this presumably means callers must pass an iterable of
    floats rather than a bare scalar - confirm against the TensorFlow API.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)


def _bytes_feature(value):
    """Wrap a single ``value`` in a ``tf.train.Feature`` carrying a ``BytesList``.

    ``value`` is placed in a one-element list before it is handed to
    ``tf.train.BytesList``.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)


def write_array_to_file(file_path, array):
    """Write each element of ``array`` to ``file_path``, one element per line.

    Every element is converted with ``str`` and terminated by a newline.
    The file is opened in text write mode, so existing content is replaced.
    An ``IOError`` while opening or writing is not propagated: a message is
    printed instead and the function returns normally.

    Parameters:
        file_path: destination path of the text file.
        array: iterable of values to write.
    """
    try:
        with open(file_path, 'w') as handle:
            handle.writelines(str(entry) + '\n' for entry in array)
    except IOError:
        print(f"无法写入文件 {file_path}")

def ppg_hdf2tfrecord(h5_file, tfrecord_path, samp_idx, weights_SBP=None, weights_DBP=None):
    """Export selected PPG windows from an .h5 file as one text file each.

    Despite its name (kept for compatibility with the code it was derived
    from), this function does not write .tfrecord files. For every index in
    ``samp_idx`` it reads the matching row of the ``/ppg`` dataset and writes
    it to ``<tfrecord_path>/<index>.txt`` with one value per line.

    Parameters:
        h5_file: path of the .h5 file containing the ``/ppg`` dataset.
        tfrecord_path: existing directory that receives the .txt files.
        samp_idx: integer row indices of the samples to export. An empty
            sequence is accepted and results in no files being written.
        weights_SBP: accepted for interface compatibility; not used.
        weights_DBP: accepted for interface compatibility; not used.
    """
    # Flatten to a plain 1-D array: unlike np.nditer this also works for an
    # empty selection and for plain Python lists.
    indices = np.asarray(samp_idx).ravel()

    with h5py.File(h5_file, 'r') as f:
        ppg_h5 = f.get('/ppg')

        for i in indices:
            row = int(i)
            ppg = np.array(ppg_h5[row, :])
            # The file name is the dataset index, which is also the value
            # stored in the "Index" column of the ground-truth CSV files.
            filepath = join(tfrecord_path, str(row) + ".txt")
            write_array_to_file(filepath, ppg)


def ppg_hdf2tfrecord_sharded(h5_file, samp_idx, tfrecordpath, Nsamp_per_shard, modus='train', weights_SBP=None,
                         weights_DBP=None):
    """Export the samples in ``samp_idx`` in consecutive groups ("shards").

    ``samp_idx`` is cut into consecutive slices of at most
    ``Nsamp_per_shard`` entries. Slice number k (counting from 1) is handed
    to ``ppg_hdf2tfrecord`` together with the directory
    ``<tfrecordpath>/<k>``, which is created first if it does not exist.
    A progress line is printed for every shard.

    Parameters:
        h5_file: path of the .h5 file holding the whole dataset.
        samp_idx: sample indices of the dataset to export.
        tfrecordpath: existing directory under which the numbered shard
            directories are created.
        Nsamp_per_shard: maximum number of samples per shard directory.
        modus: label used only in the progress message
            ("train", "val" or "test").
        weights_SBP: forwarded unchanged to ``ppg_hdf2tfrecord``.
        weights_DBP: forwarded unchanged to ``ppg_hdf2tfrecord``.
    """
    total = len(samp_idx)

    # Number of shard directories needed to hold all samples.
    N_shards = np.ceil(total / Nsamp_per_shard).astype(int)

    for shard in range(N_shards):
        first = shard * Nsamp_per_shard
        # The last shard may be shorter than the others.
        last = min(first + Nsamp_per_shard, total)
        shard_indices = samp_idx[first:last]

        stamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        print(stamp, ': processing ',
              modus,
              ' shard ', str(shard + 1), ' of ', str(N_shards))

        shard_dir = join(tfrecordpath, str(shard + 1))
        if not isdir(shard_dir):
            mkdir(shard_dir)
        ppg_hdf2tfrecord(h5_file, shard_dir, shard_indices, weights_SBP=weights_SBP, weights_DBP=weights_DBP)


def h5_to_tfrecords(SourceFile, tfrecordsPath, N_train=1e6, N_val=2.5e5, N_test=2.5e5,
                    divide_by_subject=True, save_tfrecords=True):
    """Split the PPG/BP dataset into train/val/test and export all three parts.

    The function reads ``/label`` and ``/subject_idx`` from ``SourceFile``,
    draws the sample indices of the three subsets, stores the rounded
    ground-truth blood pressure of every subset in a CSV file and, when
    ``save_tfrecords`` is true, exports the PPG windows of the train, val and
    test subsets into ``<tfrecordsPath>/train``, ``.../val`` and ``.../test``.

    Parameters:
        SourceFile: path of the .h5 dataset.
        tfrecordsPath: output directory. It and its ``train``/``val``/``test``
            subdirectories are created when missing.
        N_train, N_val, N_test: number of samples per subset (cast to int).
        divide_by_subject: if exactly ``True``, subjects are split first
            (50 % train, 25 % val, 25 % test) and samples are then drawn
            without replacement from each subject group. Otherwise a prefix
            of the subjects is selected and its samples are split randomly.
        save_tfrecords: whether the PPG windows are exported in addition to
            the CSV files.

    Raises:
        ValueError: from ``np.random.choice`` when a subject group contains
            fewer samples than requested for its subset.

    Note:
        The CSV path is built as ``tfrecordsPath + '<name>.csv'`` without a
        path separator (for example ``outputMIMIC-III_BP_trainset.csv`` for
        ``tfrecordsPath='output'``). This is kept as is because the dataset
        and training scripts open the files under exactly these names.
    """
    from os import makedirs  # local import: the module itself only imports ``mkdir``

    N_train = int(N_train)
    N_val = int(N_val)
    N_test = int(N_test)

    print(N_train)

    tfrecord_path_train = join(tfrecordsPath, 'train')
    tfrecord_path_val = join(tfrecordsPath, 'val')
    tfrecord_path_test = join(tfrecordsPath, 'test')
    # makedirs also creates a missing output root, which plain mkdir did not.
    for split_dir in (tfrecord_path_train, tfrecord_path_val, tfrecord_path_test):
        makedirs(split_dir, exist_ok=True)

    csv_path = tfrecordsPath

    Nsamp_per_shard = 1000

    with h5py.File(SourceFile, 'r') as f:
        BP = np.array(f.get('/label'))
        BP = np.round(BP)
        BP = np.transpose(BP)
        subject_idx = np.squeeze(np.array(f.get('/subject_idx')))

    # After the transpose the samples run along axis 1 of BP.
    N_samp_total = BP.shape[1]
    subject_idx = subject_idx[:N_samp_total]
    print("total data number:", N_samp_total)

    # Divide the dataset into training, validation and test set
    # -------------------------------------------------------------------------------
    if divide_by_subject is True:
        valid_idx = np.arange(subject_idx.shape[-1])

        # divide the subjects into training, validation and test subjects
        subject_labels = np.unique(subject_idx)
        subjects_train_labels, subjects_val_labels = train_test_split(subject_labels, test_size=0.5)
        subjects_val_labels, subjects_test_labels = train_test_split(subjects_val_labels, test_size=0.5)

        # Calculate samples belong to training, validation and test subjects
        train_part = valid_idx[np.isin(subject_idx, subjects_train_labels)]
        val_part = valid_idx[np.isin(subject_idx, subjects_val_labels)]
        test_part = valid_idx[np.isin(subject_idx, subjects_test_labels)]

        # draw a number samples defined by N_train, N_val and N_test from the training, validation and test subjects
        idx_train = np.random.choice(train_part, N_train, replace=False)
        idx_val = np.random.choice(val_part, N_val, replace=False)
        idx_test = np.random.choice(test_part, N_test, replace=False)
    else:
        # Create a subset of the whole dataset by drawing a number of subjects from the dataset. The total number of
        # samples contributed by those subjects must equal N_train + N_val + _N_test
        subject_labels, SampSubject_hist = np.unique(subject_idx, return_counts=True)
        cumsum_samp = np.cumsum(SampSubject_hist)
        subject_labels_train = subject_labels[:np.nonzero(cumsum_samp > (N_train + N_val + N_test))[0][0]]
        idx_valid = np.nonzero(np.isin(subject_idx, subject_labels_train))[0]

        # divide subset randomly into training, validation and test set
        idx_train, idx_val = train_test_split(idx_valid, train_size=N_train, test_size=N_val + N_test)
        idx_val, idx_test = train_test_split(idx_val, test_size=0.5)

    # save ground truth BP values of training, validation and test set in csv-files for future reference
    # (row 0 of BP is written as SBP, row 1 as DBP)
    for split_name, split_idx in (('train', idx_train), ('val', idx_val), ('test', idx_test)):
        BP_split = BP[:, split_idx]
        frame = pd.DataFrame({"Index": split_idx, "SBP": BP_split[0, :], "DBP": BP_split[1, :]})
        frame.to_csv(csv_path + 'MIMIC-III_BP_' + split_name + 'set.csv')

    print("train number:", len(idx_train))
    print("test number:", len(idx_test))
    print("val number:", len(idx_val))

    # export the PPG windows of all three subsets
    # ----------------------------
    if save_tfrecords:
        # Shuffled only after the CSV has been written; the CSV is looked up
        # by its "Index" column, so the row order does not matter.
        np.random.shuffle(idx_train)
        print("start generate test data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_test, tfrecord_path_test, Nsamp_per_shard, modus='test')
        print("start generate train data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_train, tfrecord_path_train, Nsamp_per_shard, modus='train')
        print("start generate val data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_val, tfrecord_path_val, Nsamp_per_shard, modus='val')
    print("Script finished")

if __name__ == "__main__":
    # Seed NumPy's global generator, which h5_to_tfrecords uses for
    # np.random.choice / np.random.shuffle.
    np.random.seed(seed=42)

    if len(argv) > 1:
        parser = argparse.ArgumentParser()
        # --input and --output are required: without them the conversion
        # would only fail later with an unrelated-looking error on None.
        parser.add_argument('--input', type=str, required=True,
                            help="Path to the .h5 file containing the dataset")
        parser.add_argument('--output', type=str, required=True,
                            help="Target folder for the .tfrecord files")
        parser.add_argument('--ntrain', type=int, default=int(1e6),
                            help="Number of samples in the training set (default: 1e6)")
        parser.add_argument('--nval', type=int, default=int(2.5e5),
                            help="Number of samples in the validation set (default: 2.5e5)")
        parser.add_argument('--ntest', type=int, default=int(2.5e5),
                            help="Number of samples in the test set (default: 2.5e5)")
        parser.add_argument('--divbysubj', type=int, default=1,
                            help="Perform subject based (1) or sample based (0) division of the dataset")
        args = parser.parse_args()
        SourceFile = args.input
        tfrecordsPath = args.output
        divbysubj = args.divbysubj == 1

        N_train = int(args.ntrain)
        N_val = int(args.nval)
        N_test = int(args.ntest)
    else:
        # No command line arguments: fall back to fixed locations below the
        # user's home directory.
        HomePath = expanduser("~")
        SourceFile = join(HomePath, 'data', 'MIMIC-III_BP', 'MIMIC-III_ppg_dataset.h5')
        tfrecordsPath = join(HomePath, 'test')
        divbysubj = True
        N_train = 1e6
        N_val = 2.5e5
        N_test = 2.5e5

    h5_to_tfrecords(SourceFile=SourceFile, tfrecordsPath=tfrecordsPath, divide_by_subject=divbysubj,
                      N_train=N_train, N_val=N_val, N_test=N_test)

日志部分代码

import logging
import time
import os

def getlog():
    """Return the shared logger named ``'test'``, configuring it on first use.

    On the first call the logger gets two handlers, both at INFO level and
    sharing one format: a file handler writing to
    ``./log/<month><day>-<hour><minute><second>.log`` (the directory is
    created when missing) and a stream handler. Later calls return the same,
    already configured logger; without that guard every call would attach
    another pair of handlers and each message would be emitted repeatedly.

    Returns:
        logging.Logger: the configured logger.
    """
    logger = logging.getLogger('test')
    if logger.handlers:
        # Already configured by an earlier call.
        return logger

    logger.setLevel(level=logging.DEBUG)

    formatter = logging.Formatter('%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
    logdir = "./log"
    # exist_ok avoids the check-then-create race between two processes.
    os.makedirs(logdir, exist_ok=True)
    log_filename = str(time.strftime('%m%d-%H%M%S', time.localtime())) + ".log"
    log_filename = os.path.join(logdir, log_filename)
    file_handler = logging.FileHandler(log_filename)
    file_handler.setLevel(level=logging.INFO)
    file_handler.setFormatter(formatter)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger


if __name__ == "__main__":
    # Manual smoke test: emit one INFO message through the configured logger.
    getlog().info("hello log")

2.模型选择

模型选用resnet，具体根据需要可以选用resnet34，resnet50或者更高的层数，由于输入是一维，所以需要改造传统resnet网络，改造后如下，代码主体来源于pytorch自带resnet模型：

from functools import partial
from typing import Any, Callable, List, Optional, Type, Union

import torch
import torch.nn as nn
from torch import Tensor


def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 0) -> nn.Conv1d:
    """Build the 1-D convolution that stands in for torchvision's 3x3 conv.

    NOTE(review): despite the name, the layer uses ``kernel_size=1``, so it
    mixes channels only and never looks at neighbouring time steps. ``groups``
    is accepted but not forwarded to ``nn.Conv1d``, and ``dilation`` is used
    only as the amount of padding. The layer keeps ``nn.Conv1d``'s default
    bias. Changing any of this alters the parameters of the model, so saved
    checkpoints would need retraining.
    """
    conv_options = dict(kernel_size=1, stride=stride, padding=dilation)
    return nn.Conv1d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv1d:
    """Return a bias-free 1-D convolution with kernel size 1 and the given stride."""
    pointwise = nn.Conv1d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
    return pointwise


class BasicBlock(nn.Module):
    """Residual block with two conv + norm stages and an additive shortcut.

    The first convolution carries the stride; an optional ``downsample``
    module transforms the input so that it can be added to the main branch.
    Only ``groups=1``, ``base_width=64`` and ``dilation <= 1`` are supported.
    """

    # Output channels of the block are ``planes * expansion``.
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 0,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm1d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # Submodules are created in a fixed order so that parameter
        # registration (and therefore random initialisation) stays stable.
        # conv1 and the downsample branch both apply the stride.
        self.conv1 = conv3x3(inplanes, planes, stride, dilation=dilation)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the activated residual sum."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class Bottleneck(nn.Module):
    """Residual block with three conv + norm stages and an additive shortcut.

    The layout follows torchvision's bottleneck (pointwise conv, strided
    middle conv, pointwise conv), with the stride on the middle convolution.

    NOTE(review): ``expansion`` is 1 here, whereas torchvision's Bottleneck
    uses 4, so the block does not widen its output. The hard-coded input size
    of ``ResNet.fc1`` (``512 * 110``) appears to rely on this - confirm
    before changing it.
    """

    # Output channels of the block are ``planes * expansion``.
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 0,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm1d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.0)) * groups
        out_channels = planes * self.expansion

        # Submodules are created in a fixed order so that parameter
        # registration (and therefore random initialisation) stays stable.
        # conv2 and the downsample branch both apply the stride.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = make_norm(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = make_norm(width)
        self.conv3 = conv1x1(width, out_channels)
        self.bn3 = make_norm(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the activated residual sum."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class ResNet(nn.Module):
    """1-D ResNet-style regressor built from ``BasicBlock`` or ``Bottleneck``.

    Structure: a single-input-channel stem (conv, norm, ReLU, max-pool), four
    residual stages with 64/128/256/512 planes (stages 2-4 use stride 2
    unless replaced by dilation), an average pool, a flatten and two linear
    layers (``fc1``: ``512 * 110`` -> 1024, ``fc2``: 1024 -> ``num_classes``).

    NOTE(review):
    * ``fc1`` has a hard-coded input size of ``512 * 110``. With the
      kernel-size-1 convolutions in this file, three stride-2 stages map a
      length of 875 to 438, 219 and 110, so this presumably assumes
      875-sample inputs and ``block.expansion == 1`` - confirm.
    * ``maxpool`` and ``avgpool`` use kernel size 1 and stride 1.
    * ``layernumber`` is accepted but not used.
    * ``self.dilation`` starts at 0, so ``self.dilation *= stride`` leaves it
      at 0; requesting dilation only resets that stage's stride to 1.
    * There is no activation between ``fc1`` and ``fc2``.
    """

    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        layers: List[int],
        num_classes: int = 1000,
        layernumber: int = 34,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        self._norm_layer = nn.BatchNorm1d if norm_layer is None else norm_layer
        norm_layer = self._norm_layer

        self.inplanes = 64
        self.dilation = 0
        if replace_stride_with_dilation is None:
            # One flag per strided stage: replace its stride with dilation?
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                f"or a 3-element tuple, got {replace_stride_with_dilation}"
            )
        dilate2, dilate3, dilate4 = replace_stride_with_dilation

        self.groups = groups
        self.base_width = width_per_group

        # Stem: the network expects a single input channel.
        self.conv1 = nn.Conv1d(1, self.inplanes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=1, stride=1, padding=0)

        # Residual stages.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=dilate2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=dilate3)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=dilate4)

        # Head.
        self.avgpool = nn.AvgPool1d(kernel_size=1, stride=1, padding=0)
        outshape1 = 512 * 110
        self.fc1 = nn.Linear(outshape1, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

        # Weight initialisation: Kaiming-normal for convolutions, unit scale
        # and zero shift for normalisation layers.
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(module, (nn.BatchNorm1d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Optionally zero the scale of the last norm layer in every residual
        # branch so that each block starts out as an identity mapping
        # (https://arxiv.org/abs/1706.02677).
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck) and module.bn3.weight is not None:
                    nn.init.constant_(module.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(module, BasicBlock) and module.bn2.weight is not None:
                    nn.init.constant_(module.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        planes: int,
        blocks: int,
        stride: int = 1,
        dilate: bool = False,
    ) -> nn.Sequential:
        """Build one residual stage made of ``blocks`` instances of ``block``.

        Only the first block receives ``stride`` and the downsample shortcut;
        ``self.inplanes`` is updated to the stage's output channel count.
        """
        norm_layer = self._norm_layer
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        out_channels = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            # The shortcut must match the main branch in length and channels.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_channels, stride),
                norm_layer(out_channels),
            )

        first_block = block(
            self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
        )
        self.inplanes = out_channels
        remaining_blocks = [
            block(
                self.inplanes,
                planes,
                groups=self.groups,
                base_width=self.base_width,
                dilation=self.dilation,
                norm_layer=norm_layer,
            )
            for _ in range(1, blocks)
        ]

        return nn.Sequential(first_block, *remaining_blocks)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run stem, residual stages and head on ``x`` and return the output."""
        pipeline = (
            self.conv1,
            self.bn1,
            self.relu,
            self.maxpool,
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.avgpool,
        )
        for stage in pipeline:
            x = stage(x)

        x = torch.flatten(x, 1)
        return self.fc2(self.fc1(x))

    def forward(self, x: Tensor) -> Tensor:
        """Forward pass; delegates to ``_forward_impl``."""
        return self._forward_impl(x)


def _resnet(
    block: Type[Union[BasicBlock, Bottleneck]],
    layers: List[int],
    progress: bool,
    **kwargs: Any,
) -> ResNet:
    """Instantiate ``ResNet`` with the given block type and stage depths.

    ``progress`` is accepted for signature compatibility but is not used;
    all remaining keyword arguments are forwarded to ``ResNet``.
    """
    return ResNet(block, layers, **kwargs)

def resnet34(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build a ``ResNet`` of ``BasicBlock`` stages with depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return _resnet(BasicBlock, stage_depths, progress, **kwargs)


def resnet50(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build a ``ResNet`` of ``Bottleneck`` stages with depths 3, 4, 6 and 3."""
    stage_depths = [3, 4, 6, 3]
    return _resnet(Bottleneck, stage_depths, progress, **kwargs)


def resnet101(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build a ``ResNet`` of ``Bottleneck`` stages with depths 3, 4, 23 and 3."""
    stage_depths = [3, 4, 23, 3]
    return _resnet(Bottleneck, stage_depths, progress, **kwargs)

3.dataset代码如下,data_length为后续使用的数据长度


from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

def readFile(filename):
    """Read a text file containing one number per line into a float array.

    Surrounding whitespace is stripped from every line. Blank lines (for
    example a trailing empty line) are skipped instead of failing in
    ``float('')``.

    Parameters:
        filename: path of the text file to read.

    Returns:
        numpy.ndarray: 1-D float64 array with one entry per non-blank line.

    Raises:
        ValueError: if a non-blank line cannot be parsed as a float.
    """
    with open(filename, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        values = [float(text) for text in stripped_lines if text]
    # The explicit dtype keeps the result float64 even for an empty file.
    return np.array(values, dtype=float)

class mmic(Dataset):
    """Dataset of PPG text files paired with blood-pressure labels from a CSV.

    ``root_dir`` is expected to contain subdirectories holding files named
    ``<index>.txt``; ``<index>`` is looked up in the ``Index`` column of
    ``csv_file`` to obtain the ``SBP`` and ``DBP`` values.

    Parameters:
        csv_file: CSV file with the columns ``Index``, ``SBP`` and ``DBP``.
        root_dir: directory whose subdirectories contain the ``.txt`` files.
        data_length: upper bound on the number of samples exposed through
            ``len()``. The effective length never exceeds the number of files
            found, so a too-large value can no longer cause an ``IndexError``
            inside a ``DataLoader``.
        transform: stored on the instance; not applied by this class.

    NOTE(review): files are kept in ``os.listdir`` order, which the operating
    system does not guarantee; the subset selected by ``data_length`` can
    therefore differ between machines.
    """

    def __init__(self, csv_file, root_dir, data_length=10000, transform=None):
        self.bp = pd.read_csv(csv_file, index_col="Index")
        self.root_dir = root_dir
        self.transform = transform
        self.to_tensor = transforms.ToTensor()
        self.dirlist = os.listdir(root_dir)
        self.datalen = data_length
        self.filelist = []
        self.indexlist = []
        for dirname in self.dirlist:
            dirpath = os.path.join(self.root_dir, dirname)
            if not os.path.isdir(dirpath):
                # Stray files next to the shard directories are ignored.
                continue
            for filename in os.listdir(dirpath):
                stem, extension = os.path.splitext(filename)
                if extension != ".txt":
                    continue
                self.indexlist.append(stem)
                self.filelist.append(os.path.join(dirpath, filename))

    def __len__(self):
        """Return ``data_length`` capped at the number of files found."""
        return min(self.datalen, len(self.filelist))

    def __getitem__(self, idx):
        """Return ``(normalized_ppg, label)`` for the sample at position ``idx``.

        The signal is min-max scaled to [0, 1]; a constant signal is mapped
        to all zeros instead of dividing by zero. The label is
        ``[(SBP - 40) / 160, (DBP - 40) / 80]``.
        """
        filename = self.filelist[idx]
        index = self.indexlist[idx]
        ppg = readFile(filename)

        # Min-max normalisation.
        min_ppg = np.min(ppg)
        max_ppg = np.max(ppg)
        value_range = max_ppg - min_ppg
        if value_range == 0:
            normalize_ppg = np.zeros_like(ppg)
        else:
            normalize_ppg = (ppg - min_ppg) / value_range

        bpdata = self.bp.loc[int(index)].to_dict()
        sbp = bpdata['SBP']
        dbp = bpdata['DBP']
        label = np.array([(sbp - 40) / (200 - 40), (dbp - 40) / (120 - 40)])

        return normalize_ppg, label

if __name__ == "__main__":
    # Manual smoke test: build the test-set dataset and load one sample.
    smoke_dataset = mmic(csv_file='outputMIMIC-III_BP_testset.csv', root_dir='test')
    smoke_dataset[10]

4.训练代码如下，学习率和衰减策略可以自己定，需要修改读取csv的目录和root目录

import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from custom_data import bpdata_train,bpdata_test
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics  
import torchvision.models as models
from read_mmic import mmic
from tqdm import tqdm

from resnet import resnet34,resnet50

from log import getlog
import os  # used only to create the checkpoint directory

log = getlog()

log.info("Start Train")

bp_train_dataset = mmic(csv_file='outputMIMIC-III_BP_trainset.csv',
                        root_dir='train',
                        data_length=102400)

# Hyper parameters
num_epochs = 200
num_classes = 2  # the model predicts two values per sample: SBP and DBP
batch_size = 256
learning_rate = 0.00001

# Directory that receives the checkpoints; torch.save does not create it.
checkpoint_dir = './model'
os.makedirs(checkpoint_dir, exist_ok=True)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

train_loader = torch.utils.data.DataLoader(dataset=bp_train_dataset,
                                           batch_size=batch_size,
                                           num_workers=8,
                                           shuffle=True)

model = resnet50(num_classes=num_classes, layernumber=50).to(device)
min_loss = 10

# Loss and optimizer
# reduction='sum' is the documented replacement for the deprecated
# size_average=False and yields the same summed squared error.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2, last_epoch=-1)

# Train the model
total_step = len(train_loader)
# model.load_state_dict(torch.load("./modelbak/model150.ckpt"))
print(model)

# total_step is an int and must be converted before concatenation.
log.info("total_step:" + str(total_step))
for epoch in range(num_epochs):
    for i, (data, label) in enumerate(tqdm(train_loader)):
        # The loader yields (batch, length) signals; the model expects
        # (batch, 1, length), i.e. one input channel.
        data = data.to(device=device, dtype=torch.float32).unsqueeze(1)
        label = label.to(device=device, dtype=torch.float32)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, label)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Loss of the last batch of the epoch as a plain float, so that no
    # autograd graph is kept alive through min_loss.
    last_loss = loss.item()
    log.info('Epoch [{}/{}], Loss: {:.4f}'
             .format(epoch + 1, num_epochs, last_loss))
    if epoch % 10 == 0:
        torch.save(model.state_dict(), './model/model%d.ckpt' % epoch)
    if last_loss < min_loss:
        min_loss = last_loss
        torch.save(model.state_dict(), './model/model.ckpt')
    lr_scheduler.step()

5.测试代码如下

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from custom_data import bpdata_train,bpdata_test
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics  
import torch.nn.functional as F
from read_mmic import mmic
from resnet import resnet34,resnet50
import os
from log import getlog
log = getlog()

log.info("Start Test")

batch_size = 32

# Set to True to evaluate on the training data instead of the test data.
evaluate_on_train = False

# Use the second GPU only when one exists; otherwise fall back to the first
# GPU or to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

if evaluate_on_train:
    bp_train_dataset = mmic(csv_file='outputMIMIC-III_BP_trainset.csv',
                            root_dir='train')
    data_loader = torch.utils.data.DataLoader(dataset=bp_train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True)
    log.info("test on train dataset")
else:
    bp_test_dataset = mmic(csv_file='outputMIMIC-III_BP_testset.csv',
                           root_dir='test', data_length=2500)
    data_loader = torch.utils.data.DataLoader(dataset=bp_test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)
    log.info("test on test dataset")

path = "./model"
# Evaluate checkpoint files only, in a reproducible order.
modelist = sorted(name for name in os.listdir(path) if name.endswith('.ckpt'))
model = resnet50(num_classes=2, layernumber=50).to(device)

# Inverse of the label normalisation used by the dataset:
# SBP = y0 * 160 + 40, DBP = y1 * 80 + 40.
label_scale = np.array([160.0, 80.0])
label_offset = 40.0

for file_name in modelist:
    file_path = os.path.join(path, file_name)
    # map_location lets a checkpoint saved on one device load on another.
    model.load_state_dict(torch.load(file_path, map_location=device))
    model.eval()
    output_list = list()
    label_list = list()
    for i, (data, label) in enumerate(data_loader):
        # (batch, length) -> (batch, 1, length): one input channel.
        data = data.float().to(device).unsqueeze(1)
        with torch.no_grad():
            outputs = model(data)
        # Keep every sample of the batch; indexing with [0] here would
        # score only the first sample of each batch.
        outputs = outputs.cpu().numpy() * label_scale + label_offset
        label = label.float().numpy() * label_scale + label_offset
        output_list.append(outputs)
        label_list.append(label)

    predictions = np.concatenate(output_list, axis=0)
    targets = np.concatenate(label_list, axis=0)
    log.info('Testing MAE in {}: {} '.format(file_path, metrics.mean_absolute_error(targets, predictions)))
    log.info('Testing SBP MAE in {}: {} '.format(
        file_path, metrics.mean_absolute_error(targets[:, 0], predictions[:, 0])))
    log.info('Testing DBP MAE in {}: {} '.format(
        file_path, metrics.mean_absolute_error(targets[:, 1], predictions[:, 1])))

用51200条数据进行训练，2500条数据测试

最后在测试集上SBP和DBP合起来的MAE为14,单独SBP的MAE为19.1，DBP的MAE为9.5