1.数据集介绍
MIMIC-III_ppg_dataset为MIMIC-III数据库中提取出来的包含ppg信号和血压的数据集
PPG采样频率为125 Hz(https://www.physionet.org/content/mimic3wdb/1.0/),每条记录长度为875个采样点,
即每条PPG信号记录时长为7 s(875 ÷ 125 Hz = 7 s)
MIMIC-III_ppg_dataset.h5数据集以及处理的代码来源于
https://github.com/Fabian-Sc85/non-invasive-bp-estimation-using-deep-learning
non-invasive-bp-estimation-using-deep-learning-main为上述链接的代码
h5_to_txt.py为将数据提取出来代码,接受两个参数
input 需要处理的h5文件的路径
output处理之后输出的路径
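数据集的真值(SBP/DBP)会保存为三个csv文件;脚本直接把output参数与文件名做字符串拼接(中间没有路径分隔符),例如output为"output"时,会在当前路径生成outputMIMIC-III_BP_trainset.csv、outputMIMIC-III_BP_valset.csv和outputMIMIC-III_BP_testset.csv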
输出的文件夹分为train,test,val。每个文件夹中每1000条数据又划分为一个小的文件夹,每个文件的文件名对应csv中的Index
h5_to_txt.py代码如下,需要tensorflow
import matplotlib.pyplot as plt
import argparse
import pandas as pd
import h5py
import tensorflow as tf
# ks.enable_eager_execution()
import numpy as np
from sklearn.model_selection import train_test_split
from datetime import datetime
from os.path import expanduser, isdir, join
from os import mkdir
from sys import argv
def _float_feature(value):
    """Return a ``tf.train.Feature`` whose ``float_list`` is built from ``value``.

    ``value`` is handed to ``tf.train.FloatList(value=...)`` unchanged, i.e. it
    is not wrapped in a list here.
    NOTE(review): presumably callers must therefore pass an iterable of floats
    rather than a bare scalar -- confirm against the TensorFlow API.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)
def _bytes_feature(value):
    """Return a ``tf.train.Feature`` whose ``bytes_list`` holds ``value`` as its only element."""
    # An earlier revision converted eager tensors with ``value.numpy()`` first;
    # that conversion is disabled, so ``value`` is used as given.
    single_item = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=single_item)
def write_array_to_file(file_path, array):
    """Write ``str(item)`` for every element of ``array`` to ``file_path``, one per line.

    The file is opened in text write mode, so existing content is replaced.
    An ``IOError`` while opening or writing is reported on stdout and not
    re-raised (best-effort export).
    """
    try:
        with open(file_path, 'w') as handle:
            handle.writelines(str(entry) + '\n' for entry in array)
    except IOError:
        print(f"无法写入文件 {file_path}")
def ppg_hdf2tfrecord(h5_file, tfrecord_path, samp_idx, weights_SBP=None, weights_DBP=None):
    """Export selected PPG rows of an .h5 file as one text file per sample.

    Despite the name, no .tfrecord file is produced: every selected row of the
    ``/ppg`` dataset is written to ``<tfrecord_path>/<index>.txt`` with one
    value per line (see ``write_array_to_file``).

    Parameters:
        h5_file: path of the .h5 file; it must contain a ``/ppg`` dataset.
        tfrecord_path: existing directory that receives the ``.txt`` files.
        samp_idx: row indices of the samples to export. May be empty, in which
            case nothing is read or written.
        weights_SBP: unused; kept so existing callers keep working.
        weights_DBP: unused; kept so existing callers keep working.
    """
    # Flatten to a plain 1-D index array. The previous np.nditer loop raised
    # ValueError for an empty selection.
    indices = np.asarray(samp_idx).reshape(-1)
    if indices.size == 0:
        return

    with h5py.File(h5_file, 'r') as f:
        ppg_h5 = f.get('/ppg')
        for raw_index in indices:
            # Plain Python int: used both for h5py row access and the file name.
            row = int(raw_index)
            ppg = np.array(ppg_h5[row, :])
            filepath = join(tfrecord_path, str(row) + ".txt")
            write_array_to_file(filepath, ppg)
def ppg_hdf2tfrecord_sharded(h5_file, samp_idx, tfrecordpath, Nsamp_per_shard, modus='train', weights_SBP=None,
                             weights_DBP=None):
    """Export the samples in ``samp_idx`` in consecutive shards.

    Shard ``k`` (counted from 1) covers the next ``Nsamp_per_shard`` entries of
    ``samp_idx`` (the last shard may be shorter) and is written by
    ``ppg_hdf2tfrecord`` into the sub-directory ``<tfrecordpath>/<k>``, which is
    created when missing.

    Parameters:
        h5_file: .h5 file that contains the whole dataset.
        samp_idx: sample indices from the .h5 file to export.
        tfrecordpath: directory that receives the numbered shard directories.
        Nsamp_per_shard: number of samples per shard directory.
        modus: label used only in the progress message ("train", "val", "test").
        weights_SBP: forwarded to ``ppg_hdf2tfrecord`` (optional).
        weights_DBP: forwarded to ``ppg_hdf2tfrecord`` (optional).
    """
    total = len(samp_idx)
    # number of shard directories needed to hold every sample
    shard_count = np.ceil(total / Nsamp_per_shard).astype(int)

    for shard_no in range(1, shard_count + 1):
        first = (shard_no - 1) * Nsamp_per_shard
        last = min(shard_no * Nsamp_per_shard, total)
        chunk = samp_idx[first:last]

        stamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        print(stamp, ': processing ',
              modus,
              ' shard ', str(shard_no), ' of ', str(shard_count))

        shard_dir = join(tfrecordpath, str(shard_no))
        if not isdir(shard_dir):
            mkdir(shard_dir)
        ppg_hdf2tfrecord(h5_file, shard_dir, chunk, weights_SBP=weights_SBP, weights_DBP=weights_DBP)
def _ensure_dir(path):
    """Create ``path`` (including missing parents) if needed and return it."""
    from os import makedirs  # local import: the file header only imports ``mkdir``
    makedirs(path, exist_ok=True)
    return path


def _save_bp_csv(csv_file, BP, idx):
    """Write the columns Index / SBP / DBP for the samples ``idx`` to ``csv_file``."""
    selected = BP[:, idx]
    frame = pd.DataFrame({"Index": idx, "SBP": selected[0, :], "DBP": selected[1, :]})
    frame.to_csv(csv_file)


def h5_to_tfrecords(SourceFile, tfrecordsPath, N_train=1e6, N_val=2.5e5, N_test=2.5e5,
                    divide_by_subject=True, save_tfrecords=True):
    """Split the .h5 dataset into train/val/test and export labels and PPG signals.

    The blood-pressure labels of each split are saved as CSV files; when
    ``save_tfrecords`` is set, the PPG signal of every selected sample is
    written as a text file below ``<tfrecordsPath>/train|val|test/<shard>/``.

    Parameters:
        SourceFile: .h5 file with the datasets ``/label`` and ``/subject_idx``
            (and ``/ppg``, read by the export helper).
        tfrecordsPath: output directory; also used as the *prefix* of the CSV
            file names (see note below).
        N_train, N_val, N_test: number of samples per split (cast to int).
        divide_by_subject: if true, the subjects are split first so that no
            subject contributes to more than one split; otherwise a subset of
            subjects is chosen and its samples are split randomly.
        save_tfrecords: if true, export the PPG signals of all three splits.

    Raises:
        ValueError: from ``np.random.choice`` when a split asks for more
            samples than its subjects provide (subject-based division).
    """
    N_train = int(N_train)
    N_val = int(N_val)
    N_test = int(N_test)
    print(N_train)

    # makedirs instead of mkdir so a missing output root does not abort the run
    tfrecord_path_train = _ensure_dir(join(tfrecordsPath, 'train'))
    tfrecord_path_val = _ensure_dir(join(tfrecordsPath, 'val'))
    tfrecord_path_test = _ensure_dir(join(tfrecordsPath, 'test'))

    # NOTE: the CSV names are built by plain string concatenation, without a
    # path separator ("output" -> "outputMIMIC-III_BP_trainset.csv"). The
    # dataset/training/test scripts in this file rely on exactly these names,
    # so the behaviour is kept.
    csv_path = tfrecordsPath
    Nsamp_per_shard = 1000

    with h5py.File(SourceFile, 'r') as f:
        BP = np.array(f.get('/label'))
        BP = np.round(BP)
        BP = np.transpose(BP)  # rows: SBP, DBP; columns: samples
        subject_idx = np.squeeze(np.array(f.get('/subject_idx')))

    N_samp_total = BP.shape[1]
    subject_idx = subject_idx[:N_samp_total]
    print("total data number:", N_samp_total)

    # Divide the dataset into training, validation and test set
    # -------------------------------------------------------------------------------
    if divide_by_subject:
        valid_idx = np.arange(subject_idx.shape[-1])

        # divide the subjects into training (50 %), validation (25 %) and test (25 %) subjects
        subject_labels = np.unique(subject_idx)
        subjects_train_labels, subjects_val_labels = train_test_split(subject_labels, test_size=0.5)
        subjects_val_labels, subjects_test_labels = train_test_split(subjects_val_labels, test_size=0.5)

        # samples that belong to the training, validation and test subjects
        train_part = valid_idx[np.isin(subject_idx, subjects_train_labels)]
        val_part = valid_idx[np.isin(subject_idx, subjects_val_labels)]
        test_part = valid_idx[np.isin(subject_idx, subjects_test_labels)]

        # draw N_train, N_val and N_test samples (without replacement) from the respective subjects
        idx_train = np.random.choice(train_part, N_train, replace=False)
        idx_val = np.random.choice(val_part, N_val, replace=False)
        idx_test = np.random.choice(test_part, N_test, replace=False)
    else:
        # Take subjects in label order until their cumulative sample count
        # exceeds N_train + N_val + N_test, then split those samples randomly.
        subject_labels, SampSubject_hist = np.unique(subject_idx, return_counts=True)
        cumsum_samp = np.cumsum(SampSubject_hist)
        subject_labels_train = subject_labels[:np.nonzero(cumsum_samp > (N_train + N_val + N_test))[0][0]]
        idx_valid = np.nonzero(np.isin(subject_idx, subject_labels_train))[0]

        idx_train, idx_val = train_test_split(idx_valid, train_size=N_train, test_size=N_val + N_test)
        idx_val, idx_test = train_test_split(idx_val, test_size=0.5)

    # save ground truth BP values of training, validation and test set in csv-files for future reference
    _save_bp_csv(csv_path + 'MIMIC-III_BP_trainset.csv', BP, idx_train)
    _save_bp_csv(csv_path + 'MIMIC-III_BP_valset.csv', BP, idx_val)
    _save_bp_csv(csv_path + 'MIMIC-III_BP_testset.csv', BP, idx_test)

    print("train number:", len(idx_train))
    print("test number:", len(idx_test))
    print("val number:", len(idx_val))

    # export the PPG signals
    # ----------------------------
    if save_tfrecords:
        np.random.shuffle(idx_train)
        # All three splits are exported. Previously the test and train calls
        # were commented out, so only ``val`` was written even though the
        # messages were printed and later scripts read ``train/`` and ``test/``.
        print("start generate test data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_test, tfrecord_path_test, Nsamp_per_shard, modus='test')
        print("start generate train data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_train, tfrecord_path_train, Nsamp_per_shard, modus='train')
        print("start generate val data")
        ppg_hdf2tfrecord_sharded(SourceFile, idx_val, tfrecord_path_val, Nsamp_per_shard, modus='val')
    print("Script finished")
if __name__ == "__main__":
    # Fixed seed so the random split and sampling are repeatable between runs.
    np.random.seed(seed=42)

    if len(argv) > 1:
        parser = argparse.ArgumentParser()
        # input/output are mandatory once any argument is given; otherwise a
        # missing value would reach h5py / os.path as None.
        parser.add_argument('--input', type=str, required=True,
                            help="Path to the .h5 file containing the dataset")
        parser.add_argument('--output', type=str, required=True,
                            help="Target folder for the .tfrecord files")
        # integer defaults so they agree with type=int
        parser.add_argument('--ntrain', type=int, default=1000000,
                            help="Number of samples in the training set (default: 1e6)")
        parser.add_argument('--nval', type=int, default=250000,
                            help="Number of samples in the validation set (default: 2.5e5)")
        parser.add_argument('--ntest', type=int, default=250000,
                            help="Number of samples in the test set (default: 2.5e5)")
        parser.add_argument('--divbysubj', type=int, default=1,
                            help="Perform subject based (1) or sample based (0) division of the dataset")
        args = parser.parse_args()

        SourceFile = args.input
        tfrecordsPath = args.output
        divbysubj = args.divbysubj == 1
        N_train = int(args.ntrain)
        N_val = int(args.nval)
        N_test = int(args.ntest)
    else:
        # No arguments: fall back to fixed locations below the home directory.
        HomePath = expanduser("~")
        SourceFile = join(HomePath, 'data', 'MIMIC-III_BP', 'MIMIC-III_ppg_dataset.h5')
        tfrecordsPath = join(HomePath, 'test')
        divbysubj = True
        N_train = 1e6
        N_val = 2.5e5
        N_test = 2.5e5

    h5_to_tfrecords(SourceFile=SourceFile, tfrecordsPath=tfrecordsPath, divide_by_subject=divbysubj,
                    N_train=N_train, N_val=N_val, N_test=N_test)
日志部分代码
import logging
import time
import os
def getlog():
    """Return the shared ``'test'`` logger, configuring it on first use.

    On the first call a ``./log`` directory is created if necessary and two
    handlers are attached: a file handler writing to
    ``./log/<MMDD-HHMMSS>.log`` and a stream handler. Both handlers use level
    INFO, so DEBUG records are not emitted although the logger itself is set
    to DEBUG.

    Later calls return the already configured logger unchanged. Previously
    every call attached another pair of handlers, so each record was written
    once per call of ``getlog``.

    Returns:
        logging.Logger: the configured logger named ``'test'``.
    """
    logger = logging.getLogger('test')
    # logging.getLogger returns the same object for the same name, so an
    # existing handler list means the logger has been set up before.
    if logger.handlers:
        return logger

    logger.setLevel(level=logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')

    logdir = "./log"
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    os.makedirs(logdir, exist_ok=True)
    log_filename = os.path.join(logdir, time.strftime('%m%d-%H%M%S', time.localtime()) + ".log")

    file_handler = logging.FileHandler(log_filename)
    file_handler.setLevel(level=logging.INFO)
    file_handler.setFormatter(formatter)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger
if __name__ == "__main__":
    # Smoke test: emit one INFO record through the configured logger.
    getlog().info("hello log")
2.模型选择
模型选用resnet,具体根据需要可以选用resnet34,resnet50或者更高的层数,由于输入是一维,所以需要改造传统resnet网络,改造后如下,代码主体来源于pytorch自带resnet模型:
from functools import partial
from typing import Any, Callable, List, Optional, Type, Union
import torch
import torch.nn as nn
from torch import Tensor
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 0) -> nn.Conv1d:
    """Build the 1-D convolution used where the 2-D ResNet has its 3x3 convolution.

    The layer that is actually created has ``kernel_size=1``. ``dilation`` is
    passed on as the *padding* amount (not as a dilation factor), ``groups`` is
    accepted but not used, and the bias keeps the ``nn.Conv1d`` default.

    NOTE(review): with a kernel size of 1 the layer does not mix neighbouring
    time steps -- confirm that this is intended for the 1-D adaptation.
    """
    options = dict(kernel_size=1, stride=stride, padding=dilation)
    return nn.Conv1d(in_planes, out_planes, **options)
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv1d:
    """Return a bias-free 1-D convolution with kernel size 1 and the given stride."""
    pointwise = nn.Conv1d(in_planes, out_planes, 1, stride=stride, bias=False)
    return pointwise
class BasicBlock(nn.Module):
    """Residual block with two convolutions for 1-D feature maps.

    Main path: conv1 -> bn1 -> ReLU -> conv2 -> bn2. The shortcut is the input
    itself, or ``downsample(x)`` when a ``downsample`` module is given. Both are
    added and passed through a final ReLU.

    Only ``groups == 1``, ``base_width == 64`` and ``dilation <= 1`` are
    accepted; other values raise ``ValueError`` / ``NotImplementedError``.
    """

    # channel multiplier of the block output relative to ``planes``
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 0,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm1d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # When stride != 1, conv1 reduces the length on the main path; the
        # caller-provided ``downsample`` must do the same on the shortcut.
        self.conv1 = conv3x3(inplanes, planes, stride, dilation=dilation)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the residual block to ``x`` and return the activated sum."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> "3x3" -> 1x1) for 1-D feature maps.

    As in torchvision's Bottleneck (the "ResNet V1.5" variant, see
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch),
    the downsampling stride sits on the middle convolution (``conv2``), whereas
    the original paper (https://arxiv.org/abs/1512.03385) puts it on the first
    1x1 convolution.
    """

    # NOTE(review): the output has ``planes * 1`` channels here; upstream
    # torchvision uses an expansion of 4 for Bottleneck -- confirm 1 is intended.
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 0,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm1d if norm_layer is None else norm_layer
        # channel count of the inner convolutions
        inner = int(planes * (base_width / 64.0)) * groups
        out_channels = planes * self.expansion

        # When stride != 1, conv2 reduces the length on the main path; the
        # caller-provided ``downsample`` must do the same on the shortcut.
        self.conv1 = conv1x1(inplanes, inner)
        self.bn1 = make_norm(inner)
        self.conv2 = conv3x3(inner, inner, stride, groups, dilation)
        self.bn2 = make_norm(inner)
        self.conv3 = conv1x1(inner, out_channels)
        self.bn3 = make_norm(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the bottleneck block to ``x`` and return the activated sum."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """1-D ResNet backbone followed by two fully connected layers.

    The input is expected as ``(batch, 1, input_length)`` (``conv1`` has one
    input channel). Four residual stages follow; stages 2-4 use stride 2 unless
    the matching entry of ``replace_stride_with_dilation`` is true. The
    flattened stage-4 output is mapped to 1024 features (``fc1``) and then to
    ``num_classes`` outputs (``fc2``).

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: number of blocks in each of the four stages.
        num_classes: size of the output vector.
        layernumber: accepted but not used by this class.
        zero_init_residual: zero-initialise the last norm layer of every block
            so each block starts as an identity mapping.
        groups, width_per_group: forwarded to every block.
        replace_stride_with_dilation: three flags, one per stage 2-4. A true
            flag makes that stage use stride 1.
        norm_layer: normalisation layer factory, default ``nn.BatchNorm1d``.
        input_length: number of samples per input signal. It determines the
            input size of ``fc1``. The default 875 gives the previously
            hard-coded ``512 * 110`` features for blocks with expansion 1.

    Raises:
        ValueError: if ``input_length`` is not positive or
            ``replace_stride_with_dilation`` does not have three elements.
    """

    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        layers: List[int],
        num_classes: int = 1000,
        layernumber: int = 34,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        input_length: int = 875,
    ) -> None:
        super().__init__()
        if input_length < 1:
            raise ValueError(f"input_length must be positive, got {input_length}")
        if norm_layer is None:
            norm_layer = nn.BatchNorm1d
        self._norm_layer = norm_layer

        self.inplanes = 64
        # Starts at 0 and is only ever multiplied (see _make_layer), so it stays
        # 0; the blocks receive it as their ``dilation`` argument.
        self.dilation = 0
        if replace_stride_with_dilation is None:
            # each element indicates whether the stride-2 step of the stage is
            # replaced (the stage then runs with stride 1)
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                f"or a 3-element tuple, got {replace_stride_with_dilation}"
            )
        self.groups = groups
        self.base_width = width_per_group

        self.conv1 = nn.Conv1d(1, self.inplanes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # kernel 1 / stride 1 / padding 0: this pooling leaves the tensor unchanged
        self.maxpool = nn.MaxPool1d(kernel_size=1, stride=1, padding=0)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
        # kernel 1 / stride 1 / padding 0: this pooling leaves the tensor unchanged
        self.avgpool = nn.AvgPool1d(kernel_size=1, stride=1, padding=0)

        # Length after the backbone: every stride-2 stage maps L to
        # (L - 1) // 2 + 1 (kernel size 1, padding 0); all other layers keep L.
        # 875 -> 438 -> 219 -> 110 for the default configuration.
        feature_length = input_length
        for dilate in replace_stride_with_dilation:
            if not dilate:
                feature_length = (feature_length - 1) // 2 + 1
        outshape1 = 512 * block.expansion * feature_length
        self.fc1 = nn.Linear(outshape1, 1024)
        # NOTE(review): _forward_impl applies fc2 directly after fc1 without an
        # activation in between -- confirm that this is intended.
        self.fc2 = nn.Linear(1024, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm1d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck) and m.bn3.weight is not None:
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        planes: int,
        blocks: int,
        stride: int = 1,
        dilate: bool = False,
    ) -> nn.Sequential:
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        Only the first block uses ``stride`` and, when the length or channel
        count changes, a 1x1-convolution + norm shortcut. Updates
        ``self.inplanes`` to the stage's output channel count.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(
            block(
                self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                )
            )

        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Run stem, the four stages and the two fully connected layers on ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # keep the batch dimension, flatten channels and length for fc1
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        """Return the network output for the input batch ``x``."""
        return self._forward_impl(x)
def _resnet(
    block: Type[Union[BasicBlock, Bottleneck]],
    layers: List[int],
    progress: bool,
    **kwargs: Any,
) -> ResNet:
    """Instantiate ``ResNet`` from a block type and per-stage depths.

    ``progress`` is accepted but not used; ``kwargs`` are passed on to the
    ``ResNet`` constructor.
    """
    return ResNet(block, layers, **kwargs)
def resnet34(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build the 34-layer layout: ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return _resnet(BasicBlock, stage_depths, progress, **kwargs)
def resnet50(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build the 50-layer layout: ``Bottleneck`` with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return _resnet(Bottleneck, stage_depths, progress, **kwargs)
def resnet101(*, progress: bool = True, **kwargs: Any) -> ResNet:
    """Build the 101-layer layout: ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return _resnet(Bottleneck, stage_depths, progress, **kwargs)
3.dataset代码如下,data_length为后续使用的数据长度
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# Ignore warnings
import warnings
# Installs an "ignore" filter for every warning category; it takes effect for
# all code that runs after this module has been imported.
warnings.filterwarnings("ignore")
def readFile(filename):
    """Read a text file that holds one number per line.

    Surrounding whitespace (including the line break) is stripped from every
    line. Blank lines are skipped, so an empty line inside the file or at its
    end no longer raises ``ValueError``.

    Args:
        filename: path of the text file.

    Returns:
        numpy.ndarray: 1-D float array with the values in file order (empty
        array for a file without values).

    Raises:
        ValueError: if a non-blank line cannot be parsed as a float.
    """
    with open(filename, 'r') as file:
        stripped = (line.strip() for line in file)
        values = [float(text) for text in stripped if text]
    return np.array(values)
class mmic(Dataset):
    """Dataset of PPG text files with SBP/DBP labels taken from a CSV file.

    ``root_dir`` is expected to contain sub-directories with ``<Index>.txt``
    files (one value per line); ``csv_file`` must provide the columns
    ``Index``, ``SBP`` and ``DBP``. The file stem is used to look up the label.

    Args:
        csv_file: CSV file with the ground-truth blood pressure values.
        root_dir: directory that holds the shard sub-directories.
        data_length: maximum number of samples exposed by the dataset.
        transform: stored as ``self.transform``; not applied by this class.
    """

    def __init__(self, csv_file, root_dir, data_length=10000, transform=None):
        self.bp = pd.read_csv(csv_file, index_col="Index")
        self.root_dir = root_dir
        self.transform = transform
        self.to_tensor = transforms.ToTensor()
        # os.listdir returns entries in arbitrary order; sorting makes the
        # subset selected by ``data_length`` reproducible between runs.
        self.dirlist = sorted(os.listdir(root_dir))
        self.datalen = data_length
        self.filelist = []
        self.indexlist = []
        for dirname in self.dirlist:
            dirpath = os.path.join(self.root_dir, dirname)
            if not os.path.isdir(dirpath):
                # stray files next to the shard directories are ignored
                continue
            for filename in sorted(os.listdir(dirpath)):
                index, extension = os.path.splitext(filename)
                if extension != ".txt":
                    # only "<Index>.txt" files are samples
                    continue
                self.indexlist.append(index)
                self.filelist.append(os.path.join(dirpath, filename))

    def __len__(self):
        # Never report more samples than files were found; otherwise a data
        # loader would request indices that raise IndexError in __getitem__.
        return min(self.datalen, len(self.filelist))

    def __getitem__(self, idx):
        """Return ``(normalized_ppg, label)`` for sample ``idx``.

        ``normalized_ppg`` is the signal min-max scaled to [0, 1];
        ``label`` is ``[(SBP - 40) / 160, (DBP - 40) / 80]``.
        """
        filename = self.filelist[idx]
        index = self.indexlist[idx]
        ppg = readFile(filename)

        # min-max normalisation
        min_ppg = np.min(ppg)
        max_ppg = np.max(ppg)
        span = max_ppg - min_ppg
        if span == 0:
            # constant signal: avoid 0/0 (NaN) and return all zeros instead
            normalize_ppg = np.zeros_like(ppg)
        else:
            normalize_ppg = (ppg - min_ppg) / span

        bpdata = self.bp.loc[int(index)].to_dict()
        sbp = bpdata['SBP']
        dbp = bpdata['DBP']
        # SBP is scaled from [40, 200] and DBP from [40, 120] to [0, 1]
        label = np.array([(sbp - 40) / (200 - 40), (dbp - 40) / (120 - 40)])
        return normalize_ppg, label
if __name__ == "__main__":
    # Smoke test: build the test dataset and load one sample.
    dataset = mmic(
        csv_file='outputMIMIC-III_BP_testset.csv',
        root_dir='test',
    )
    dataset[10]
4.训练代码如下,学习率和衰减策略可以自己定,需要修改读取csv的目录和root目录
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from custom_data import bpdata_train,bpdata_test
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics
import torchvision.models as models
from read_mmic import mmic
from tqdm import tqdm
from resnet import resnet34,resnet50
from log import getlog
def train():
    """Train the 1-D ResNet-50 on the exported PPG data and save checkpoints.

    Every epoch logs the mean of the per-batch (sum-reduced) MSE losses.
    Checkpoints go to ``./model``: ``model<epoch>.ckpt`` every 10th epoch and
    ``model.ckpt`` whenever the epoch loss falls below the best value so far.
    """
    import os  # local import: this script's import header does not provide ``os``

    log = getlog()
    log.info("Start Train")
    bp_train_dataset = mmic(csv_file='outputMIMIC-III_BP_trainset.csv',
                            root_dir='train',
                            data_length=102400)

    # Hyper parameters
    num_epochs = 200
    num_classes = 2  # the model predicts normalised SBP and DBP
    batch_size = 256
    learning_rate = 0.00001
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    train_loader = torch.utils.data.DataLoader(dataset=bp_train_dataset,
                                               batch_size=batch_size,
                                               num_workers=8,
                                               shuffle=True)

    model = resnet50(num_classes=num_classes, layernumber=50).to(device)
    # ``model.ckpt`` is only written once the epoch loss drops below this value
    min_loss = 10

    # Loss and optimizer
    # reduction='sum' is the supported spelling of the deprecated size_average=False
    criterion = torch.nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2, last_epoch=-1)

    total_step = len(train_loader)
    # model.load_state_dict(torch.load("./modelbak/model150.ckpt"))
    print(model)
    # total_step is an int; the former "total_step:" + total_step raised TypeError
    log.info("total_step:" + str(total_step))

    # torch.save does not create missing directories
    os.makedirs('./model', exist_ok=True)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for data, label in tqdm(train_loader):
            # (batch, length) -> (batch, 1, length): the model expects one input channel
            data = data.to(device=device, dtype=torch.float32).unsqueeze(1)
            label = label.to(device=device, dtype=torch.float32)

            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, label)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() keeps a plain float and drops the autograd graph
            running_loss += loss.item()

        epoch_loss = running_loss / max(total_step, 1)
        log.info('Epoch [{}/{}], Loss: {:.4f}'
                 .format(epoch + 1, num_epochs, epoch_loss))

        if epoch % 10 == 0:
            torch.save(model.state_dict(), './model/model%d.ckpt' % epoch)
        # Compare the epoch mean instead of the last batch's loss tensor
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            torch.save(model.state_dict(), './model/model.ckpt')
        lr_scheduler.step()


# The guard is needed because the DataLoader uses worker processes
# (num_workers=8): on platforms that start workers with "spawn" the module is
# re-imported in every worker and must not start training again.
if __name__ == "__main__":
    train()
5.测试代码如下
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from custom_data import bpdata_train,bpdata_test
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics
import torch.nn.functional as F
from read_mmic import mmic
from resnet import resnet34,resnet50
import os
from log import getlog
# Evaluation script: loads every checkpoint in ./model and reports the mean
# absolute error (in the original blood-pressure units) on the chosen dataset.
log = getlog()
log.info("Start Test")
batch_size = 32

# Prefer the second GPU as before, but fall back to the first GPU or the CPU
# when it does not exist.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

bp_test_dataset = mmic(csv_file='outputMIMIC-III_BP_testset.csv',
                       root_dir='test', data_length=2500)
test_loader = torch.utils.data.DataLoader(dataset=bp_test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Set to True to evaluate on the training data instead of the test data.
evaluate_on_train = False
if evaluate_on_train:
    bp_train_dataset = mmic(csv_file='outputMIMIC-III_BP_trainset.csv',
                            root_dir='train')
    train_loader = torch.utils.data.DataLoader(dataset=bp_train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    data_loader = train_loader
    log.info("test on train dataset")
else:
    data_loader = test_loader
    log.info("test on test dataset")

path = "./model"
# only checkpoint files, in a stable order
modelist = sorted(name for name in os.listdir(path) if name.endswith(".ckpt"))
model = resnet50(num_classes=2, layernumber=50).to(device)

# Inverse of the label scaling used by the dataset:
# SBP = x * 160 + 40, DBP = x * 80 + 40
scale = np.array([160.0, 80.0])
offset = np.array([40.0, 40.0])

for file_name in modelist:
    file_path = os.path.join(path, file_name)
    # map_location: load the weights onto the evaluation device regardless of
    # the device they were saved from
    model.load_state_dict(torch.load(file_path, map_location=device))
    model.eval()

    output_list = list()
    label_list = list()
    with torch.no_grad():
        for data, label in data_loader:
            # (batch, length) -> (batch, 1, length)
            data = data.float().to(device).unsqueeze(1)
            outputs = model(data).cpu().numpy()
            # Keep every sample of the batch (previously only element [0] of
            # each batch was evaluated).
            output_list.append(outputs * scale + offset)
            label_list.append(label.float().numpy() * scale + offset)

    if not output_list:
        log.info('No samples to evaluate for {}'.format(file_path))
        continue

    predictions = np.concatenate(output_list)
    targets = np.concatenate(label_list)
    sbp_mae = float(np.mean(np.abs(targets[:, 0] - predictions[:, 0])))
    dbp_mae = float(np.mean(np.abs(targets[:, 1] - predictions[:, 1])))

    log.info('Testing MAE in {}: {} '.format(file_path, metrics.mean_absolute_error(targets, predictions)))
    log.info('SBP MAE in {}: {} '.format(file_path, sbp_mae))
    log.info('DBP MAE in {}: {} '.format(file_path, dbp_mae))
用51200条数据进行训练,2500条数据测试
最后在测试集上SBP和DBP合起来的MAE为14,单独SBP的MAE为19.1,DBP的MAE为9.5