本部分的CNN模型采用https://blog.csdn.net/qq_43750573/article/details/105930152所用的模型,代码也是在该博主的代码基础上修改得到,CNN的模型介绍可参考论文(《面向心血管疾病识别的心电信号分类研究》)
代码及流程
1.下载数据
通过读取二进制文件,得到训练集和测试集的数据和标签
# 获取数据 (load raw binary data)
def load_data(filename, sample_num, sample_len):
    """Read a binary signal/label file into a numpy array.

    Args:
        filename: path to the binary file.
        sample_num: number of samples (heartbeats) stored in the file.
        sample_len: length of one sample.  When it equals the module-level
            ``data_len`` the file is treated as 16-bit signal data,
            otherwise as 8-bit labels.

    Returns:
        Signal files: float array of shape (sample_num, data_len, 1), each
        heartbeat normalized by ``normalization``.
        Label files: 1-D array of sample_num * sample_len unsigned bytes.
    """
    count = sample_num * sample_len
    # 'with' guarantees the handle is closed even if read/unpack raises
    # (the original open()/close() pair leaked the file on error).
    with open(filename, "rb") as fp:
        if sample_len == data_len:
            # signal data: signed 16-bit integers, 2 bytes each ('h')
            context = fp.read(2 * count)
            dat_arr = np.array(struct.unpack(f'{count}h', context), dtype=float)
            dat_arr = dat_arr.reshape((sample_num, sample_len))
            # normalize each heartbeat independently
            for i in range(sample_num):
                dat_arr[i] = normalization(dat_arr[i])
            # trailing channel axis so Conv1D accepts the input directly
            dat_arr = dat_arr.reshape(-1, data_len, 1)
        else:
            # labels: one unsigned byte per value ('B')
            context = fp.read(count)
            dat_arr = np.array(struct.unpack(f'{count}B', context))
    return dat_arr
# 获取训练和测试的数据 (load train/test data and labels)
def get_data():
    """Load the training and test signals plus labels from ./data.

    Returns:
        (data_train, label_train, data_test, label_test) as produced by
        ``load_data``.
    """
    data_dir = os.path.join(os.getcwd(), "data")
    # Training set: 35 subjects, 168000 heartbeats, N:V = 1:1 (N first, V second)
    train_x = load_data(os.path.join(data_dir, data_train_name), train_num, data_len)
    train_y = load_data(os.path.join(data_dir, label_train_name), train_num, 1)
    # Test set: 15 subjects, 72000 heartbeats, N:V = 1:1 (N first, V second)
    test_x = load_data(os.path.join(data_dir, data_test_name), test_num, data_len)
    test_y = load_data(os.path.join(data_dir, label_test_name), test_num, 1)
    return train_x, train_y, test_x, test_y
2. 构建CNN模型
参考博文1,进行CNN模型构建,仅修改输入和输出参数,其他参数未做修改
# 构建CNN模型 (build the CNN model)
def build_model():
    """Assemble the 1-D CNN heartbeat classifier.

    Architecture (reference [1]): four Conv1D stages with growing filter
    counts (4/16/32/64) and kernel sizes (21/23/25/27), interleaved with
    stride-2 pooling, then a 128-unit dense layer with dropout and a
    softmax output over ``class_num`` classes.  Input: (data_len, 1).
    """
    kl = tf.keras.layers
    return tf.keras.models.Sequential([
        kl.InputLayer(input_shape=(data_len, 1)),
        # conv stage 1: 4 filters, kernel length 21
        kl.Conv1D(filters=4, kernel_size=21, strides=1, padding='SAME', activation='relu'),
        # max pooling, window 3, stride 2
        kl.MaxPool1D(pool_size=3, strides=2, padding='SAME'),
        # conv stage 2: 16 filters, kernel length 23
        kl.Conv1D(filters=16, kernel_size=23, strides=1, padding='SAME', activation='relu'),
        # max pooling, window 3, stride 2
        kl.MaxPool1D(pool_size=3, strides=2, padding='SAME'),
        # conv stage 3: 32 filters, kernel length 25
        kl.Conv1D(filters=32, kernel_size=25, strides=1, padding='SAME', activation='relu'),
        # average pooling, window 3, stride 2
        kl.AvgPool1D(pool_size=3, strides=2, padding='SAME'),
        # conv stage 4: 64 filters, kernel length 27
        kl.Conv1D(filters=64, kernel_size=27, strides=1, padding='SAME', activation='relu'),
        # flatten feature maps for the dense head
        kl.Flatten(),
        # fully connected layer, 128 units
        kl.Dense(128, activation='relu'),
        # dropout for regularization
        kl.Dropout(rate=0.2),
        # softmax over the target classes (class_num outputs)
        kl.Dense(class_num, activation='softmax'),
    ])
3. 用训练集数据训练模型
metric = 'accuracy'
model = build_model()
# Integer class ids as labels -> sparse categorical cross-entropy.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[metric])
model.summary()
# TensorBoard callback: one timestamped run directory per training run.
log_dir_path = os.path.join(project_path, "logs")
# makedirs(exist_ok=True) replaces the exists()+mkdir() pair, which is
# race-prone and fails if a parent directory is missing.
os.makedirs(log_dir_path, exist_ok=True)
log_dir = os.path.join(log_dir_path, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
ts_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Learning-rate schedule callback.  Use the public tf.keras API: the
# original reached into tensorflow's private `python.keras` namespace
# (tfc.python.keras...), which is unsupported and breaks across versions.
lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)
# Train with a held-out validation split; history keeps per-epoch metrics.
history = model.fit(data_train, label_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=validation_split,
                    callbacks=[ts_callback, lr_callback])
4. 保存模型
保存模型为h5以及pb,pb模型用于C++调用
def save_model_to_pb():
    """Load the trained Keras .h5 model and freeze it to a .pb graph.

    The frozen GraphDef (with variables baked in as constants) is the
    artifact consumed by the C++ TensorFlow API for deployment.
    """
    model_h5 = os.path.join(os.getcwd(), "model_h5")
    os.makedirs(model_h5, exist_ok=True)
    model_h5_path = os.path.join(model_h5, model_h5_name)
    model_pb = os.path.join(os.getcwd(), "model_pb")
    # BUG FIX: the original tested `if not model_pb:` — a non-empty path
    # string is always truthy, so the output directory was never created
    # and the final write_graph failed when it did not already exist.
    os.makedirs(model_pb, exist_ok=True)
    # compile=False: only the inference graph is needed for freezing.
    model = tf.keras.models.load_model(model_h5_path, compile=False)
    model.summary()
    # Wrap the Keras model in a tf.function and trace one concrete
    # signature matching the model's input shape and dtype.
    full_model = tf.function(lambda Input: model(Input))
    full_model = full_model.get_concrete_function(
        tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))
    # Get frozen ConcreteFunction: variables become graph constants.
    frozen_func = convert_variables_to_constants_v2(full_model)
    frozen_func.graph.as_graph_def()
    # Log the frozen graph's operations and I/O tensors for inspection.
    op_names = [op.name for op in frozen_func.graph.get_operations()]
    print("-" * 50)
    print("Frozen model layers: ")
    for layer in op_names:
        print(layer)
    print("-" * 50)
    print("Frozen model inputs: ")
    print(frozen_func.inputs)
    print("Frozen model outputs: ")
    print(frozen_func.outputs)
    # Save frozen graph from frozen ConcreteFunction to hard drive.
    tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                      logdir=model_pb,
                      name=model_pb_name,
                      as_text=False)
5. 用测试集数据评估模型
在最近的一次测试中,结果如下:
Accuracy=97.421%
Accuracy_N=98.250%
Accuracy_V=96.592%
# Evaluate on the held-out test set: predict class probabilities,
# take the argmax as the predicted label.
pred = model.predict(data_test)
label_pred = np.argmax(pred, axis=1)
# Persist the predictions of the first 10 samples for inspection.
save_prediction_to_text(pred, 10)
# Overall accuracy: fraction of predicted labels matching the truth.
acc = np.mean(label_pred == label_test)
# Confusion matrix: rows = true class (0 = N, 1 = V), cols = predicted.
conf_mat = confusion_matrix(label_test, label_pred)
# Per-class recall from the confusion-matrix diagonal.
acc_n = conf_mat[0, 0] / np.sum(conf_mat[0])
acc_v = conf_mat[1, 1] / np.sum(conf_mat[1])
print('\nAccuracy=%.3f%%' % (acc * 100))
print('Accuracy_N=%.3f%%' % (acc_n * 100))
print('Accuracy_V=%.3f%%' % (acc_v * 100))
print('\nConfusion Matrix:\n')
print(conf_mat)
参考资料
[1] https://blog.csdn.net/qq_43750573/article/details/105930152
[2] 张宇微.面向心血管疾病识别的心电信号分类研究[D].山东:济南大学,2019