兄弟们,点赞加个关注吧
基于keras构建DNN神经网络,可以比较快捷设置好所需网络结构。
01、首先导入预先需要的模块
import joblib
import keras
import pandas as pd
import shap
import tensorflow as tf
from keras import layers, models, utils, backend as K
from matplotlib import pyplot as plt
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             precision_score, recall_score)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.core import util
02、导入数据
# Load the dataset and hold out 20% of it for testing; the fixed seed makes
# the split reproducible across runs.
data = pd.read_csv("data.csv")

# NOTE(review): iloc[1:] skips the first data row even though read_csv has
# already consumed the header line — confirm that row is meant to be dropped.
features = data.iloc[1:, 0:-1]
target = data.iloc[1:, -1]

X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42)
03、数据预处理
# Standardize features: fit the scaler on the training set only, then apply
# the same statistics to the test set to avoid data leakage.
std = StandardScaler()
# Fix: the original passed columns=[X_train.columns] — wrapping the Index in a
# list creates a one-level MultiIndex instead of plain column labels.
X_train = pd.DataFrame(data=std.fit_transform(X_train), columns=X_train.columns)
# Persist the fitted scaler so inference code can apply identical scaling.
# (Requires `import joblib`; the original file used joblib without importing it.)
joblib.dump(std, "std.dat")
X_test = pd.DataFrame(data=std.transform(X_test), columns=X_test.columns)
04、搭建神经网络
神经网络的隐藏层最好从大到小逐渐设置,这样更有利于网络逐层提取和压缩特征。
# Number of input features expected by the first layer.
n_features = 27

# Funnel-shaped fully connected network: five ReLU hidden layers whose widths
# shrink from 6x down to 2x the input size, each followed by 20% dropout, and
# a single sigmoid output unit for binary classification.
model = models.Sequential(name="DeepNN")
for depth, width_factor in enumerate([6, 5, 4, 3, 2], start=1):
    dense_kwargs = dict(name=f"h{depth}",
                        units=int(round(n_features * width_factor)),
                        activation='relu')
    if depth == 1:
        # Only the first layer declares the input dimensionality.
        dense_kwargs['input_dim'] = n_features
    model.add(layers.Dense(**dense_kwargs))
    model.add(layers.Dropout(name=f"drop{depth}", rate=0.2))
# Output layer
model.add(layers.Dense(name="output", units=1, activation='sigmoid'))
model.summary()
# Compile the network: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), tracking accuracy plus the custom F1 metric.
# NOTE(review): F1 is defined further down (section 05). Running this file
# top-to-bottom raises NameError here — the Recall/Precision/F1 definitions
# must be moved above this call.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', F1])
05、设置评价指标
评价指标可以直接在上面模型编译的时候添加,但需要先定义好这些指标函数,再执行编译。
# Custom evaluation metrics implemented with Keras backend ops.
def Recall(y_true, y_pred):
    """Batch-wise recall = TP / (TP + FN).

    Predictions are clipped to [0, 1] and rounded so the metric works on
    sigmoid outputs; K.epsilon() guards against division by zero.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return tp / (actual_positives + K.epsilon())
def Precision(y_true, y_pred):
    """Batch-wise precision = TP / (TP + FP).

    Predictions are clipped to [0, 1] and rounded so the metric works on
    sigmoid outputs; K.epsilon() guards against division by zero.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return tp / (predicted_positives + K.epsilon())
def F1(y_true, y_pred):
    """Batch-wise F1 score: harmonic mean of Precision and Recall."""
    p = Precision(y_true, y_pred)
    r = Recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))
06、训练和评估
# Train for 300 epochs; Keras holds out the last 20% of the training data for
# per-epoch validation.
training = model.fit(x=X_train, y=y_train, batch_size=2048, epochs=300,
                     shuffle=True, verbose=2, validation_split=0.2)

# Predict probabilities on the test set and binarise at the 0.5 threshold.
y_pre = model.predict(x=X_test, batch_size=512)
y_pre[y_pre >= 0.5] = 1
y_pre[y_pre < 0.5] = 0

# Row-normalised confusion matrix (each true-class row sums to 1).
cm = confusion_matrix(y_test, y_pre, normalize='true')
print("混淆矩阵:\n", cm)

# Scalar evaluation metrics, rounded once to 4 decimal places (the original
# rounded each value a second time inside print, which was redundant).
accuracy_scores = round(accuracy_score(y_test, y_pre, normalize=True), 4)
precision_scores = round(precision_score(y_test, y_pre), 4)
recall_scores = round(recall_score(y_test, y_pre), 4)
f1_scores = round(f1_score(y_test, y_pre), 4)

# Fix: the original prefixed each print with `car_name`, which is never
# defined anywhere in this file and raised NameError; use an explicit label.
model_label = "DNN"
print(model_label + "准确值:", accuracy_scores)
print(model_label + "精确值:", precision_scores)  # of predicted positives, the share that are truly positive
print(model_label + "召回率:", recall_scores)  # of actual positives, the share that are predicted positive
print(model_label + "F1数值:", f1_scores)