# 导入必要的库
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import Adam
# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
# Every CSV row is fed to the network as one independent sample, i.e. a
# sequence of length 1. `window` is the time dimension of the model input and
# must match the reshape applied to x_train / x_test below.
window = 1
lstm_units = 16
dropout = 0.01
epoch = 20
# Font family and size used by matplotlib.
plt.rcParams['font.sans-serif'] = ['SimSun']
plt.rcParams['font.size'] = 10

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
filepath = "E:\\Computer-vision\\疾病预警\\heart.csv"
heart_df = pd.read_csv(filepath)

# Split into feature columns and the 'target' column.
features = heart_df.drop(columns=['target'])
targets = heart_df['target']

# Columns treated as categorical; they are cast to str so that get_dummies
# one-hot encodes them instead of passing them through as numbers.
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
features[categorical_cols] = features[categorical_cols].astype('str')

features_dis = features[categorical_cols]                # categorical part
features_con = features.drop(columns=categorical_cols)   # continuous part
print(features_con)

# One-hot encode the categorical columns. A float dtype is requested
# explicitly: recent pandas versions default to bool, which would turn the
# combined matrix into an object array.
features_dis = pd.get_dummies(features_dis, dtype=np.float32)
print(features_dis)

# Standardise the continuous columns, keeping the original column names and
# index so the join below aligns row by row.
# NOTE(review): the scaler is fitted on all rows before the train/test split.
from sklearn.preprocessing import StandardScaler
features_con = pd.DataFrame(
    StandardScaler().fit_transform(features_con),
    columns=features_con.columns,
    index=features_con.index,
)

# Concatenate continuous and one-hot columns side by side.
features_temp = features_con.join(features_dis)
# Width of the matrix actually fed to the network (after one-hot encoding).
input_size = features_temp.shape[1]

# ---------------------------------------------------------------------------
# Train / test split
# ---------------------------------------------------------------------------
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features_temp, targets, test_size=0.25)

# The model below takes 3-D input (samples, window, input_size); add the
# length-1 time axis and convert to float32 arrays.
x_train = np.asarray(x_train, dtype='float32')[:, np.newaxis, :]
x_test = np.asarray(x_test, dtype='float32')[:, np.newaxis, :]

# ---------------------------------------------------------------------------
# Model: Conv1D -> MaxPooling1D -> Dropout -> BiLSTM -> multiplicative gate
# ---------------------------------------------------------------------------
inputs = Input(shape=(window, input_size))
hidden = Conv1D(filters=lstm_units, kernel_size=1, activation='sigmoid')(inputs)
hidden = MaxPooling1D(pool_size=window)(hidden)  # collapses the time axis to 1
hidden = Dropout(dropout)(hidden)
hidden = Bidirectional(LSTM(lstm_units, activation='tanh'), name='bilstm')(hidden)
# Sigmoid weights with the same width as the BiLSTM output (2 * lstm_units),
# multiplied element-wise with that output.
attention = Dense(lstm_units * 2, activation='sigmoid', name='attention_vec')(hidden)
hidden = Multiply()([hidden, attention])
# NOTE(review): tanh output + MSE loss is kept from the original; if 'target'
# is a 0/1 label, sigmoid + binary cross-entropy would be the usual choice.
outputs = Dense(1, activation='tanh')(hidden)
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
model.summary()

# tf.keras expects `epochs`; the legacy `nb_epoch` keyword is not accepted.
history = model.fit(x_train, y_train, epochs=epoch, batch_size=256, shuffle=False,
                    validation_data=(x_test, y_test))
def _plot_real_vs_predicted(y_real, y_predicted, title, start=200, stop=500):
    """Plot real and predicted values against sample position on a new figure.

    Both inputs are flattened to plain arrays first, so values are paired by
    position and any pandas index labels are ignored. Rows ``start:stop`` are
    drawn; if that slice is empty (fewer than ``start`` + 1 samples) the whole
    series is drawn instead.
    """
    draw = pd.DataFrame({
        'real': np.asarray(y_real).ravel(),
        'predict': np.asarray(y_predicted).ravel(),
    })
    segment = draw.iloc[start:stop]
    if segment.empty:
        segment = draw
    # A dedicated figure per plot keeps train and test curves from piling up
    # on whatever axes happen to be current.
    _, ax = plt.subplots(figsize=(12, 6))
    segment.plot(ax=ax)
    ax.legend(('real', 'predict'), loc='upper right', fontsize=15)
    ax.set_title(title, fontsize=30)
    plt.show()


# Loss curves per epoch.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(len(loss))  # one point per recorded epoch
plt.figure()
plt.plot(epochs_range, loss, label='Train Loss')
plt.plot(epochs_range, val_loss, label='Test Loss')
plt.legend(loc='upper right')
plt.title('Train and Val Loss')
plt.show()

# Fit on the training set.
y_train_predict = model.predict(x_train)
y_train_predict = y_train_predict[:, 0]
_plot_real_vs_predicted(y_train, y_train_predict, "Train Data")

# Predictions on the test set.
y_test_predict = model.predict(x_test)
y_test_predict = y_test_predict[:, 0]
_plot_real_vs_predicted(y_test, y_test_predict, "Test Data")
# 输出结果
def mape(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred* against *y_true*.

    Both inputs are converted to flat float arrays, so lists, NumPy arrays and
    pandas Series are paired by position. The percentage error is undefined
    where ``y_true`` is zero; those samples are left out of the mean. If no
    sample has a non-zero true value (including empty input) the result is
    ``nan``.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The error as a percentage (e.g. ``25.0`` for 25 %).
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    usable = y_true != 0  # avoid dividing by zero targets
    if not usable.any():
        return float('nan')
    return np.mean(np.abs((y_pred[usable] - y_true[usable]) / y_true[usable])) * 100
def up_down_accuracy(y_true, y_pred):
    """Return the fraction of steps where both series move in the same direction.

    For each pair of consecutive positions the sign of the change in
    ``y_true`` is compared with the sign of the change in ``y_pred``; a step
    counts as a hit when the signs are equal (two flat steps also match).

    Inputs are converted to flat float arrays first, so pandas Series are
    compared by position rather than by index label.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        A float in ``[0, 1]``, or ``nan`` when there are fewer than two
        points and therefore no step to compare.
    """
    true_moves = np.diff(np.asarray(y_true, dtype=float).ravel())
    pred_moves = np.diff(np.asarray(y_pred, dtype=float).ravel())
    if true_moves.size == 0:
        return float('nan')
    return float(np.mean(np.sign(true_moves) == np.sign(pred_moves)))
# Print MAE / MSE / MAPE / up-down accuracy for the training and test sets.
# Every metric is called as (y_true, y_pred): MAE, MSE and the up-down
# accuracy are symmetric, but mape() divides by its first argument, so the
# order matters there. Labels are converted to plain arrays so pandas index
# labels cannot influence the metric helpers.
for heading, actual, predicted in (
    ('训练集上的MAE/MSE/MAPE/涨跌准确率', y_train, y_train_predict),
    ('测试集上的MAE/MSE/MAPE/涨跌准确率', y_test, y_test_predict),
):
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    print(heading)
    print(mean_absolute_error(actual, predicted))
    print(mean_squared_error(actual, predicted))
    print(mape(actual, predicted))
    print(up_down_accuracy(actual, predicted))
# Evaluation and visualization of the prediction model
# (web-page residue: "latest recommended article published 2024-11-13 13:44:26")