#!/usr/bin/python3
# -*- coding: utf-8 -*-
import numpy as np
from pandas import read_csv
from matplotlib import pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
# Load the per-day infectious-disease case counts reported by medical
# institutions.  NOTE(review): hard-coded absolute Windows path — assumes
# this exact CSV exists locally; confirm before running elsewhere.
data = read_csv('C:\\Users\\50515\\Desktop\\PythonTest\\传染病按医疗机构按日数据.csv')
# Keep only the rows for one institution (ORGCODE == 330784002).
data = data.iloc[:,:][data.ORGCODE==330784002]
# Sort chronologically by the statistics-date column.
data = data.sort_values(by='TONGJIRQ')
# Use the case-count column as the univariate series.
data = data['BINGLISHU']
data = data.values.astype('float32')
data = np.array(data).reshape(-1,1)  # column vector: shape (n_samples, 1)
scaler=MinMaxScaler()
dataset=scaler.fit_transform(data)  # scale values into [0, 1]
seed = 7
np.random.seed(seed)  # reproducible NumPy randomness
batch_size = 1      # stateful LSTMs below need a fixed batch size
epochs = 40         # number of training passes
look_back=3         # window length: predict t+1 from the previous 3 values
hidden_layer_num=4  # units per hidden layer
def create_data0(data,look_back=3,splitsize=0.7):
    """Build (x, y) sample pairs for perceptron_model training.

    Each x is a window of `look_back` consecutive values from column 0
    and y is the value that immediately follows the window (x = values
    0,1,2 -> y = value 3).

    :param data: 2-D array of shape (n, 1), e.g. the scaled dataset
    :param look_back: window length
    :param splitsize: unused here; kept for symmetry with create_data
    :return: (datax, datay) lists of windows and their next values
    """
    sample_count = len(data) - look_back - 1
    datax = [data[start:start + look_back, 0] for start in range(sample_count)]
    datay = [data[start + look_back, 0] for start in range(sample_count)]
    return datax, datay
def create_data(data,look_back=3,splitsize=0.7):
    """Slide a window over `data` to build supervised train/test sets.

    Given time t, the target is the value at t+1, predicted from the
    `look_back` previous values (e.g. (t-2, t-1, t) for look_back=3).

    :param data: 2-D array of shape (n, 1), e.g. the scaled dataset
    :param look_back: window length (3 -> inputs like [x1, x2, x3])
    :param splitsize: fraction of the samples used for training
    :return: (x_train, y_train), (x_test, y_test); the x arrays are
             shaped (samples, time steps, features) as Keras expects
    """
    windows, targets = [], []
    for start in range(len(data) - look_back - 1):
        windows.append(data[start:start + look_back, 0])
        targets.append(data[start + look_back, 0])
    split_at = int(len(windows) * splitsize)
    # Earlier samples train the model, later samples evaluate it.
    x_train = np.array(windows[:split_at])
    y_train = np.array(targets[:split_at])
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.array(windows[split_at:])
    y_test = np.array(targets[split_at:])
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return (x_train, y_train), (x_test, y_test)
def perceptron_model():
    """Multilayer perceptron: two ReLU hidden layers of
    `hidden_layer_num` units feeding a single linear output.

    Takes plain `look_back`-wide vectors, so the (datax, datay)
    lists from create_data0 can be fed in without reshaping.
    """
    net = Sequential()
    net.add(Dense(units=hidden_layer_num, input_dim=look_back, activation='relu'))
    net.add(Dense(units=hidden_layer_num, activation='relu'))
    net.add(Dense(units=1))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net
def time_model():
    """LSTM regressor that treats each window as ONE time step.

    input_shape=(1, look_back): a single time step carrying
    `look_back` features per sample (contrast with time_step_model).
    """
    net = Sequential()
    net.add(LSTM(units=hidden_layer_num, input_shape=(1, look_back)))
    net.add(Dense(units=1))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net
def time_step_model():
    """LSTM regressor using the window positions as time steps.

    input_shape=(look_back, 1): `look_back` time steps with one
    feature each, matching the x arrays built by create_data.
    """
    net = Sequential()
    net.add(LSTM(units=hidden_layer_num, input_shape=(look_back, 1)))
    net.add(Dense(units=1))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net
def memory_batches_model():
    """Stateful LSTM regressor that keeps memory across batches.

    stateful=True preserves the layer's internal state between
    batches for finer control; batch_input_shape pins
    (batch, time steps, features), so callers must predict/fit with
    the same batch_size and call reset_states() between epochs.
    """
    net = Sequential()
    net.add(LSTM(units=hidden_layer_num,
                 batch_input_shape=(batch_size, look_back, 1),
                 stateful=True))
    net.add(Dense(units=1))
    net.compile(loss='mean_squared_error', optimizer='adam')
    return net
def stack_memory_batches_model():
    """Two stacked stateful LSTM layers with a linear output.

    return_sequences=True on the first layer makes it emit the full
    sequence the second LSTM consumes; stateful=True keeps internal
    state across batches (callers must use the same batch_size and
    call model.reset_states() between epochs).
    """
    model = Sequential()
    model.add(LSTM(units=hidden_layer_num,
                   batch_input_shape=(batch_size, look_back, 1),
                   stateful=True, return_sequences=True))
    # BUG FIX: the second layer previously passed
    # input_shape=(batch_size, look_back, 1).  input_shape on a
    # non-first Sequential layer is wrong (Keras ignores it, and the
    # tuple wrongly included the batch dimension); the input shape is
    # inferred from the previous layer, so it is simply omitted.
    model.add(LSTM(units=hidden_layer_num, stateful=True))
    model.add(Dense(units=1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
def get_model(filename='.\\model\\model',choosemodel='json'):
    """Load a trained model (architecture + HDF5 weights) from disk.

    :param filename: base path; the format extension and '.h5' are appended
    :param choosemodel: architecture format, 'json' or 'yaml'
    :return: compiled Keras model
    :raises ValueError: if choosemodel is neither 'json' nor 'yaml'
    """
    if choosemodel=='json':
        from keras.models import model_from_json
        # with-block closes the file (the old open(...).read() leaked it).
        with open(filename+'.json') as f:
            model = model_from_json(f.read())
        model.load_weights(filename+'.h5')
        # BUG FIX: this branch compiled with 'binary_crossentropy', a
        # classification loss; every model in this file is a regressor
        # trained with mean squared error (see the yaml branch).
        model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
        print("Get json model!")
        return model
    elif choosemodel=='yaml':
        from keras.models import model_from_yaml
        with open(filename+'.yaml') as f:
            model=model_from_yaml(f.read())
        model.load_weights(filename+'.h5')
        model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
        print("Get yaml model!")
        return model
    # Previously an unknown format silently returned None.
    raise ValueError("choosemodel must be 'json' or 'yaml', got %r" % (choosemodel,))
def save_model(model,filename='.\\model\\model',choosemodel='json'):
    """Save a trained model's architecture and HDF5 weights to disk.

    :param model: Keras model to persist
    :param filename: base path; the format extension and '.h5' are appended
    :param choosemodel: architecture format, 'json' or 'yaml'
    :return: None
    """
    if choosemodel=='json':
        # with-block closes the file (open(...).write(...) leaked the handle).
        with open(filename+'.json','w') as f:
            f.write(model.to_json())
        model.save_weights(filename+'.h5', overwrite=True)
        print("Save json model!")
    elif choosemodel=='yaml':
        with open(filename+'.yaml','w') as f:
            f.write(model.to_yaml())
        model.save_weights(filename+'.h5',overwrite=True)
        print("Save yaml model!")
# Build the supervised train/validation split from the scaled series.
(X_train,y_train),(X_validation,y_validation)=create_data(dataset,look_back=look_back)
model=memory_batches_model()
# Manual epoch loop: a stateful LSTM needs its internal state reset
# between epochs, which a single model.fit call cannot do.
for i in range(epochs):
    # BUG FIX: this previously passed epochs=epochs, training
    # epochs*epochs total epochs and resetting state only once per
    # `epochs` epochs.  One epoch per loop iteration is the intent
    # (the per-loop mean-loss print assumes it too).
    history = model.fit(X_train, y_train, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
    mean_loss = np.mean(history.history['loss'])
    print('mean loss %.5f for loop %s' % (mean_loss, str(i)))
    model.reset_states()  # clear all layers' state before the next epoch
# Save the trained model (JSON architecture + HDF5 weights).
save_model(model,filename='memory_batches_model_84')
# model=get_model(filename='.\\model\\memory_batches_model')
# model=get_model(filename='.\\model\\stack_memory_batches_model')
# Predict with the same fixed batch size the stateful model was built with.
predict_train = model.predict(X_train, batch_size=batch_size)
model.reset_states()  # reset state between the two predict passes
predict_validation = model.predict(X_validation, batch_size=batch_size)
# Invert the MinMax scaling so the MSE is reported in original units.
predict_train = scaler.inverse_transform(predict_train)
y_train = scaler.inverse_transform([y_train])
predict_validation = scaler.inverse_transform(predict_validation)
y_validation = scaler.inverse_transform([y_validation])
# Evaluate with RMSE; predict_* are (n, 1) 2-D arrays, so [:, 0]
# flattens them to 1-D to match the target rows.
train_score = math.sqrt(mean_squared_error(y_train[0], predict_train[:, 0]))
print('Train Score: %.2f RMSE' % train_score)
validation_score = math.sqrt(mean_squared_error(y_validation[0], predict_validation[:, 0]))
# BUG FIX: message previously read 'Validatin'.
print('Validation Score: %.2f RMSE' % validation_score)
# Chart series for the training predictions: NaN everywhere except the
# slots the training windows actually predict (shifted by look_back).
predict_train_plot = np.empty_like(dataset)  # same shape as dataset
predict_train_plot[:, :] = np.nan
predict_train_plot[look_back:len(predict_train)+look_back, :] = predict_train
# Chart series for the validation predictions: fills the slots right
# after the training section.
predict_validation_plot = np.empty_like(dataset)
predict_validation_plot[:, :] = np.nan
predict_validation_plot[len(predict_train)+look_back:len(dataset) -1, :] = predict_validation
# Un-scale the raw series and draw everything.
dataset = scaler.inverse_transform(dataset)
# BUG FIX: was print('test' % dataset) — 'test' has no % placeholder,
# so the statement errors out; print the label and value instead.
print('test', dataset)
plt.plot(dataset, color='blue')                 # original series
plt.plot(predict_train_plot, color='green')     # training predictions
plt.plot(predict_validation_plot, color='red')  # validation predictions
plt.show()