# Reference: "LSTM的时序应用" (time-series applications of LSTM).
# Slightly modified from that reference.
import numpy as np
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def sample_split(df, is_scale=False, train_ratio=0.67):  # data processing
    """Split a data frame, in row order, into train/test features and targets.

    The first column of ``df`` is dropped (presumably a date/index column --
    confirm against the input file). Of the remaining columns, the first is
    used as the target ``y`` and all the others as the features ``x``. Rows
    are not shuffled: the leading ``int(len(df) * train_ratio)`` rows form the
    training set and the rest form the test set.

    Args:
        df: pandas DataFrame with at least two columns.
        is_scale: When true, min-max scale every retained column to [0, 1].
            The scaler is fit on all rows (train and test together) before
            the split is made.
        train_ratio: Fraction of rows assigned to the training set; must lie
            strictly between 0 and 1.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of NumPy arrays; the
        ``x`` arrays are 2-D (rows, features) and the ``y`` arrays are 1-D.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1.
    """
    if not 0 < train_ratio < 1:
        raise ValueError(
            "train_ratio must be strictly between 0 and 1, got %r" % (train_ratio,)
        )

    train_size = int(len(df) * train_ratio)
    dataset = df.iloc[:, 1:].values
    if is_scale:
        scaler = MinMaxScaler(feature_range=(0, 1))
        dataset = scaler.fit_transform(dataset)

    train_set, test_set = dataset[:train_size], dataset[train_size:]
    # Column 0 of the retained data is the target; the rest are features.
    train_x, train_y = train_set[:, 1:], train_set[:, 0]
    test_x, test_y = test_set[:, 1:], test_set[:, 0]
    return train_x, train_y, test_x, test_y
def lstm_predict(df, hidden_number):
    """Train a single-layer LSTM on ``df`` and return predictions in original units.

    The data is scaled and split by ``sample_split(df, is_scale=True)``. A
    ``Sequential`` model made of one LSTM layer followed by ``Dense(1)`` is
    fit on the training part, then used to predict both the training and the
    test part. Predictions and targets are mapped back to the original scale
    of the target column (column 1 of ``df``), and the train/test RMSE are
    printed.

    Args:
        df: pandas DataFrame; column 0 is ignored, column 1 is the target and
            the remaining columns are the input features.
        hidden_number: Number of units in the LSTM layer.

    Returns:
        NumPy array with the training predictions stacked on top of the test
        predictions, one row per row of ``df``, in original target units.
    """
    train_x, train_y, test_x, test_y = sample_split(df, is_scale=True)

    # Reshape to 3-D (samples, 1, features): every row is fed to the LSTM as
    # a sequence of length one.
    train_x = np.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
    test_x = np.reshape(test_x, (test_x.shape[0], 1, test_x.shape[1]))

    model = Sequential()
    # Feature count = all columns minus the ignored first column and the target.
    # NOTE(review): `input_dim` here and `nb_epoch` below look like legacy
    # Keras keyword names; newer Keras releases presumably expect
    # `input_shape` / `epochs` -- confirm against the installed version.
    model.add(LSTM(hidden_number, input_dim=df.values.shape[1]-2))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # Roughly three batches per epoch; never let the batch size drop to 0
    # when there are fewer than three training rows.
    batch_size = max(1, train_x.shape[0] // 3)
    model.fit(train_x, train_y, nb_epoch=1750, batch_size=batch_size, verbose=2)
    train_predict, test_predict = model.predict(train_x), model.predict(test_x)

    # Re-fit a scaler on the full target column only, so that scaled values
    # can be mapped back to original units. MinMaxScaler scales each column
    # independently, so this matches the scaling sample_split applied to it.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(df.values[:, 1].reshape(-1, 1))
    train_y = scaler.inverse_transform(train_y.reshape(-1, 1))[:, 0]
    test_y = scaler.inverse_transform(test_y.reshape(-1, 1))[:, 0]
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)

    train_score = math.sqrt(mean_squared_error(train_y, train_predict[:, 0]))
    print('Train Score:%.2f RMSE'%(train_score))
    test_score = math.sqrt(mean_squared_error(test_y, test_predict[:, 0]))
    print('Test Score: %.2f RMSE'%(test_score))
    return np.vstack((train_predict, test_predict))
if __name__ == '__main__':
    # `plt` was used below without ever being imported in this file; import it
    # here because plotting is only needed when running as a script.
    import matplotlib.pyplot as plt

    np.random.seed(7)
    data = pd.read_csv('lstm_data.csv')
    # Inspect the data format. `display` only exists inside IPython/Jupyter,
    # so use print to keep the script runnable from a plain interpreter.
    print(data)
    y_predict = lstm_predict(data, hidden_number=6)
    # Real target values: column 1 of the CSV, the same column that
    # lstm_predict uses as the target and to undo the scaling.
    y = data.iloc[:, 1].values
    # Plot predicted against real values.
    plt.figure(figsize=(10,4))
    plt.plot(y_predict, label='Pridict', alpha=0.8)
    plt.plot(y, label='Real', alpha=0.8)
    plt.legend()
    plt.ylabel('Price')
    plt.xlabel('Date')
    plt.title('BABA(LSTM Model)')
    # Needed for the figure to appear when not running in an inline notebook.
    plt.show()
# Note: the dependent (target) variable is close_tomorrow.