量化金融——利用RNN和LSTM预测开盘价
第一期
本期以茅台2020.6.1到2020.12.31的开盘价为数据,总共
116+15+15=146组数据,其中训练集(train)116组数据,测试集(test)15组数据,验证集(prediction)15组数据
数据生成
导入相关包和库
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
数据预处理
# Load the training set (the CSV file is read with GBK encoding).
training_set = pd.read_csv('maotai_train.csv', encoding='gbk')
# Keep only the second column as a 2-D array of shape (n_samples, 1).
# NOTE(review): presumably this column holds the opening price -- confirm
# against the CSV header.
training_set = training_set.iloc[:, 1:2].values
# Keep a handle on the unscaled values for plotting; `training_set` is
# rebound to the scaled array below.
train = training_set
# Min-max normalise the feature (MinMaxScaler's default range is [0, 1]).
sc = MinMaxScaler()
training_set = sc.fit_transform(training_set)
# NOTE(review): inputs and targets are the very same values, so the network
# is trained to reproduce its input rather than to forecast a later price.
# Confirm whether a one-step shift between X and Y was intended.
X_train = training_set[:]
Y_train = training_set[:]
# Reshape to (samples, timesteps=1, features=1) as expected by the LSTM layer.
# The sample count is inferred (-1) instead of being hard-coded to 116, so the
# script keeps working when the CSV has a different number of rows.
X_train = np.reshape(X_train, (-1, 1, 1))
搭建RNN(LSTM)模型
# Assemble the network in one go: a 50-unit LSTM layer with sigmoid
# activation reading (timesteps=1, features=1) inputs, followed by a
# single-unit dense output layer.
regressor = Sequential([
    LSTM(units=50, activation='sigmoid', input_shape=(1, 1)),
    Dense(units=1),
])
# Configure training: Adam optimiser with mean-squared-error loss.
regressor.compile(loss='mean_squared_error', optimizer='adam')
# Fit on the training data for 20 epochs, one sample per batch.
regressor.fit(X_train, Y_train, epochs=20, batch_size=1)
预测
# Load the test set (the CSV file is read with GBK encoding).
test_set = pd.read_csv('maotai_test.csv', encoding='gbk')
# Second column as a 2-D array of shape (n_samples, 1); used for evaluation.
real_stock_price = test_set.iloc[:, 1:2].values
# Same column extracted a second time; this is the array the plotting code uses.
real_stock_price1 = test_set.iloc[:, 1:2].values
# Build the model inputs: scale the test values with the scaler that was
# fitted on the training data.
inputs = real_stock_price
inputs = sc.transform(inputs)
# Reshape to (samples, timesteps=1, features=1). The sample count is inferred
# (-1) instead of being hard-coded to 15, so any test-set length works.
inputs = np.reshape(inputs, (-1, 1, 1))
predicted_stock_price = regressor.predict(inputs)
# Map the predictions back from the scaled range to the original price scale.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
可视化
# X coordinates are consecutive integer indices: the training samples occupy
# 1..n_train and the test/predicted samples continue right after them.
# np.arange is used because np.linspace(117, 131, num=15, endpoint=False)
# has a step of 14/15 and therefore does not land on integer positions.
# Lengths are taken from the data rather than hard-coded.
n_train = len(train)
n_test = len(real_stock_price1)
n_pred = len(predicted_stock_price)
x1 = np.arange(1, n_train + 1)
x2 = np.arange(n_train + 1, n_train + n_test + 1)
x3 = np.arange(n_train + 1, n_train + n_pred + 1)
# Use a plain '-' line style: the former 'b-' format string also encoded the
# colour blue, which clashed with the explicit color= keyword.
plt.plot(x1, train, '-', color='blue', label='train')
plt.plot(x2, real_stock_price1, '-', color='red', label='Real')
plt.plot(x3, predicted_stock_price, '-', color='green', label='Predicted')
plt.title('Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
评估模型
import math
from sklearn.metrics import mean_squared_error
# Root-mean-squared error between the real and the predicted test values.
rmse = math.sqrt(mean_squared_error(real_stock_price, predicted_stock_price))
# Express the RMSE relative to the average real value. The divisor is computed
# from the data instead of using a hard-coded constant (the previous literal
# 120 did not match its own comment, which mentioned 800).
rmse = rmse / float(real_stock_price.mean())
print(rmse)