python 时间序列预测——多序列预测

数据集

股票指数,STOCKINDEX

import urllib.request as request
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, SimpleRNN
from keras.models import Sequential
from keras.optimizers import SGD
from sklearn import preprocessing

# Download the zip archive of stock-index spreadsheets to the working directory.
url = "http://www.economicswebinstitute.org/data/stockindexes.zip"
loc = "./stockindexes.zip"
request.urlretrieve(url, loc)

# Unzip next to the script. The context manager closes the archive even if
# extraction raises, which the manual close() call did not guarantee.
dest_location = "./"
with zipfile.ZipFile(loc, 'r') as unzip:
    unzip.extractall(dest_location)

# Open the extracted workbook and list the sheets it contains.
loc = "stockindexes.xls"
Excel_file = pd.ExcelFile(loc)
print(Excel_file.sheet_names)
# Output recorded by the original author:
# ['Description', 'Dow Jones Industrial', 'S&P500', 'NIKKEI 300', 'Dax30',
#  'CAC40', 'Swiss Market-Price Index', 'Mib30', 'IBEX 35I', 'Bel20', 'FTSE100']

预测对象

主要关注两个指数:FTSE100【富时100指数】,Dow Jones Industrial【道琼斯工业指数】。

# Read the two index sheets that are the forecasting targets.
ftse_data = Excel_file.parse('FTSE100')
dj_data = Excel_file.parse('Dow Jones Industrial')

# Take rows 4..1356 of the second column of each sheet as the index levels.
# Presumably the rows above the slice hold descriptive text, which would leave
# the column as object dtype (TODO confirm); the cast is a no-op if the column
# is already float and keeps pct_change/rolling on numeric data otherwise.
ftse100 = ftse_data.iloc[4:1357, 1].astype(float)
dj = dj_data.iloc[4:1357, 1].astype(float)

# Align both series on a fresh 0..n-1 index and convert levels to
# one-period percentage changes.
yt = pd.concat([ftse100, dj], axis=1)
yt = yt.reset_index(drop=True)
yt.columns = ['ftse100', 'dj']
yt = yt.pct_change(1)

# Rolling standard deviation of returns over `win` observations.
# NOTE(review): center=True labels each window at its midpoint, so the value
# at row t also depends on returns after t. Confirm this look-ahead is
# acceptable for a forecasting target.
win = 30
vol_t = yt.rolling(window=win, center=True).std()
print(vol_t.shape)

plt.figure(figsize=(9, 3))
plt.plot(vol_t)
# Show the figure explicitly, as the later plots in this script do.
plt.show()

在这里插入图片描述

预处理

引入时滞

# Lagged predictors: for k = 2..6 each feature frame is
#     log((vol_t.shift(1) / vol_t.shift(k)) * vol_t.shift(1))
# i.e. the previous row's value scaled by its ratio to the value k rows back.
x1, x2, x3, x4, x5 = [
    np.log((vol_t.shift(1) / vol_t.shift(k)) * vol_t.shift(1))
    for k in range(2, 7)
]

# Column labels keep the exact spelling (including spaces) used elsewhere:
# 'ftse_t -1 ', 'dj_t -1 ', ..., 'ftse_t -5 ', 'dj_t -5 '.
cols_y = ['ftse_t ', 'dj_t ']
cols_x = [
    '{}_t -{} '.format(series, lag)
    for lag in range(1, 6)
    for series in ('ftse', 'dj')
]

# Targets first, then the five lag frames side by side; rows that have any
# NaN (from the rolling window or the shifts) are dropped.
data = pd.concat([vol_t, x1, x2, x3, x4, x5], axis=1)
data.columns = cols_y + cols_x
data = data.dropna()

# Split the cleaned frame into responses and predictors.
y = data[cols_y]
x = data[cols_x]

归一化

# Number of predictor columns and of response columns.
num_attrib = 10
num_response = 2

# Predictors are rescaled to [-1, 1], responses to [0, 1]. Both scalers are
# fitted on every row, i.e. before the train/test split that follows.
scaler_x = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler_y = preprocessing.MinMaxScaler(feature_range=(0, 1))

# Convert the frames to 2-D arrays of shape (rows, columns) and scale them.
x = scaler_x.fit_transform(np.array(x).reshape(-1, num_attrib))
y = scaler_y.fit_transform(np.array(y).reshape(-1, num_response))

数据集拆分

# Chronological split: the first `train_end` rows train the model, all
# remaining rows are held out for testing.
train_end = 1131
data_end = len(y)

# The test slices start exactly at `train_end`. The previous
# `train_end + 1` start left row `train_end` in neither set.
x_train = x[:train_end]
x_test = x[train_end:data_end]
y_train = y[:train_end]
y_test = y[train_end:data_end]

# Add a time-step axis of length 1: (samples, features) becomes
# (samples, 1, features), matching the model's input_shape=(1, num_attrib).
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
print(" Shape of x_train is ", x_train.shape)  # (1131, 1, 10)
# The author recorded (185, 1, 10) with the skipped row; expect one more row now.
print(" Shape of x_test is ", x_test.shape)

训练

seed = 2016
num_epochs = 20
# Seeds NumPy's global RNG only.
# NOTE(review): the Keras backend may need its own seed for reproducible
# weight initialisation; confirm for the backend in use.
np.random.seed(seed)

# One recurrent layer reading a single time step of `num_attrib` features,
# followed by a linear layer that emits one value per response series.
model = Sequential()
model.add(SimpleRNN(units=10, activation='sigmoid', input_shape=(1, num_attrib)))
model.add(Dense(units=num_response, activation='linear'))

# `learning_rate` replaces the deprecated `lr` keyword, which current Keras
# releases no longer accept.
sgd = SGD(learning_rate=0.01, momentum=0.90, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd)
model.fit(x_train, y_train, batch_size=1, epochs=num_epochs)

# No extra metrics are compiled in, so evaluate() returns the scalar MSE loss.
score_train = model.evaluate(x_train, y_train, batch_size=1)
score_test = model.evaluate(x_test, y_test, batch_size=1)
print("in train MSE = ", round(score_train, 5))
print("in test MSE = ", round(score_test, 5))

预测

# Predict with the trained network. The original referenced an undefined
# name `fit1`; the fitted model object in this script is `model`.
pred = model.predict(x_test)
# Both y_test and pred are in the scaled [0, 1] space produced by scaler_y.
# Apply scaler_y.inverse_transform(...) to both if original units are needed.

# One figure per response column: actual values against predictions.
for col, label in enumerate(['FTSE100', 'Dow Jones']):
    plt.plot(y_test[:, col])
    plt.plot(pred[:, col])
    plt.legend([label, '{} prediction'.format(label)])
    plt.show()

在这里插入图片描述
在这里插入图片描述

时间序列预测是一种常见的问题,可以使用LSTM(长短期记忆)模型来解决。LSTM是一种循环神经网络(RNN),能够有效地捕捉时间序列中的长期依赖关系。

下面是一个使用Python和Keras库实现LSTM模型进行时间序列预测的示例代码:

```python
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense

# 读取时间序列数据
data = pd.read_csv('data.csv')  # 替换为实际的数据文件路径

# 数据预处理
# 将数据拆分为训练集和测试集
train_data = data.iloc[:800]  # 使用前800个数据作为训练集
test_data = data.iloc[800:]  # 使用后面的数据作为测试集

# 构建特征和标签
def create_dataset(dataset, look_back):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        X.append(dataset[i:i + look_back])
        Y.append(dataset[i + look_back])
    return np.array(X), np.array(Y)

look_back = 10  # 定义用于预测的时间步长
train_X, train_Y = create_dataset(train_data, look_back)
test_X, test_Y = create_dataset(test_data, look_back)

# 构建LSTM模型
model = Sequential()
model.add(LSTM(units=50, activation='relu', input_shape=(look_back, 1)))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# 训练模型
model.fit(train_X, train_Y, epochs=100, batch_size=32)

# 预测
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

# 可视化结果
import matplotlib.pyplot as plt

# 绘制训练集和测试集的实际值
plt.plot(np.arange(len(train_data)), train_data, 'b', label='actual')
plt.plot(np.arange(len(train_data), len(train_data) + len(test_data)), test_data, 'g', label='actual')

# 绘制训练集和测试集的预测值
plt.plot(np.arange(look_back, len(train_predict) + look_back), train_predict, 'r', label='predicted')
plt.plot(np.arange(len(train_predict) + look_back, len(train_predict) + look_back + len(test_predict)), test_predict, 'y', label='predicted')

plt.legend()
plt.show()
```

在上面的代码中,首先读取时间序列数据,然后将数据拆分为训练集和测试集。接下来,通过定义一个`create_dataset`函数将时间序列数据转换为特征和标签,其中特征是前`look_back`个时间步长的数据,标签是下一个时间步长的数据。然后,使用Keras库构建一个简单的LSTM模型,并编译模型。

训练模型时,使用训练集的特征和标签进行训练。训练完成后,使用训练集和测试集的特征进行预测,并将结果可视化。

请注意,上述代码仅为示例,实际使用时可能需要根据具体情况进行适当调整。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值