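# Notes: reframe a time-series dataset as a supervised learning problem by
# using the values at previous time steps to predict the values at the next
# time steps. The code below is kept for reference.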
import numpy as np
# #2. 单特征样本,取一个步长
# dataset = np.array([i for i in range(22)])
# n_dim = dataset.shape[0]
#
# train_size = int(len(dataset) * 7/9) # 9分之7的数据训练
# validation_size = len(dataset) - train_size # 9分之2测试数据
# train, validation = dataset[0:train_size], dataset[train_size:len(dataset)] # 数据切割
#
#
#
# def split_sequence(data,sliding_window_width):
# X,y = [],[]
# for i in range(len(data)):
# end_element_index = i + sliding_window_width
# if end_element_index > len(data) - 1 :
# break
# #这里不用减1,因为列表读数不包含最后一个
# # x_sequence = data[i:i+sliding_window_width - 1]
# x_sequence = data[i:end_element_index]
# y_sequence = data[end_element_index]
# X.append(x_sequence)
# y.append(y_sequence)
# return np.array(X), np.array(y)
#
#
# sw_width = 3
# x_train, y_train = split_sequence(train,sw_width) #测试
# x_valition, y_valition = split_sequence(validation,sw_width) #验证
#
# print(train_size)
# print(x_train.shape)
# for i in range(len(x_train)):
# print('X:',x_train[i],'y:',y_train[i])
# 3. Single feature, multi-step windows.
# Toy series 0..24 and the number of past observations in each input window.
dataset = list(range(25))
sw_width = 4
def split_sequnece(sequence, sliding_window_width, n_steps_out=2):
    """Slice a sequence into (input window, following targets) sample pairs.

    Each sample pairs ``sliding_window_width`` consecutive elements with the
    ``n_steps_out`` elements that immediately follow them. The window start
    advances by one element per sample.

    Args:
        sequence: Sliceable sequence that supports ``len()`` (e.g. a list).
        sliding_window_width: Number of elements in each input window.
        n_steps_out: Number of elements following the window that form the
            target of each sample. Defaults to 2.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the collected windows
        and targets. Both are empty when the sequence is too short to hold
        one full window plus its targets.
    """
    X, y = [], []
    # Slice ends are exclusive, so a sample fits whenever
    # start + window + n_steps_out <= len(sequence); the last valid start is
    # therefore len(sequence) - window - n_steps_out (inclusive).
    last_start = len(sequence) - sliding_window_width - n_steps_out
    for start in range(last_start + 1):
        split = start + sliding_window_width
        X.append(sequence[start:split])
        y.append(sequence[split:split + n_steps_out])
    return np.array(X), np.array(y)
# Split the dataset: the first 67% (about two thirds) is used for training,
# the remainder for validation.
train_size = int(len(dataset) * 0.67)
validation_size = len(dataset) - train_size
train = dataset[:train_size]
validation = dataset[train_size:]

# Build the (window, targets) samples from the training part and print them.
X_train, y_train = split_sequnece(train, sw_width)
for window, targets in zip(X_train, y_train):
    print('x:', window, 'y:', targets)