序列和时间序列
时序信号的生成
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series):
plt.figure(figsize=(10,6))
plt.plot(time,series)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope*time
time = np.arange(4*365+1)
baseline=10
series = trend(time,0.1)
plot_series(time,series)
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series):
plt.figure(figsize=(10,6))
plt.plot(time,series)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope*time
time = np.arange(4*365+1)
baseline=10
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
amplitude = 40
series = seasonality(time,period=365,amplitude=amplitude)
plot_series(time,series)
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series):
plt.figure(figsize=(10,6))
plt.plot(time,series)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope*time
time = np.arange(4*365+1)
baseline=10
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
amplitude = 40
#series = seasonality(time,period=365,amplitude=amplitude)
#plot_series(time,series)
slope=0.05
series=baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
plot_series(time,series)
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series):
plt.figure(figsize=(10,6))
plt.plot(time,series)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope*time
time = np.arange(4*365+1)
baseline=10
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
amplitude = 40
series = seasonality(time,period=365,amplitude=amplitude)
plot_series(time,series)
slope=0.05
series=baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
plot_series(time,series)
def noise(time,noise_level=1):
return np.random.randn(len(time))*noise_level
noise_level=15
noisy_series = series +noise(time,noise_level)
plot_series(time,noisy_series)
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series):
plt.figure(figsize=(10,6))
plt.plot(time,series)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope*time
time = np.arange(4*365+1)
baseline=10
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
amplitude = 40
series = seasonality(time,period=365,amplitude=amplitude)
#plot_series(time,series)
slope=0.05
series=baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
#plot_series(time,series)
def noise(time,noise_level=1):
return np.random.randn(len(time))*noise_level
noise_level=40
noisy_series = series +noise(time,noise_level)
#plot_series(time,noisy_series)
#两个数据平滑的函数
def autocorrelation1(time,amplitude):
rho1=0.5
rho2=-0.1
ar=np.random.randn(len(time)+50)
ar[:50]=100
for step in range(50,len(time)+50):
ar[step]+=rho1*ar[step-50]
ar[step]+=rho2*ar[step-33]
return ar[50:]*amplitude
def autocorrelation2(time,amplitude):
rho=0.8
ar=np.random.randn(len(time)+1)
for step in range(1,len(time)+1):
ar[step]+=rho*ar[step-1]
return ar[1:]*amplitude
series = autocorrelation2(time,10)
plot_series(time[:200],series[:200])
时间序列预测方法
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series,format="-",start=0,end=None):
plt.plot(time[start:end],series[start:end],format)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
def trend(time,slope=0):
return slope*time
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
def noise(time,noise_level=1,seed=None):
rnd = np.random.RandomState(seed)
return np.random.randn(len(time))*noise_level
time = np.arange(4*365+1,dtype="float32")
baseline=10
series = trend(time,0.1)
baseline=10
amplitude=40
slope=0.05
noise_level=5
#create the series
series = baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
#update with noise
series+=noise(time,noise_level,seed=42)
plt.figure(figsize=(10,6))
plot_series(time,series)
plt.show()
#将数据分为测试集和校验集
split_time=1000
time_train = time[:split_time]
x_train=series[:split_time]
time_valid = time[split_time:]
x_valid=series[split_time:]
plt.figure(figsize=(10,6))
plot_series(time_train,x_train)
plt.show()
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plt.show()
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
def plot_series(time,series,format="-",start=0,end=None):
plt.plot(time[start:end],series[start:end],format)
plt.xlabel("time")
plt.ylabel("value")
plt.grid(True)
def trend(time,slope=0):
return slope*time
def seasonal_pattern(season_time):
return np.where(season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time))
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude*seasonal_pattern(season_time)
def noise(time,noise_level=1,seed=None):
rnd = np.random.RandomState(seed)
return np.random.randn(len(time))*noise_level
time = np.arange(4*365+1,dtype="float32")
baseline=10
series = trend(time,0.1)
baseline=10
amplitude=40
slope=0.05
noise_level=5
#create the series
series = baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
#update with noise
series+=noise(time,noise_level,seed=42)
plt.figure(figsize=(10,6))
plot_series(time,series)
plt.show()
#将数据分为测试集和校验集
split_time=1000
time_train = time[:split_time]
x_train=series[:split_time]
time_valid = time[split_time:]
x_valid=series[split_time:]
plt.figure(figsize=(10,6))
# plot_series(time_train,x_train)
# plt.show()
plt.figure(figsize=(10,6))
# plot_series(time_valid,x_valid)
# plt.show()
#使用朴素预测法进行预测
naive_forecast = series[split_time-1:-1]
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,naive_forecast)
plt.show()
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid,start=0,end=150)
plot_series(time_valid,naive_forecast,start=1,end=151)
plt.show()
#计算误差
tf.compat.v1.enable_eager_execution()
print(keras.metrics.mean_squared_error(x_valid,naive_forecast).numpy())
print(keras.metrics.mean_absolute_error(x_valid,naive_forecast).numpy())
#使用移动平均法进行预测
def moving_average_forecast(series,window_size):
forecast=[]
for time in range(len(series)-window_size):
forecast.append(series[time:time+window_size].mean())
return np.array(forecast)
moving_avg=moving_average_forecast(series,30)[split_time-30:]
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,moving_avg)
plt.show()
#计算误差
print(keras.metrics.mean_squared_error(x_valid,moving_avg).numpy())
print(keras.metrics.mean_absolute_error(x_valid,moving_avg).numpy())
#去除趋势或周期性
diff_series = (series[365:]-series[:-365])
diff_time=time[365:]
plt.figure(figsize=(10,6))
plot_series(diff_time,diff_series)
plt.show()
#对随机误差做移动平均
diff_moving_avg = moving_average_forecast(diff_series,50)[split_time-365-50:]
plt.figure(figsize=(10,6))
plot_series(time_valid,diff_series[split_time-365:])
plot_series(time_valid,diff_moving_avg)
plt.show()
diff_moving_avg_plus_past = series[split_time-365:-365]+diff_moving_avg
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,diff_moving_avg_plus_past)
plt.show()
#计算误差
print(keras.metrics.mean_squared_error(x_valid,diff_moving_avg_plus_past).numpy())
print(keras.metrics.mean_absolute_error(x_valid,diff_moving_avg_plus_past).numpy())
#使用移动平均法来消除部分噪声
diff_moving_avg_plus_smooth_past=moving_average_forecast(series[split_time-370:-360],50)+diff_moving_avg
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,diff_moving_avg_plus_smooth_past)
plt.show()
#计算误差
print(keras.metrics.mean_squared_error(x_valid,diff_moving_avg_plus_smooth_past).numpy())
print(keras.metrics.mean_absolute_error(x_valid,diff_moving_avg_plus_smooth_past).numpy())
RNN网络样本的生成方法
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
dataset = tf.data.Dataset.range(10)
for val in dataset:
print(val.numpy())
#打散数据
#获得窗口数据,窗口大小为5,shift表示每次平移一个单位,drop_remainder去掉不完整的数据
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5,shift=1,drop_remainder=True)
dataset = dataset.flat_map(lambda window:window.batch(5))
dataset = dataset.map(lambda window: (window[:-1],window[-1:]))
dataset = dataset.shuffle(buffer_size=10)
#设置数据批量,每两个数据为一批次
dataset = dataset.batch(2).prefetch(1)
for x,y in dataset:
print(x.numpy(),y.numpy())
RNN时间序列预测
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#模拟生成时间序列
def plot_series(time,series,format='-',start=0,end=None):
plt.plot(time[start:end],series[start:end],format)
plt.xlabel("Time")
plt.ylabel("Value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope * time
def seasonal_pattern(season_time):
return np.where(
season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time)
)
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude * seasonal_pattern(season_time)
def noise(time,noise_level=1,seed=None):
rnd = np.random.RandomState(seed)
return rnd.randn(len(time))*noise_level
time = np.arange(4*365+1,dtype="float32")
baseline=10
series=trend(time,0.1)
baseline=10
amplitude=40
slope=0.05
noise_level=5
#生成一个没有误差的时间序列
series=baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
#生成一个带误差的时间序列
series+=noise(time,noise_level,seed=42)
#切分数据集
split_time=1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid=time[split_time:]
x_valid=series[split_time:]
window_size=20
batch_size=32
shuffle_buffer_size=1000
#模拟生成数据集
#参数说明:序列数据,窗口大小,批次大小,随机缓存大小
#输出:(特征,标签)
def windowed_dataset(series,window_size,batch_size,shuffle_buffer):
dataset = tf.data.Dataset.from_tensor_slices(series)#数据切分
dataset = dataset.window(window_size+1,shift=1,drop_remainder=True)#平移一个时间单位
dataset = dataset.flat_map(lambda window:window.batch(window_size+1))
dataset = dataset.shuffle(shuffle_buffer).map(lambda window:(window[:-1],window[-1]))
dataset = dataset.batch(batch_size).prefetch(1)
return dataset
#搭建SimpleRNN神经网络,使用LR_scheduler机制调整学习率
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)
train_set = windowed_dataset(x_train,window_size,batch_size=128,shuffle_buffer=shuffle_buffer_size)
model = tf.keras.models.Sequential([
tf.keras.layers.Lambda(lambda x:tf.expand_dims(x,axis=-1),
input_shape=[None]),
tf.keras.layers.SimpleRNN(40,return_sequences=True),
tf.keras.layers.SimpleRNN(40),
tf.keras.layers.Dense(1),
tf.keras.layers.Lambda(lambda x:x*100.0)
])
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
lambda epoch:1e-8 * 10**(epoch/20) #学习率的变化公式
)
optimizer = tf.keras.optimizers.SGD(lr=5e-5,momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
optimizer=optimizer,
metrics=["mae"]) #平均绝对误差
history = model.fit(train_set,epochs=100,callbacks=[lr_schedule])
plt.semilogx(history.history["lr"],history.history["loss"])
plt.axis([1e-8,1e-4,0,30])
plt.show()
#预测
forecast=[]
for time in range(len(series)-window_size):
forecast.append(model.predict(series[time:time+window_size][np.newaxis]))
forecast=forecast[split_time-window_size:]
results = np.array(forecast)[:,0,0]
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,results)
print(tf.keras.metrics.mean_absolute_error(x_valid,results).numpy())
#显示训练集和测试集的loss和mae
mae = history.history['mae']
loss=history.history['loss']
epochs=range(len(loss))
plt.plot(epochs,mae,'r')
plt.plot(epochs,loss,'r')
plt.title('MAE and Loss')
plt.xlabel('Epochs')
plt.ylabel("Accuracy")
plt.legend(["MAE","Loss"])
plt.figure()
plt.show()
epochs_zoom=epochs[20:]
mae_zoom=mae[20:]
loss_zoom=loss[20:]
plt.plot(epochs_zoom,mae_zoom,'r')
plt.plot(epochs_zoom,loss_zoom,'b')
plt.title('MAE and Loss')
plt.xlabel('Epochs')
plt.ylabel("Accuracy")
plt.legend(["MAE","Loss"])
plt.figure()
plt.show()
结果显示
双向LSTM时间预测
LSTM比RNN的效果好
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#模拟生成时间序列
def plot_series(time,series,format='-',start=0,end=None):
plt.plot(time[start:end],series[start:end],format)
plt.xlabel("Time")
plt.ylabel("Value")
plt.grid(True)
plt.show()
def trend(time,slope=0):
return slope * time
def seasonal_pattern(season_time):
return np.where(
season_time<0.4,
np.cos(season_time*2*np.pi),
1/np.exp(3*season_time)
)
def seasonality(time,period,amplitude=1,phase=0):
season_time=((time+phase)%period)/period
return amplitude * seasonal_pattern(season_time)
def noise(time,noise_level=1,seed=None):
rnd = np.random.RandomState(seed)
return rnd.randn(len(time))*noise_level
time = np.arange(4*365+1,dtype="float32")
baseline=10
series=trend(time,0.1)
baseline=10
amplitude=40
slope=0.05
noise_level=5
#生成一个没有误差的时间序列
series=baseline+trend(time,slope)+seasonality(time,period=365,amplitude=amplitude)
#生成一个带误差的时间序列
series+=noise(time,noise_level,seed=42)
#切分数据集
split_time=1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid=time[split_time:]
x_valid=series[split_time:]
window_size=20
batch_size=32
shuffle_buffer_size=1000
#模拟生成数据集
#参数说明:序列数据,窗口大小,批次大小,随机缓存大小
#输出:(特征,标签)
def windowed_dataset(series,window_size,batch_size,shuffle_buffer):
dataset = tf.data.Dataset.from_tensor_slices(series)#数据切分
dataset = dataset.window(window_size+1,shift=1,drop_remainder=True)#平移一个时间单位
dataset = dataset.flat_map(lambda window:window.batch(window_size+1))
dataset = dataset.shuffle(shuffle_buffer).map(lambda window:(window[:-1],window[-1]))
dataset = dataset.batch(batch_size).prefetch(1)
return dataset
#搭建SimpleRNN神经网络,使用LR_scheduler机制调整学习率
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)
train_set = windowed_dataset(x_train,window_size,batch_size=128,shuffle_buffer=shuffle_buffer_size)
model = tf.keras.models.Sequential([
tf.keras.layers.Lambda(lambda x:tf.expand_dims(x,axis=-1),
input_shape=[None]),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32,return_sequences=True)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
tf.keras.layers.Dense(1),
tf.keras.layers.Lambda(lambda x:x*100.0)
])
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
lambda epoch:1e-8 * 10**(epoch/20) #学习率的变化公式
)
optimizer = tf.keras.optimizers.SGD(lr=1e-8,momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
optimizer=optimizer,
metrics=["mae"]) #平均绝对误差
history = model.fit(train_set,epochs=100,callbacks=[lr_schedule])
plt.semilogx(history.history["lr"],history.history["loss"])
plt.axis([1e-8,1e-4,0,30])
plt.show()
#预测
forecast=[]
for time in range(len(series)-window_size):
forecast.append(model.predict(series[time:time+window_size][np.newaxis]))
forecast=forecast[split_time-window_size:]
results = np.array(forecast)[:,0,0]
plt.figure(figsize=(10,6))
plot_series(time_valid,x_valid)
plot_series(time_valid,results)
print(tf.keras.metrics.mean_absolute_error(x_valid,results).numpy())
#显示训练集和测试集的loss和mae
mae = history.history['mae']
loss=history.history['loss']
epochs=range(len(loss))
plt.plot(epochs,mae,'r')
plt.plot(epochs,loss,'r')
plt.title('MAE and Loss')
plt.xlabel('Epochs')
plt.ylabel("Accuracy")
plt.legend(["MAE","Loss"])
plt.figure()
plt.show()
epochs_zoom=epochs[20:]
mae_zoom=mae[20:]
loss_zoom=loss[20:]
plt.plot(epochs_zoom,mae_zoom,'r')
plt.plot(epochs_zoom,loss_zoom,'b')
plt.title('MAE and Loss')
plt.xlabel('Epochs')
plt.ylabel("Accuracy")
plt.legend(["MAE","Loss"])
plt.figure()
plt.show()