# 创建于:2021-07-15
# 修改于:2021-07-15
# 最近在做时序预测,不想把精力放在各种时序数据的处理上,所以决定自己生成一些时间序列数据,供研究使用。
# -*- coding:UTF-8 -*-
# datetime:2021/7/15 10:22
# software: PyCharm
"""
文件说明:
创建时序数据:包括基线、趋势、周期(2种)
"""
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import datetime
class GenerateTimeSeries():
    """Build a synthetic single-variable time series.

    Each data point is the sum of a baseline, a linear trend, a sinusoidal
    periodic component and (optionally) random disturbances.

    Args:
        base: Baseline level of the series.
        trend: Increment added per step; ``base * trend`` is also used as
            the amplitude of the periodic components.
        freq: Frequency string passed to ``pandas.date_range``. ``'H'`` (or
            any string containing ``'24'``) is normalised to hourly data
            with a 24-step period; ``'15T'`` (or any string containing
            ``'96'``) is normalised to 15-minute data with a 96-step
            period. In both cases the second period is 7 times the first.
        n_freq: Number of data points to generate.
        period: Length of the first period, in steps. Only used when
            ``freq`` is not one of the recognised values above; the second
            period is then ``period * 4``.
        time_end: End timestamp of the index. When ``None`` it is replaced
            by today's date on the first call to ``get_datetime``.
        use_rnd: When true (the default) every component is perturbed with
            random noise from the ``random`` module; when false the
            components are fully deterministic.
    """

    def __init__(self, base=100, trend=0.02, freq='H', n_freq=1000, period=7,
                 time_end=None, use_rnd=True):
        self.base = base
        self.trend = trend
        self.freq = freq
        self.n_freq = n_freq
        self.period = period
        self.time_end = time_end
        self.use_rnd = use_rnd
        self.period_2nd = self.period * 4

        # Recognised frequencies override the user-supplied period.
        # NOTE(review): recent pandas releases deprecate the 'H' / 'T'
        # aliases in favour of 'h' / 'min' - confirm against the pandas
        # version this project pins before changing the stored values.
        if ('24' in self.freq) or (freq == 'H'):
            self.freq = 'H'
            self.period = 24
            self.period_2nd = self.period * 7
        elif ('96' in self.freq) or (freq == '15T'):
            self.freq = '15T'
            self.period = 96
            self.period_2nd = self.period * 7

    def __str__(self):
        return '制造具有基准线、趋势、周期和随机扰动的时序序列。单个变量。'

    def get_datetime(self):
        """Return a ``DatetimeIndex`` of ``n_freq`` stamps ending at ``time_end``."""
        if self.time_end is None:
            # Remember the resolved end date so later calls stay aligned.
            self.time_end = datetime.date.today()
        return pd.date_range(end=self.time_end, periods=self.n_freq, freq=self.freq)

    def get_base(self):
        """Return the baseline, jittered uniformly by +/-0.3% when random."""
        if self.use_rnd:
            return random.uniform(self.base * (1 - 0.003), self.base * (1 + 0.003))
        return self.base

    def get_trend(self, number=1):
        """Return the trend contribution at step ``number``.

        The result is ``trend * number`` plus, when randomness is enabled,
        Gaussian noise with standard deviation ``base * 0.002``.
        """
        # The original also drew a uniform sample here that was overwritten
        # straight away; only the Gaussian draw ever reached the result.
        rnd = random.gauss(mu=0, sigma=self.base * 0.002) if self.use_rnd else 0
        return self.trend * number + rnd

    def _sine_component(self, number, period, jitter_ratio):
        """Return ``base * trend * sin(2*pi*number/period)`` with optional phase jitter."""
        if self.use_rnd:
            # Jitter the step index rather than the period: a randomised
            # period makes the cycle fade out as ``number`` grows.
            number = random.gauss(mu=number, sigma=period * jitter_ratio)
        return self.base * self.trend * np.sin(number * 2 * np.pi / period)

    def get_period(self, number=1):
        """Return the first periodic component (length ``period``) at step ``number``."""
        return self._sine_component(number, self.period, 0.05)

    def get_period_2nd(self, number=1):
        """Return the second periodic component (length ``period_2nd``) at step ``number``."""
        return self._sine_component(number, self.period_2nd, 0.01)

    def get_rnd(self):
        """Return Gaussian noise with sigma ``base * 0.002``, or 0 when not random."""
        if self.use_rnd:
            return random.gauss(mu=0, sigma=self.base * 0.002)
        return 0

    def get_value(self):
        """Return the list of ``n_freq`` generated values for steps 1..n_freq."""
        # NOTE(review): only the second periodic component is summed here;
        # get_period() is not included - confirm whether that is intended.
        return [
            self.get_base() + self.get_trend(step)
            + self.get_period_2nd(number=step) + self.get_rnd()
            for step in range(1, self.n_freq + 1)
        ]

    def generate_time_series(self):
        """Return the generated values as a ``pandas.Series`` indexed by time."""
        index = self.get_datetime()
        return pd.Series(self.get_value(), index=index)
# Demo: generate one series with the default settings and plot it.
# Guarded so that importing this module to reuse the class does not open
# (and block on) a plot window.
if __name__ == "__main__":
    gts = GenerateTimeSeries()
    df_ts = gts.generate_time_series()
    df_ts.plot()
    plt.show()
# 结果图如下:
# 还需持续完善~