import pandas as pd
import numpy as np
# Time-series preprocessing demo: put an irregular series onto a regular
# 15-minute grid, then fill the newly created gaps by linear interpolation.

# Sample series; the rows are deliberately not in chronological order.
data = pd.DataFrame({
    'ds': ['2024-01-20 00:00:00', '2024-01-20 01:15:00', '2024-01-20 00:15:00'],
    'y': [0.1, 0.2, 0.3],
})
# Make sure ds is a datetime column so it can be sorted and resampled by time.
data['ds'] = pd.to_datetime(data['ds'])
# Sort chronologically; the original row labels are kept on purpose so the
# printout shows how the rows were reordered.
data = data.sort_values(by='ds', ascending=True)
print(data)
# Resample onto a 15-minute grid. Bins without any observation become NaN.
# '15min' is used instead of '15T': the 'T' alias is deprecated since
# pandas 2.2, while 'min' is accepted by old and new versions alike.
data = data.set_index('ds').resample(rule='15min').mean().reset_index()
print(data)
# Linearly interpolate the missing values. After reset_index() the rows are
# evenly spaced, so plain 'linear' gives the same values as 'slinear' without
# requiring SciPy; limit_area='inside' keeps the "no extrapolation" behaviour
# (only NaNs surrounded by valid values are filled).
data['y'] = data['y'].interpolate(method='linear', limit_area='inside')
print(data)
##结果
"""
                   ds    y
0 2024-01-20 00:00:00  0.1
2 2024-01-20 00:15:00  0.3
1 2024-01-20 01:15:00  0.2
                   ds    y
0 2024-01-20 00:00:00  0.1
1 2024-01-20 00:15:00  0.3
2 2024-01-20 00:30:00  NaN
3 2024-01-20 00:45:00  NaN
4 2024-01-20 01:00:00  NaN
5 2024-01-20 01:15:00  0.2
                   ds      y
0 2024-01-20 00:00:00  0.100
1 2024-01-20 00:15:00  0.300
2 2024-01-20 00:30:00  0.275
3 2024-01-20 00:45:00  0.250
4 2024-01-20 01:00:00  0.225
5 2024-01-20 01:15:00  0.200
"""
时间序列预测数据预处理3:缺失时间点补全
最新推荐文章于 2024-05-26 07:45:00 发布