"""Walk-through of basic pandas time-series operations.

Covers building a date-indexed Series, truncating it, resampling
(down-sampling and up-sampling), filling the gaps created by up-sampling,
and rolling-window statistics.
"""

import numpy as np
import pandas as pd

# --- Building a time series --------------------------------------------------
rng = pd.date_range('2016/1/1', periods=20, freq='D')
time = pd.Series(np.random.rand(20), index=rng)
# print(time)

# --- Filtering ---------------------------------------------------------------
# truncate() returns a new Series instead of modifying the caller, so the
# result has to be bound back to the name for the filter to take effect.
time = time.truncate(before='2016-1-10')
# print(time)

# --- Resampling --------------------------------------------------------------
# 1. Converts time-indexed data from one frequency to another.
# 2. Down-sampling: to a lower frequency (values are aggregated).
# 3. Up-sampling: to a higher frequency (new empty slots appear).
rng = pd.date_range('1/1/2011', periods=90, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
# print(ts.head())

# Aggregate per month (the month-end alias 'M' is spelled 'ME' from
# pandas 2.2 onwards):
# ts.resample('M').sum()
day3Ts = ts.resample('3D').sum()
# print(ts.resample('3D').mean())

# --- Filling gaps after up-sampling ------------------------------------------
# print(day3Ts.resample('D').asfreq())
#
# Three ways to fill the NaN slots:
#   1. ffill       - a NaN takes the preceding value
#   2. bfill       - a NaN takes the following value
#   3. interpolate - NaNs are filled by linear interpolation
#
# print(day3Ts.resample('D').ffill(limit=1))  # limit: how many NaNs to fill
#   2011-01-01   -1.376295
#   2011-01-02   -1.376295
#   2011-01-03         NaN
#
# print(day3Ts.resample('D').bfill(limit=1))
#   2011-01-01    0.948138
#   2011-01-02         NaN
#   2011-01-03    0.560229
#   2011-01-04    0.560229
#
# print(day3Ts.resample('D').interpolate('linear'))
#   2011-01-01    0.453659
#   2011-01-02    0.993409
#   2011-01-03    1.533158
#   2011-01-04    2.072907

# --- Rolling window ----------------------------------------------------------
# Typical use: averaging over a stretch of time when forecasting.
df = pd.Series(
    np.random.randn(600),
    index=pd.date_range('7/1/2016', freq='D', periods=600),
)
# print(df.head())
# r = df.rolling(window=10)  # window size of 10
# Other aggregations: r.max, r.median, r.std, r.skew, r.sum, r.var
# print(r.mean())
#   2016-07-01         NaN
#   2016-07-02         NaN
#   2016-07-03         NaN
#   2016-07-04         NaN
#   2016-07-05         NaN
#   2016-07-06         NaN
#   2016-07-07         NaN
#   2016-07-08         NaN
#   2016-07-09         NaN
#   2016-07-10    0.437476
#   2016-07-11    0.042982


def plot_rolling_mean(series, window=10):
    """Plot *series* and its rolling mean on one figure.

    Args:
        series: Series to draw as a red dashed line.
        window: Rolling-window size used for the mean, drawn as a solid
            blue line.
    """
    # Imported here so the data-preparation part of the module can be
    # imported without a plotting backend being installed.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(15, 5))
    series.plot(style='r--')
    series.rolling(window=window).mean().plot(style='b')
    # Without show() a plain script run exits before the figure is displayed.
    plt.show()


if __name__ == '__main__':
    plot_rolling_mean(df)
# Time-series processing methods based on pandas
# (Page footer from the source article: latest recommended article published 2023-01-06 15:23:00)