时间序列

#时间序列
#日期和时间数据类型及工具
from datetime import datetime
import pandas as pd
import numpy as np
now = datetime.now()
now
datetime.datetime(2018, 11, 7, 15, 6, 50, 327155)
now.year,now.month,now.day
(2018, 11, 7)
delta = datetime(2011,1,7) - datetime(2008,6,24,8,15)
delta#时间差
datetime.timedelta(days=926, seconds=56700)
delta.days
926
delta.seconds
56700
from datetime import timedelta#时间加减
start = datetime(2011,1,7)
start + timedelta(12)
datetime.datetime(2011, 1, 19, 0, 0)
start - 2*timedelta(12)
datetime.datetime(2010, 12, 14, 0, 0)
#字符串和datetime的相互转换
stamp = datetime(2011,1,3)
str(stamp)
'2011-01-03 00:00:00'
stamp.strftime('%Y-%m-%d,%H')
'2011-01-03,00'
value ='2011-01-03'
datetime.strptime(value,'%Y-%m-%d')
datetime.datetime(2011, 1, 3, 0, 0)
datestrs = ['07/06/2011','08/06/2011']
[datetime.strptime(x,'%m/%d/%Y')for x in datestrs]
[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]
from dateutil.parser import parse
parse('2011-01-03')
datetime.datetime(2011, 1, 3, 0, 0)
 parse('Jan 31, 1997 10:45 PM')
datetime.datetime(1997, 1, 31, 22, 45)
parse('6/12/2011',dayfirst=True)
datetime.datetime(2011, 12, 6, 0, 0)
datestrs = ['2011-01-06 12:00:00','2011-01-01 00:00:00']
pd.to_datetime(datestrs)
DatetimeIndex(['2011-01-06 12:00:00', '2011-01-01 00:00:00'], dtype='datetime64[ns]', freq=None)
idx = pd.to_datetime(datestrs+[None])
idx
DatetimeIndex(['2011-01-06 12:00:00', '2011-01-01 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)
idx[2]
NaT
pd.isnull(idx)
array([False, False,  True])
#时间序列基础
from datetime import datetime
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
             datetime(2011, 1, 7), datetime(2011, 1, 8),
             datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.randn(6),index=dates)
ts
2011-01-02   -0.889247
2011-01-05    0.472529
2011-01-07    1.458923
2011-01-08    0.125811
2011-01-10    0.878417
2011-01-12   -1.347855
dtype: float64
ts.index
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)
ts[::2]
2011-01-02   -0.889247
2011-01-07    1.458923
2011-01-10    0.878417
dtype: float64
ts + ts[::2]
2011-01-02   -1.778493
2011-01-05         NaN
2011-01-07    2.917845
2011-01-08         NaN
2011-01-10    1.756833
2011-01-12         NaN
dtype: float64
ts.index.dtype
dtype('<M8[ns]')
stamp = ts.index[0]
stamp
Timestamp('2011-01-02 00:00:00')
#索引,选取,子集构造
stamp = ts.index[2]
ts[stamp]
1.4589225544193962
ts['1/10/2011']
0.8784166722932014
ts['20110110']
0.8784166722932014
longer_ts = pd.Series(np.random.randn(1000),
                     index = pd.date_range('1/1/2000',periods=1000))
longer_ts
2000-01-01   -1.016306
2000-01-02   -1.619848
2000-01-03   -1.030577
2000-01-04    0.782235
2000-01-05    0.380402
2000-01-06    1.867834
2000-01-07   -0.966920
2000-01-08    1.116943
2000-01-09    0.286488
2000-01-10    0.609803
2000-01-11    1.426519
2000-01-12    0.424936
2000-01-13   -1.196588
2000-01-14    0.906847
2000-01-15    0.266331
2000-01-16   -0.704134
2000-01-17    0.645805
2000-01-18    0.724744
2000-01-19    1.271031
2000-01-20   -0.622831
2000-01-21   -0.378951
2000-01-22   -0.978048
2000-01-23   -0.204337
2000-01-24    0.069203
2000-01-25    0.245822
2000-01-26    1.696828
2000-01-27   -0.754646
2000-01-28   -0.728572
2000-01-29    0.076263
2000-01-30    2.723447
                ...   
2002-08-28    1.168994
2002-08-29    1.212782
2002-08-30   -0.568776
2002-08-31   -1.392654
2002-09-01   -0.353161
2002-09-02   -0.779683
2002-09-03   -0.740422
2002-09-04    0.582901
2002-09-05    0.543077
2002-09-06   -1.046154
2002-09-07    0.031932
2002-09-08   -1.062249
2002-09-09    0.911971
2002-09-10    0.312633
2002-09-11   -0.487974
2002-09-12   -0.681178
2002-09-13    0.606661
2002-09-14   -0.246001
2002-09-15   -0.375383
2002-09-16    1.651613
2002-09-17   -0.233456
2002-09-18   -1.442711
2002-09-19    0.620685
2002-09-20    0.731800
2002-09-21   -0.151031
2002-09-22    0.752516
2002-09-23   -0.788909
2002-09-24   -0.388959
2002-09-25   -1.466894
2002-09-26   -0.429725
Freq: D, Length: 1000, dtype: float64
longer_ts['2001']
2001-01-01   -0.515375
2001-01-02   -0.438227
2001-01-03    0.678962
2001-01-04    2.457332
2001-01-05    0.007770
2001-01-06    1.532512
2001-01-07    0.237520
2001-01-08    1.425760
2001-01-09    0.684237
2001-01-10   -0.340322
2001-01-11    0.232236
2001-01-12    1.615747
2001-01-13    0.365120
2001-01-14    0.324808
2001-01-15    0.015621
2001-01-16   -0.177306
2001-01-17    0.561159
2001-01-18    0.516523
2001-01-19   -1.058772
2001-01-20   -0.136410
2001-01-21   -0.016760
2001-01-22   -0.882206
2001-01-23   -0.976625
2001-01-24   -0.189113
2001-01-25   -0.984928
2001-01-26    1.502973
2001-01-27    0.058032
2001-01-28   -0.543902
2001-01-29    0.090189
2001-01-30   -0.505144
                ...   
2001-12-02   -0.553024
2001-12-03   -0.405266
2001-12-04   -0.567354
2001-12-05    0.094730
2001-12-06   -0.548641
2001-12-07   -0.592105
2001-12-08    1.558771
2001-12-09   -1.049105
2001-12-10    2.094203
2001-12-11    0.067828
2001-12-12    0.094673
2001-12-13   -0.883690
2001-12-14    0.216863
2001-12-15   -0.011448
2001-12-16   -0.276283
2001-12-17    2.146709
2001-12-18    0.123471
2001-12-19    1.448596
2001-12-20   -0.990181
2001-12-21   -0.723119
2001-12-22    0.506099
2001-12-23   -1.410846
2001-12-24    0.077442
2001-12-25   -0.586892
2001-12-26    0.302183
2001-12-27    0.821904
2001-12-28   -0.669978
2001-12-29   -0.238159
2001-12-30    0.177509
2001-12-31   -1.527928
Freq: D, Length: 365, dtype: float64
longer_ts['2001-05']
2001-05-01   -0.434947
2001-05-02   -1.013492
2001-05-03   -1.370608
2001-05-04   -0.278787
2001-05-05   -0.527465
2001-05-06   -0.794392
2001-05-07   -0.688821
2001-05-08   -0.174579
2001-05-09    0.327301
2001-05-10    0.231338
2001-05-11   -1.600751
2001-05-12   -1.305738
2001-05-13    0.198962
2001-05-14    1.608539
2001-05-15   -1.017836
2001-05-16    1.837142
2001-05-17   -0.213202
2001-05-18   -0.372286
2001-05-19    0.139703
2001-05-20    1.092866
2001-05-21    0.089208
2001-05-22    0.404983
2001-05-23    2.775343
2001-05-24    0.077524
2001-05-25   -0.143234
2001-05-26   -0.559451
2001-05-27    0.753692
2001-05-28    0.373406
2001-05-29    0.566619
2001-05-30    0.539838
2001-05-31    0.044411
Freq: D, dtype: float64
ts[datetime(2011,1,7):]
2011-01-07    1.458923
2011-01-08    0.125811
2011-01-10    0.878417
2011-01-12   -1.347855
dtype: float64
ts['1/6/2011':'1/11/2011']
2011-01-07    1.458923
2011-01-08    0.125811
2011-01-10    0.878417
dtype: float64
ts.truncate(after='1/9/2011')
2011-01-02   -0.889247
2011-01-05    0.472529
2011-01-07    1.458923
2011-01-08    0.125811
dtype: float64
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(np.random.randn(100, 4),
                         index=dates,
                          columns=['Colorado', 'Texas',
                                    'New York', 'Ohio'])
long_df.loc['5-2001']


            Colorado     Texas  New York      Ohio
2001-05-02  0.534672  0.471547  0.102840  0.307940
2001-05-09 -1.078103  0.024223 -1.152705  0.382906
2001-05-16 -1.149617 -0.889391 -2.160858  0.730430
2001-05-23  0.970990 -0.440826 -0.329939 -1.671497
2001-05-30  0.122803  0.703133 -0.176191 -0.722155
#带重复索引的时间序列
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                              '1/2/2000', '1/3/2000'])
dup_ts = pd.Series(np.arange(5),index=dates)
dup_ts
2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32
dup_ts['1/3/2000']
4
dup_ts['1/2/2000']
2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32
grouped = dup_ts.groupby(level=0)
grouped.mean()
2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32
grouped.count()
2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64
#日期范围,频率以及移动
ts
2011-01-02   -0.889247
2011-01-05    0.472529
2011-01-07    1.458923
2011-01-08    0.125811
2011-01-10    0.878417
2011-01-12   -1.347855
dtype: float64
resample = ts.resample('D')#D:每天
DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]
#生成日期范围
index = pd.date_range('2012-04-01','2012-06-01')
index
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
               '2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',
               '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')
pd.date_range(start='2012-04-01',periods=20)
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')
pd.date_range(end='2012-06-01',periods=20)
DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')
pd.date_range('2000-01-01','2000-12-01',freq='BM')
DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')
pd.date_range('2012-05-20 12:56:31',periods=5)
DatetimeIndex(['2012-05-20 12:56:31', '2012-05-21 12:56:31',
               '2012-05-22 12:56:31', '2012-05-23 12:56:31',
               '2012-05-24 12:56:31'],
              dtype='datetime64[ns]', freq='D')
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)
DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')
#频率和日期偏移量
from pandas.tseries.offsets import Hour,Minute
hour =Hour()
hour
<Hour>
four_hours = Hour(4)
four_hours
<4 * Hours>
 pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')
Hour(2) + Minute(30)
<150 * Minutes>
pd.date_range('2000-01-01', periods=10, freq='1h30min')
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')
#WOM日期
rng = pd.date_range('2012-01-01', '2012-09-01', freq='WOM-3FRI')#每月第三个星期五
list(rng)

[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]
#移动数据
ts = pd.Series(np.random.randn(4),
                   index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
2000-01-31    0.177721
2000-02-29   -0.202120
2000-03-31   -0.701722
2000-04-30   -0.938393
Freq: M, dtype: float64
ts.shift(2)
2000-01-31         NaN
2000-02-29         NaN
2000-03-31    0.177721
2000-04-30   -0.202120
Freq: M, dtype: float64
#移动时间戳
ts.shift(2, freq='M')
2000-03-31    0.177721
2000-04-30   -0.202120
2000-05-31   -0.701722
2000-06-30   -0.938393
Freq: M, dtype: float64
#通过偏移量对日期进行位移
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()
Timestamp('2011-11-20 00:00:00')
 now + MonthEnd()
Timestamp('2011-11-30 00:00:00')
offset = MonthEnd()
offset.rollforward(now)
Timestamp('2011-11-30 00:00:00')
offset.rollback(now)
Timestamp('2011-10-31 00:00:00')
ts = pd.Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts
2000-01-15    2.441955
2000-01-19    0.062600
2000-01-23   -0.300315
2000-01-27    1.181993
2000-01-31    0.572116
2000-02-04    1.209064
2000-02-08   -0.996346
2000-02-12    0.890940
2000-02-16    0.194561
2000-02-20    0.233563
2000-02-24    0.380644
2000-02-28    0.117173
2000-03-03   -0.088754
2000-03-07   -0.206632
2000-03-11   -1.792495
2000-03-15   -0.443086
2000-03-19   -0.229585
2000-03-23   -1.429532
2000-03-27   -1.059541
2000-03-31   -1.274479
Freq: 4D, dtype: float64
ts.groupby(offset.rollforward).mean()
2000-01-31    0.791670
2000-02-29    0.289943
2000-03-31   -0.815513
dtype: float64
#时区处理
import pytz
pytz.common_timezones[-5:]
['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']
#时区本地化和转换
rng = pd.date_range('3/9/2012 9:30',periods=6,freq='D')
ts = pd.Series(np.random.randn(len(rng)),index=rng)
ts
2012-03-09 09:30:00   -0.351911
2012-03-10 09:30:00    0.693718
2012-03-11 09:30:00    0.634782
2012-03-12 09:30:00    0.025861
2012-03-13 09:30:00   -0.142429
2012-03-14 09:30:00   -0.464828
Freq: D, dtype: float64
print(ts.index.tz)
None
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')
DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')
ts_utc = ts.tz_localize('UTC')
ts_utc
2012-03-09 09:30:00+00:00   -0.351911
2012-03-10 09:30:00+00:00    0.693718
2012-03-11 09:30:00+00:00    0.634782
2012-03-12 09:30:00+00:00    0.025861
2012-03-13 09:30:00+00:00   -0.142429
2012-03-14 09:30:00+00:00   -0.464828
Freq: D, dtype: float64
ts_utc.tz_convert('America/New_York')
2012-03-09 04:30:00-05:00   -0.351911
2012-03-10 04:30:00-05:00    0.693718
2012-03-11 05:30:00-04:00    0.634782
2012-03-12 05:30:00-04:00    0.025861
2012-03-13 05:30:00-04:00   -0.142429
2012-03-14 05:30:00-04:00   -0.464828
Freq: D, dtype: float64
#操作时区意识型Timestamp对象
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('America/New_York')
Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow
Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')
stamp_utc.value
1299902400000000000
#不同时区之间的运算
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
2012-03-07 09:30:00   -0.454626
2012-03-08 09:30:00   -0.102485
2012-03-09 09:30:00    0.309211
2012-03-12 09:30:00    0.249732
2012-03-13 09:30:00   -0.254658
2012-03-14 09:30:00   -0.503452
2012-03-15 09:30:00    1.722681
2012-03-16 09:30:00    0.090046
2012-03-19 09:30:00    1.414297
2012-03-20 09:30:00    0.126887
Freq: B, dtype: float64
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
result = ts1 + ts2 
result.index
DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')
#时期及其算术运算
p = pd.Period(2007,freq='A-DEC')
p
Period('2007', 'A-DEC')
p + 5
Period('2012', 'A-DEC')
p - 2
Period('2005', 'A-DEC')
pd.Period('2014',freq='A-DEC') - p
7
rng = pd.period_range('2000-01-01','2000-06-30',freq='M')
rng
PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')
pd.Series(np.random.randn(6),index=rng)
2000-01    0.143415
2000-02   -0.355323
2000-03   -1.136658
2000-04    0.533440
2000-05    0.504703
2000-06   -0.723782
Freq: M, dtype: float64
values = ['2001Q3','2002Q2','2003Q1']
index = pd.PeriodIndex(values,freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')
#时期的频率转换
p = pd.Period('2007',freq='A-DEC')
p
Period('2007', 'A-DEC')
p.asfreq('M',how='start')
Period('2007-01', 'M')
p.asfreq('M',how='end')
Period('2007-12', 'M')
#按季度计算的时期频率
p = pd.Period('2012Q4',freq='Q-JAN')
p
Period('2012Q4', 'Q-JAN')
#将Timestamp转化为Period
rng = pd.date_range('2000-01-01',periods=3,freq='M')
ts = pd.Series(np.random.randn(3),index=rng)
ts
2000-01-31   -0.510447
2000-02-29   -0.624095
2000-03-31   -1.085284
Freq: M, dtype: float64
pts=ts.to_period()
pts
2000-01   -0.510447
2000-02   -0.624095
2000-03   -1.085284
Freq: M, dtype: float64
rng = pd.date_range('1/29/2000',periods=6,freq='D')
ts2 = pd.Series(np.random.randn(6),index=rng)
ts2

2000-01-29   -0.869026
2000-01-30    0.206545
2000-01-31   -0.147305
2000-02-01   -0.151367
2000-02-02    0.950917
2000-02-03    1.198447
Freq: D, dtype: float64
ts2.to_period('M')
2000-01   -0.869026
2000-01    0.206545
2000-01   -0.147305
2000-02   -0.151367
2000-02    0.950917
2000-02    1.198447
Freq: M, dtype: float64
pts = ts2.to_period()
pts
2000-01-29   -0.869026
2000-01-30    0.206545
2000-01-31   -0.147305
2000-02-01   -0.151367
2000-02-02    0.950917
2000-02-03    1.198447
Freq: D, dtype: float64
pts.to_timestamp(how='end')
2000-01-29   -0.869026
2000-01-30    0.206545
2000-01-31   -0.147305
2000-02-01   -0.151367
2000-02-02    0.950917
2000-02-03    1.198447
Freq: D, dtype: float64
#通过数组创建PeriodIndex
data = pd.read_csv('examples/macrodata.csv')
data.head(5)
     year  quarter   realgdp  realcons  realinv  realgovt  realdpi    cpi     m1  tbilrate  unemp      pop  infl  realint
0  1959.0      1.0  2710.349    1707.4  286.898   470.045   1886.9  28.98  139.7      2.82    5.8  177.146  0.00     0.00
1  1959.0      2.0  2778.801    1733.7  310.859   481.301   1919.7  29.15  141.7      3.08    5.1  177.830  2.34     0.74
2  1959.0      3.0  2775.488    1751.8  289.226   491.260   1916.4  29.35  140.5      3.82    5.3  178.657  2.74     1.09
3  1959.0      4.0  2785.204    1753.7  299.356   484.052   1931.3  29.37  140.0      4.33    5.6  179.386  0.27     4.06
4  1960.0      1.0  2847.699    1770.5  331.722   462.199   1955.5  29.54  139.6      3.50    5.2  180.007  2.31     1.19
data.year
0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
5      1960.0
6      1960.0
7      1960.0
8      1961.0
9      1961.0
10     1961.0
11     1961.0
12     1962.0
13     1962.0
14     1962.0
15     1962.0
16     1963.0
17     1963.0
18     1963.0
19     1963.0
20     1964.0
21     1964.0
22     1964.0
23     1964.0
24     1965.0
25     1965.0
26     1965.0
27     1965.0
28     1966.0
29     1966.0
        ...  
173    2002.0
174    2002.0
175    2002.0
176    2003.0
177    2003.0
178    2003.0
179    2003.0
180    2004.0
181    2004.0
182    2004.0
183    2004.0
184    2005.0
185    2005.0
186    2005.0
187    2005.0
188    2006.0
189    2006.0
190    2006.0
191    2006.0
192    2007.0
193    2007.0
194    2007.0
195    2007.0
196    2008.0
197    2008.0
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64
data.quarter
0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
5      2.0
6      3.0
7      4.0
8      1.0
9      2.0
10     3.0
11     4.0
12     1.0
13     2.0
14     3.0
15     4.0
16     1.0
17     2.0
18     3.0
19     4.0
20     1.0
21     2.0
22     3.0
23     4.0
24     1.0
25     2.0
26     3.0
27     4.0
28     1.0
29     2.0
      ... 
173    2.0
174    3.0
175    4.0
176    1.0
177    2.0
178    3.0
179    4.0
180    1.0
181    2.0
182    3.0
183    4.0
184    1.0
185    2.0
186    3.0
187    4.0
188    1.0
189    2.0
190    3.0
191    4.0
192    1.0
193    2.0
194    3.0
195    4.0
196    1.0
197    2.0
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64
index = pd.PeriodIndex(year=data.year,quarter=data.quarter,freq='Q-DEC')
index
PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')
data.index = index
data
          year  quarter    realgdp  realcons   realinv  realgovt  realdpi      cpi      m1  tbilrate  unemp      pop   infl  realint
1959Q1  1959.0      1.0   2710.349    1707.4   286.898   470.045   1886.9   28.980   139.7      2.82    5.8  177.146   0.00     0.00
1959Q2  1959.0      2.0   2778.801    1733.7   310.859   481.301   1919.7   29.150   141.7      3.08    5.1  177.830   2.34     0.74
1959Q3  1959.0      3.0   2775.488    1751.8   289.226   491.260   1916.4   29.350   140.5      3.82    5.3  178.657   2.74     1.09
1959Q4  1959.0      4.0   2785.204    1753.7   299.356   484.052   1931.3   29.370   140.0      4.33    5.6  179.386   0.27     4.06
1960Q1  1960.0      1.0   2847.699    1770.5   331.722   462.199   1955.5   29.540   139.6      3.50    5.2  180.007   2.31     1.19
1960Q2  1960.0      2.0   2834.390    1792.9   298.152   460.400   1966.1   29.550   140.2      2.68    5.2  180.671   0.14     2.55
1960Q3  1960.0      3.0   2839.022    1785.8   296.375   474.676   1967.8   29.750   140.9      2.36    5.6  181.528   2.70    -0.34
1960Q4  1960.0      4.0   2802.616    1788.2   259.764   476.434   1966.6   29.840   141.1      2.29    6.3  182.287   1.21     1.08
1961Q1  1961.0      1.0   2819.264    1787.7   266.405   475.854   1984.5   29.810   142.1      2.37    6.8  182.992  -0.40     2.77
1961Q2  1961.0      2.0   2872.005    1814.3   286.246   480.328   2014.4   29.920   142.9      2.29    7.0  183.691   1.47     0.81
1961Q3  1961.0      3.0   2918.419    1823.1   310.227   493.828   2041.9   29.980   144.1      2.32    6.8  184.524   0.80     1.52
1961Q4  1961.0      4.0   2977.830    1859.6   315.463   502.521   2082.0   30.040   145.2      2.60    6.2  185.242   0.80     1.80
1962Q1  1962.0      1.0   3031.241    1879.4   334.271   520.960   2101.7   30.210   146.4      2.73    5.6  185.874   2.26     0.47
1962Q2  1962.0      2.0   3064.709    1902.5   331.039   523.066   2125.2   30.220   146.5      2.78    5.5  186.538   0.13     2.65
1962Q3  1962.0      3.0   3093.047    1917.9   336.962   538.838   2137.0   30.380   146.7      2.78    5.6  187.323   2.11     0.67
1962Q4  1962.0      4.0   3100.563    1945.1   325.650   535.912   2154.6   30.440   148.3      2.87    5.5  188.013   0.79     2.08
1963Q1  1963.0      1.0   3141.087    1958.2   343.721   522.917   2172.5   30.480   149.7      2.90    5.8  188.580   0.53     2.38
1963Q2  1963.0      2.0   3180.447    1976.9   348.730   518.108   2193.1   30.690   151.3      3.03    5.7  189.242   2.75     0.29
1963Q3  1963.0      3.0   3240.332    2003.8   360.102   546.893   2217.9   30.750   152.6      3.38    5.5  190.028   0.78     2.60
1963Q4  1963.0      4.0   3264.967    2020.6   364.534   532.383   2254.6   30.940   153.7      3.52    5.6  190.668   2.46     1.06
1964Q1  1964.0      1.0   3338.246    2060.5   379.523   529.686   2299.6   30.950   154.8      3.51    5.5  191.245   0.13     3.38
1964Q2  1964.0      2.0   3376.587    2096.7   377.778   526.175   2362.1   31.020   156.8      3.47    5.2  191.889   0.90     2.57
1964Q3  1964.0      3.0   3422.469    2135.2   386.754   522.008   2392.7   31.120   159.2      3.53    5.0  192.631   1.29     2.25
1964Q4  1964.0      4.0   3431.957    2141.2   389.910   514.603   2420.4   31.280   160.7      3.76    5.0  193.223   2.05     1.71
1965Q1  1965.0      1.0   3516.251    2188.8   429.145   508.006   2447.4   31.380   162.0      3.93    4.9  193.709   1.28     2.65
1965Q2  1965.0      2.0   3563.960    2213.0   429.119   508.931   2474.5   31.580   163.1      3.84    4.7  194.303   2.54     1.30
1965Q3  1965.0      3.0   3636.285    2251.0   444.444   529.446   2542.6   31.650   166.0      3.93    4.4  194.997   0.89     3.04
1965Q4  1965.0      4.0   3724.014    2314.3   446.493   544.121   2594.1   31.880   169.1      4.35    4.1  195.539   2.90     1.46
1966Q1  1966.0      1.0   3815.423    2348.5   484.244   556.593   2618.4   32.280   171.8      4.62    3.9  195.999   4.99    -0.37
1966Q2  1966.0      2.0   3828.124    2354.5   475.408   571.371   2624.7   32.450   170.3      4.65    3.8  196.560   2.10     2.55
...        ...      ...        ...       ...       ...       ...      ...      ...     ...       ...    ...      ...    ...      ...
2002Q2  2002.0      2.0  11538.770    7997.8  1810.779   774.408   8658.9  180.000  1199.5      1.70    5.8  288.028   1.56     0.14
2002Q3  2002.0      3.0  11596.430    8052.0  1814.531   786.673   8629.2  181.200  1204.0      1.61    5.7  288.783   2.66    -1.05
2002Q4  2002.0      4.0  11598.824    8080.6  1813.219   799.967   8649.6  182.600  1226.8      1.20    5.8  289.421   3.08    -1.88
2003Q1  2003.0      1.0  11645.819    8122.3  1813.141   800.196   8681.3  183.200  1248.4      1.14    5.9  290.019   1.31    -0.17
2003Q2  2003.0      2.0  11738.706    8197.8  1823.698   838.775   8812.5  183.700  1287.9      0.96    6.2  290.704   1.09    -0.13
2003Q3  2003.0      3.0  11935.461    8312.1  1889.883   839.598   8935.4  184.900  1297.3      0.94    6.1  291.449   2.60    -1.67
2003Q4  2003.0      4.0  12042.817    8358.0  1959.783   845.722   8986.4  186.300  1306.1      0.90    5.8  292.057   3.02    -2.11
2004Q1  2004.0      1.0  12127.623    8437.6  1970.015   856.570   9025.9  187.400  1332.1      0.94    5.7  292.635   2.35    -1.42
2004Q2  2004.0      2.0  12213.818    8483.2  2055.580   861.440   9115.0  189.100  1340.5      1.21    5.6  293.310   3.61    -2.41
2004Q3  2004.0      3.0  12303.533    8555.8  2082.231   876.385   9175.9  190.800  1361.0      1.63    5.4  294.066   3.58    -1.95
2004Q4  2004.0      4.0  12410.282    8654.2  2125.152   865.596   9303.4  191.800  1366.6      2.20    5.4  294.741   2.09     0.11
2005Q1  2005.0      1.0  12534.113    8719.0  2170.299   869.204   9189.6  193.800  1357.8      2.69    5.3  295.308   4.15    -1.46
2005Q2  2005.0      2.0  12587.535    8802.9  2131.468   870.044   9253.0  194.700  1366.6      3.01    5.1  295.994   1.85     1.16
2005Q3  2005.0      3.0  12683.153    8865.6  2154.949   890.394   9308.0  199.200  1375.0      3.52    5.0  296.770   9.14    -5.62
2005Q4  2005.0      4.0  12748.699    8888.5  2232.193   875.557   9358.7  199.400  1380.6      4.00    4.9  297.435   0.40     3.60
2006Q1  2006.0      1.0  12915.938    8986.6  2264.721   900.511   9533.8  200.700  1380.5      4.51    4.7  298.061   2.60     1.91
2006Q2  2006.0      2.0  12962.462    9035.0  2261.247   892.839   9617.3  202.700  1369.2      4.82    4.7  298.766   3.97     0.85
2006Q3  2006.0      3.0  12965.916    9090.7  2229.636   892.002   9662.5  201.900  1369.4      4.90    4.7  299.593  -1.58     6.48
2006Q4  2006.0      4.0  13060.679    9181.6  2165.966   894.404   9788.8  203.574  1373.6      4.92    4.4  300.320   3.30     1.62
2007Q1  2007.0      1.0  13099.901    9265.1  2132.609   882.766   9830.2  205.920  1379.7      4.95    4.5  300.977   4.58     0.36
2007Q2  2007.0      2.0  13203.977    9291.5  2162.214   898.713   9842.7  207.338  1370.0      4.72    4.5  301.714   2.75     1.97
2007Q3  2007.0      3.0  13321.109    9335.6  2166.491   918.983   9883.9  209.133  1379.2      4.00    4.7  302.509   3.45     0.55
2007Q4  2007.0      4.0  13391.249    9363.6  2123.426   925.110   9886.2  212.495  1377.4      3.01    4.8  303.204   6.38    -3.37
2008Q1  2008.0      1.0  13366.865    9349.6  2082.886   943.372   9826.8  213.997  1384.0      1.56    4.9  303.803   2.82    -1.26
2008Q2  2008.0      2.0  13415.266    9351.0  2026.518   961.280  10059.0  218.610  1409.3      1.74    5.4  304.483   8.53    -6.79
2008Q3  2008.0      3.0  13324.600    9267.7  1990.693   991.551   9838.3  216.889  1474.7      1.17    6.0  305.270  -3.16     4.33
2008Q4  2008.0      4.0  13141.920    9195.3  1857.661  1007.273   9920.4  212.174  1576.5      0.12    6.9  305.952  -8.79     8.91
2009Q1  2009.0      1.0  12925.410    9209.2  1558.494   996.287   9926.4  212.671  1592.8      0.22    8.1  306.547   0.94    -0.71
2009Q2  2009.0      2.0  12901.504    9189.0  1456.678  1023.528  10077.5  214.469  1653.6      0.18    9.2  307.226   3.37    -3.19
2009Q3  2009.0      3.0  12990.341    9256.0  1486.398  1044.088  10040.6  216.385  1673.9      0.12    9.6  308.013   3.56    -3.44

203 rows × 14 columns

#重采样及频率转换
#重采样(resampling)指的是将时间序列从一个频率转换到另一个频率的处理过程
rng = pd.date_range('2000-01-01',periods=100,freq='D')
ts = pd.Series(np.random.randn(len(rng)),index=rng)
ts
2000-01-01    0.014440
2000-01-02    0.676634
2000-01-03    0.828413
2000-01-04    1.809659
2000-01-05    0.346060
2000-01-06   -0.906748
2000-01-07   -0.144737
2000-01-08   -0.376248
2000-01-09   -1.811051
2000-01-10    0.422918
2000-01-11   -0.291923
2000-01-12   -0.947770
2000-01-13    2.794186
2000-01-14   -1.314019
2000-01-15   -0.474217
2000-01-16    0.657007
2000-01-17   -1.604424
2000-01-18   -0.387496
2000-01-19    0.493891
2000-01-20   -0.963368
2000-01-21    1.689601
2000-01-22   -0.318659
2000-01-23    0.960378
2000-01-24   -0.241049
2000-01-25   -0.068125
2000-01-26   -0.755140
2000-01-27    1.123554
2000-01-28    0.775620
2000-01-29    0.141545
2000-01-30    0.200428
                ...   
2000-03-11    0.695305
2000-03-12   -0.418060
2000-03-13   -0.143606
2000-03-14   -1.265208
2000-03-15    0.016866
2000-03-16   -1.072653
2000-03-17    0.491403
2000-03-18   -0.973830
2000-03-19    1.357179
2000-03-20   -0.856155
2000-03-21    0.594472
2000-03-22   -1.938699
2000-03-23    1.890984
2000-03-24   -0.635768
2000-03-25   -0.239094
2000-03-26    0.281482
2000-03-27   -1.499055
2000-03-28   -1.219709
2000-03-29   -0.996651
2000-03-30    0.779328
2000-03-31   -1.139721
2000-04-01    1.624966
2000-04-02    0.988920
2000-04-03    0.940181
2000-04-04   -0.195757
2000-04-05   -0.140197
2000-04-06   -1.513569
2000-04-07   -1.563758
2000-04-08    0.216563
2000-04-09   -0.857682
Freq: D, Length: 100, dtype: float64
ts.resample('M',kind='Period').mean()
2000-01-31    0.079785
2000-02-29    0.057120
2000-03-31   -0.337746
2000-04-30   -0.055593
Freq: M, dtype: float64
#降采样
rng = pd.date_range('2000-01-01', periods=12, freq='T')
ts = pd.Series(np.arange(12), index=rng)
ts
2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32
ts.resample('5min',closed='right').sum()
1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32
ts.resample('5min',closed='right',label='right').sum()
2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32
ts.resample('5min').ohlc()
                     open  high  low  close
2000-01-01 00:00:00     0     4    0      4
2000-01-01 00:05:00     5     9    5      9
2000-01-01 00:10:00    10    11   10     11
#通过时期进行重采样
frame = pd.DataFrame(np.random.randn(24, 4),
                         index=pd.period_range('1-2000', '12-2001',
                                              freq='M'),
                        columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame

         Colorado     Texas  New York      Ohio
2000-01  0.966322 -1.097520  0.278189  1.351895
2000-02 -0.440050 -1.486752 -0.281330 -0.083094
2000-03 -0.148903  0.074773  0.432271 -0.606501
2000-04 -0.324102  1.150940 -1.007997  0.192719
2000-05  0.018921 -0.463694 -0.915002 -1.167408
2000-06  1.098821  2.595297 -0.289931  0.624960
2000-07  1.173885 -1.247856 -1.546760  1.148165
2000-08 -2.465409 -0.976749 -0.565111 -2.461652
2000-09 -0.337638 -0.860959  0.585042  0.935459
2000-10 -0.422014 -0.233506 -1.239120  1.692123
2000-11 -0.010893  0.428696 -0.574149 -0.127551
2000-12  0.044093  1.085275  0.995065  0.893955
2001-01 -1.007087 -1.721242  0.328790 -0.536349
2001-02 -1.525735  0.391245  0.649280  1.114608
2001-03  0.398916  0.423673  0.442252  0.922619
2001-04  1.172961  0.726157 -0.860389 -0.263490
2001-05  1.293229 -0.104286 -0.973742  1.284994
2001-06 -0.620977 -0.288092  0.509420 -0.775555
2001-07 -0.249990 -0.075546  0.064330 -0.077252
2001-08  0.010249 -0.009822  1.467907 -0.932622
2001-09 -0.849710 -0.144258  0.384650 -1.102285
2001-10 -1.594351  0.561065  0.171580  0.385832
2001-11 -0.164112  1.495407 -0.688782 -1.025762
2001-12 -1.392290  1.441935 -0.185071  0.306422
annual_frame = frame.resample('A-DEC').mean()
annual_frame
      Colorado     Texas  New York      Ohio
2000 -0.070581 -0.086005 -0.344069  0.199423
2001 -0.377408  0.224686  0.109186 -0.058237
#移动窗口函数
close_px_all = pd.read_csv('examples/stock_px_2.csv',
                             parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px = close_px.resample('B').ffill()
close_px
              AAPL   MSFT    XOM
2003-01-02    7.40  21.11  29.22
2003-01-03    7.45  21.14  29.24
2003-01-06    7.45  21.52  29.96
2003-01-07    7.43  21.93  28.95
2003-01-08    7.28  21.31  28.83
2003-01-09    7.34  21.93  29.44
2003-01-10    7.36  21.97  29.03
2003-01-13    7.32  22.16  28.91
2003-01-14    7.30  22.39  29.17
2003-01-15    7.22  22.11  28.77
2003-01-16    7.31  21.75  28.90
2003-01-17    7.05  20.22  28.60
2003-01-20    7.05  20.22  28.60
2003-01-21    7.01  20.17  27.94
2003-01-22    6.94  20.04  27.58
2003-01-23    7.09  20.54  27.52
2003-01-24    6.90  19.59  26.93
2003-01-27    7.07  19.32  26.21
2003-01-28    7.29  19.18  26.90
2003-01-29    7.47  19.61  27.88
2003-01-30    7.16  18.95  27.37
2003-01-31    7.18  18.65  28.13
2003-02-03    7.33  19.08  28.52
2003-02-04    7.30  18.59  28.52
2003-02-05    7.22  18.45  28.11
2003-02-06    7.22  18.63  27.87
2003-02-07    7.07  18.30  27.66
2003-02-10    7.18  18.62  27.87
2003-02-11    7.18  18.25  27.67
2003-02-12    7.20  18.25  27.12
...            ...    ...    ...
2011-09-05  374.05  25.80  72.14
2011-09-06  379.74  25.51  71.15
2011-09-07  383.93  26.00  73.65
2011-09-08  384.14  26.22  72.82
2011-09-09  377.48  25.74  71.01
2011-09-12  379.94  25.89  71.84
2011-09-13  384.62  26.04  71.65
2011-09-14  389.30  26.50  72.64
2011-09-15  392.96  26.99  74.01
2011-09-16  400.50  27.12  74.55
2011-09-19  411.63  27.21  73.70
2011-09-20  413.45  26.98  74.01
2011-09-21  412.14  25.99  71.97
2011-09-22  401.82  25.06  69.24
2011-09-23  404.30  25.06  69.31
2011-09-26  403.17  25.44  71.72
2011-09-27  399.26  25.67  72.91
2011-09-28  397.01  25.58  72.07
2011-09-29  390.57  25.45  73.88
2011-09-30  381.32  24.89  72.63
2011-10-03  374.60  24.53  71.15
2011-10-04  372.50  25.34  72.83
2011-10-05  378.25  25.89  73.95
2011-10-06  377.37  26.34  73.89
2011-10-07  369.80  26.25  73.56
2011-10-10  388.81  26.94  76.28
2011-10-11  400.29  27.00  76.27
2011-10-12  402.19  26.96  77.16
2011-10-13  408.43  27.18  76.37
2011-10-14  422.00  27.27  78.11

2292 rows × 3 columns

close_px.AAPL.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1966953de80>

png

close_px.AAPL.rolling(250).mean().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1966a5b68d0>

png

appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
appl_std250[5:12]
2003-01-09         NaN
2003-01-10         NaN
2003-01-13         NaN
2003-01-14         NaN
2003-01-15    0.077496
2003-01-16    0.074760
2003-01-17    0.112368
Freq: B, Name: AAPL, dtype: float64
appl_std250.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1966a6b34e0>

png


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值