In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta

# Snapshot of the current local date and time, then its calendar fields.
now = datetime.now()
now.year, now.month, now.day

# Subtracting one datetime from another yields a timedelta: the elapsed
# time between the two instants.
delta = datetime(2011, 1, 7) - datetime(year=2008, month=6, day=24,
                                        hour=8, minute=15)
delta

# A timedelta can be added to a datetime. The first timedelta argument is
# a number of days, so this evaluates to the date 12 days after `start`.
start = datetime(year=2011, month=1, day=7)
start + timedelta(days=12)
Out[2]:
datetime.datetime(2011, 1, 19, 0, 0)
In [3]:
from dateutil.parser import parse

# datetime -> text: str() uses the default rendering, strftime a chosen format.
stamp = datetime(year=2011, month=1, day=3)
str(stamp)
stamp.strftime('%Y-%m-%d')

# text -> datetime when the format is known up front.
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')
datestrs = ['7/5/2011', '8/6/2011']
list(map(lambda text: datetime.strptime(text, '%m/%d/%Y'), datestrs))

# dateutil works out the format on its own.
parse('2011-01-03')
parse('Jan 31, 1997 10:45 PM')
# dayfirst=True reads the leading number as the day (12 June, not 6 December).
parse('6/12/2011', dayfirst=True)

# pandas parses a whole list of date strings in one call.
pd.to_datetime(datestrs)
# Same call with a trailing None entry (a missing value) appended to the input.
idx = pd.to_datetime([*datestrs, None])
In [4]:
from datetime import datetime
# Six irregularly spaced observation dates in January 2011.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

# Using datetime objects as the index makes this a time series.
ts = pd.Series(data=np.random.randn(len(dates)), index=dates)

# Arithmetic between differently indexed time series aligns on the dates;
# dates missing from the every-other-row selection come out as NaN.
ts + ts.iloc[::2]
Out[4]:
2011-01-02 -0.756678 2011-01-05 NaN 2011-01-07 -3.294655 2011-01-08 NaN 2011-01-10 2.615796 2011-01-12 NaN dtype: float64
In [5]:
# NOTE: `ts` is the series built in an earlier cell.
# A label can be any string that is interpretable as a date.
ts['1/10/2011']
ts['20110110']

# A longer, daily series: 1000 consecutive days starting 2000-01-01.
longer_ts = pd.Series(np.random.randn(1000),
                      index=pd.date_range('1/1/2000', periods=1000))
longer_ts['2001']     # a year string selects that whole year
longer_ts['2001-05']  # a year-month string selects that month

# Slicing by date works the same way as slicing an ordinary Series.
ts[datetime(2011, 1, 7):]
# truncate() trims the series at a date; here everything after 2011-01-09
# is dropped (a `before=` bound can be given as well).
ts.truncate(after='1/9/2011')

# The same selection applies to the rows of a DataFrame.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(np.random.randn(100, 4),
                       index=dates,
                       columns=['Colorado', 'Texas', 'New York', 'Ohio'])
# `.ix` was removed in pandas 1.0; `.loc` is the label-based indexer and
# accepts the same partial date string (all rows in May 2001).
long_df.loc['5-2001']
Out[5]:
Colorado | Texas | New York | Ohio | |
---|---|---|---|---|
2001-05-02 | 0.018954 | -0.306463 | 0.446129 | 0.138928 |
2001-05-09 | -0.583198 | 1.617237 | -0.286660 | -0.144190 |
2001-05-16 | 1.655409 | -1.391932 | 1.537989 | -0.528939 |
2001-05-23 | 0.304632 | -0.677888 | -0.226242 | -0.756993 |
2001-05-30 | 0.422280 | 0.327755 | -0.797151 | -1.129093 |
In [6]:
# Five timestamps, three of which fall on the same day (2000-01-02).
dates = pd.DatetimeIndex(
    ['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)

# Check whether every index label is unique.
dup_ts.index.is_unique

dup_ts['1/3/2000']  # label that is not duplicated
dup_ts['1/2/2000']  # label that is duplicated

# Aggregate observations that share a timestamp: group on index level 0.
grouped = dup_ts.groupby(level=0)
grouped.mean()
Out[6]:
2000-01-01 0 2000-01-02 2 2000-01-03 4 dtype: int32
In [7]:
# Convert the series to a fixed daily frequency. On pandas >= 0.18,
# resample() alone only builds a lazy Resampler, so .asfreq() is needed to
# materialise the converted series (days with no observation become NaN).
# NOTE: `ts` is the series defined in an earlier cell.
ts.resample('D').asfreq()

# With start and end given, date_range produces daily timestamps by default.
index = pd.date_range('4/1/2012', '6/1/2012')
# With only one endpoint, a number of periods must be supplied.
pd.date_range(start='4/1/2012', periods=20)
pd.date_range(end='6/1/2012', periods=20)

# Last business day of each month. The offset object equals the legacy 'BM'
# alias (spelled 'BME' from pandas 2.2 on) and works on every version.
pd.date_range('1/1/2000', '12/1/2000', freq=pd.offsets.BMonthEnd())

# normalize=True snaps the generated timestamps to midnight.
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)
Out[7]:
DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06'], dtype='datetime64[ns]', freq='D')
频率和日期偏移量
In [8]:
from pandas.tseries.offsets import Hour, Minute
# Offsets can be built directly; an integer argument is the multiple.
hour = Hour()
four_hours = Hour(4)  # four hours
pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')

# Offsets add together, and the same combination can be written as a
# frequency string.
Hour(2) + Minute(30)
pd.date_range('1/1/2000', periods=10, freq='1h30min')

# "Week of month" (WOM) frequencies give dates such as the third Friday of
# every month.
rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')
list(rng)

# Month-end frequency as an offset object. It equals the legacy 'M' alias,
# which pandas 2.2 deprecated in favour of 'ME'; the object form works on
# every version.
month_end = pd.offsets.MonthEnd()
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq=month_end))

# Without `freq`, shift() moves the values and leaves the index untouched.
ts.shift(2)   # values move two rows later; the first two become NaN
ts.shift(-2)  # values move two rows earlier; the last two become NaN
ts / ts.shift(1) - 1  # period-over-period percent change

# With `freq`, the timestamps are advanced instead, so no data is dropped.
ts.shift(2, freq=month_end)
Out[8]:
2000-03-31 1.262079 2000-04-30 -0.238288 2000-05-31 0.614094 2000-06-30 0.379734 Freq: M, dtype: float64
In [9]:
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(year=2011, month=11, day=17)
offset = MonthEnd()

# Adding an anchored offset moves the date to the anchor (the month end).
now + offset

# Explicit control over the rolling direction.
offset.rollforward(now)  # roll forward to the month end
offset.rollback(now)     # roll back to the previous month end

# 20 observations spaced 4 days apart, starting 2000-01-15.
ts = pd.Series(
    data=np.random.randn(20),
    index=pd.date_range(start='1/15/2000', periods=20, freq='4d'),
)

# Used as a groupby key function, rollforward maps each timestamp to its
# month end, which gives a per-month mean.
ts.groupby(offset.rollforward).mean()
Out[9]:
2000-01-31 0.208167 2000-02-29 -0.015882 2000-03-31 -0.137302 dtype: float64
In [26]:
import pytz  # time zone database
from pandas.tseries.offsets import Hour

pytz.common_timezones[-5:]  # last few names in the list of common zones
tz = pytz.timezone('US/Eastern')  # look up a time zone object by name
tz

# --- Localization and conversion ---
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
print(ts.index.tz)  # pandas time series are time zone naive by default
# A time zone can be attached when the range is generated.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')
ts_utc = ts.tz_localize('UTC')  # naive -> localized
ts_utc.index
ts_utc.tz_convert('US/Eastern')  # localized -> another time zone
# Localize to US/Eastern first, then convert to other zones.
ts_eastern = ts.tz_localize('US/Eastern')
ts_eastern.tz_convert('UTC')
ts_eastern.tz_convert('Europe/Berlin')
# tz_localize and tz_convert are also instance methods of DatetimeIndex.
ts.index.tz_localize('Asia/Shanghai')

# --- Operations on time zone aware Timestamp objects ---
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('US/Eastern')
# A time zone can also be passed when the Timestamp is created.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow
# `.value` is the number of nanoseconds since the UNIX epoch (1970-01-01);
# a time zone conversion leaves it unchanged.
stamp_utc.value
stamp_utc.tz_convert('US/Eastern').value

# 30 minutes before the switch to daylight saving time. In 2012 US DST
# began on Sunday 11 March at 02:00, so the example must use 11 March (on
# 12 March no transition is crossed). One hour later the wall clock reads
# 03:30 with a -04:00 offset.
stamp = pd.Timestamp('2012-03-11 01:30', tz='US/Eastern')
spring_forward = stamp + Hour()
spring_forward

# 90 minutes before the switch back from daylight saving time
# (4 November 2012): two hours later the wall clock reads 01:30 (-05:00).
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp + 2 * Hour()
None
Out[26]:
Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')
In [30]:
# Ten consecutive business days (freq='B') starting 2012-03-07 09:30.
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)

# Two overlapping slices expressed in different time zones.
ts1 = ts.iloc[:7].tz_localize('Europe/London')
ts2 = ts1.iloc[2:].tz_convert('Europe/Moscow')

# Combining series with different time zones; inspect the resulting index.
result = ts1 + ts2
result.index
Out[30]:
DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00', '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00', '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00', '2012-03-15 09:30:00+00:00'], dtype='datetime64[ns, UTC]', freq='B')
In [51]:
# The `how=`, `fill_method=` and `loffset=` keywords of resample() no longer
# exist in current pandas: resample() returns a lazy Resampler and the
# aggregation or fill is chosen by calling a method on it.
# Month-end and year-end frequencies are passed as offset objects. They
# equal the legacy 'M' and 'A-DEC' aliases, which pandas 2.2 deprecated, and
# they work on every version.
month_end = pd.offsets.MonthEnd()
year_end_dec = pd.offsets.YearEnd(month=12)

rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.resample(month_end).mean()  # daily -> monthly, averaging each month
# Same aggregation, labelled by period instead of by month-end timestamp.
ts.resample(month_end).mean().to_period('M')

# --- Downsampling ---
rng = pd.date_range('1/1/2000', periods=12, freq='min')  # one-minute data
ts = pd.Series(np.arange(12), index=rng)
ts.resample('5min').sum()  # aggregate into 5-minute bins by summing
# Bins closed on the left edge and labelled with the left edge.
ts.resample('5min', closed='left', label='left').sum()
# Replacement for loffset='-1s': move the bin labels back by one second.
shifted_sum = ts.resample('5min').sum()
shifted_sum.index = shifted_sum.index - pd.Timedelta(seconds=1)
shifted_sum
# Open/high/low/close per bin, a common aggregation in finance.
ts.resample('5min').ohlc()

# --- Resampling with groupby ---
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = pd.Series(np.arange(100), index=rng)
# The key function receives each index timestamp; here it groups by month.
ts.groupby(lambda x: x.month).mean()

# --- Upsampling and interpolation ---
frame = pd.DataFrame(np.random.randn(2, 4),
                     index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
# Weekly -> daily without aggregation; the new rows hold missing values.
df_daily = frame.resample('D').asfreq()
frame.resample('D').ffill()         # forward-fill the missing rows
frame.resample('D').ffill(limit=2)  # fill at most two rows after each value
frame.resample('W-THU').ffill()

# --- Resampling with periods ---
frame = pd.DataFrame(np.random.randn(24, 4),
                     index=pd.period_range('1-2000', '12-2001', freq='M'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
annual_frame = frame.resample(year_end_dec).mean()
# 'Q-DEC' is quarterly with the year ending in December.
annual_frame.resample('Q-DEC').ffill()
annual_frame.resample('Q-DEC', convention='start').ffill()
annual_frame.resample('Q-MAR').ffill()
Out[51]:
Colorado | Texas | New York | Ohio | |
---|---|---|---|---|
2000Q4 | -0.393307 | -0.068283 | 0.357421 | 0.499684 |
2001Q1 | -0.393307 | -0.068283 | 0.357421 | 0.499684 |
2001Q2 | -0.393307 | -0.068283 | 0.357421 | 0.499684 |
2001Q3 | -0.393307 | -0.068283 | 0.357421 | 0.499684 |
2001Q4 | -0.180631 | -0.320163 | 0.395079 | 0.273438 |
2002Q1 | -0.180631 | -0.320163 | 0.395079 | 0.273438 |
2002Q2 | -0.180631 | -0.320163 | 0.395079 | 0.273438 |
2002Q3 | -0.180631 | -0.320163 | 0.395079 | 0.273438 |