Python 时间序列处理

一、datetime库

from datetime import datetime
# Capture the current date and time as a datetime object, then show the
# value and its type.
now = datetime.now()
print(now)
print(type(now))

2017-06-07 23:13:03.505630
<class 'datetime.datetime'>
# Subtracting one datetime from another yields a timedelta; its `days` and
# `seconds` attributes hold the whole-day part and the leftover seconds.
diff = (datetime(year=2017, month=3, day=4, hour=17)
        - datetime(year=2017, month=2, day=18, hour=15))
print(type(diff), diff, sep='\n')
print('经历了{}天, {}秒。'.format(diff.days, diff.seconds))


<class 'datetime.timedelta'>
14 days, 2:00:00
经历了14天, 7200秒。

 

str >> datetime

# strptime: parse a date string into a datetime using an explicit format.
dt_str = '2017-02-18'
dt_obj2 = datetime.strptime(dt_str, '%Y-%m-%d')
print(type(dt_obj2), dt_obj2, sep='\n')

<class 'datetime.datetime'>
2017-02-18 00:00:00

 

时间戳解析的用法:

# dateutil.parser.parse: parse a date string without supplying a format.
from dateutil.parser import parse
dt_str2 = '18-02-2017'
dt_obj3 = parse(dt_str2)  # the printed result reads this string as 18 Feb 2017
print(type(dt_obj3))
print(dt_obj3)


<class 'datetime.datetime'>
2017-02-18 00:00:00

pandas的时间戳:

# Input for pd.to_datetime: a Series of date strings that mixes '/' and '-'
# as separators.
import pandas as pd
s_obj = pd.Series(
    data=['2017/02/18', '2017/02/19', '2017-02-25', '2017-02-26'],
    name='course_time',
)
print(s_obj)

0 2017/02/18
1 2017/02/19
2 2017-02-25
3 2017-02-26
Name: course_time, dtype: object

 

# Convert the Series of date strings into datetime64 values.
# NOTE(review): the input mixes '/' and '-' separators; pandas 2.x infers a
# single format from the first element, so this may need format='mixed'
# there -- confirm against the installed pandas version.
s_obj2 = pd.to_datetime(s_obj)
print(s_obj2)

0 2017-02-18
1 2017-02-19
2 2017-02-25
3 2017-02-26
Name: course_time, dtype: datetime64[ns]
# Handling missing values: the same date strings followed by a None entry.
s_obj3 = pd.Series(
    ['2017/02/18', '2017/02/19', '2017-02-25', '2017-02-26', None],
    name='course_time',
)
print(s_obj3)


0 2017/02/18
1 2017/02/19
2 2017-02-25
3 2017-02-26
4 None
Name: course_time, dtype: object

 

 

# Convert to datetime64; the missing entry becomes NaT in the result.
# NOTE(review): mixed '/' and '-' separators may need format='mixed' on
# pandas 2.x -- confirm against the installed pandas version.
s_obj4 = pd.to_datetime(s_obj3)
print(s_obj4) # NaT -> Not a Time

0 2017-02-18
1 2017-02-19
2 2017-02-25
3 2017-02-26
4 NaT
Name: course_time, dtype: datetime64[ns]

 

 

 

二、Pandas时间序列

创建

from datetime import datetime
import pandas as pd
import numpy as np

# Build a Series indexed by a list of datetime objects; pandas turns such an
# index into a DatetimeIndex.
date_list = [
    datetime(2017, month, day)
    for month, day in ((2, 18), (2, 19), (2, 25), (2, 26), (3, 4), (3, 5))
]
time_s = pd.Series(data=np.random.randn(6), index=date_list)
print(time_s)
print(type(time_s.index))


2017-02-18   -0.230989
2017-02-19   -0.398082
2017-02-25   -0.309926
2017-02-26   -0.179672
2017-03-04    0.942698
2017-03-05    1.053092
dtype: float64
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

 

 

# pd.date_range(): five consecutive Saturdays starting at 2017-02-18.
#   start   -- first date of the range
#   periods -- number of timestamps to generate
#   freq    -- step between timestamps (weekly, anchored on Saturday)
dates = pd.date_range(start='2017-02-18', periods=5, freq='W-SAT')
print(dates)
print(pd.Series(data=np.random.randn(5), index=dates))

DatetimeIndex(['2017-02-18', '2017-02-25', '2017-03-04', '2017-03-11',
'2017-03-18'],
dtype='datetime64[ns]', freq='W-SAT')
2017-02-18   -1.680280
2017-02-25    0.908664
2017-03-04    0.145318
2017-03-11   -2.940363
2017-03-18    0.152681
Freq: W-SAT, dtype: float64

生成日期范围

# With both a start and an end date and no freq, date_range produces one
# timestamp per day (daily frequency is the default).
date_index = pd.date_range(start='2017/02/18', end='2017/03/18')
print(date_index)


DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
'2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
'2017-02-26', '2017-02-27', '2017-02-28', '2017-03-01',
'2017-03-02', '2017-03-03', '2017-03-04', '2017-03-05',
'2017-03-06', '2017-03-07', '2017-03-08', '2017-03-09',
'2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
'2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
'2017-03-18'],
dtype='datetime64[ns]', freq='D')

 

# When only a start date or only an end date is given, the number of periods
# must be supplied as well. Here: 10 timestamps, 4 days apart, from the start.
print(pd.date_range(start='2017/02/18', periods=10, freq='4D'))

DatetimeIndex(['2017-02-18', '2017-02-22', '2017-02-26', '2017-03-02',
'2017-03-06', '2017-03-10', '2017-03-14', '2017-03-18',
'2017-03-22', '2017-03-26'],
dtype='datetime64[ns]', freq='4D')

# Only an end date plus a period count: 10 daily timestamps ending on it.
print(pd.date_range(end='2017/03/18', periods=10))

DatetimeIndex(['2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',
'2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',
'2017-03-17', '2017-03-18'],
dtype='datetime64[ns]', freq='D')

# Normalizing timestamps: without normalize the time-of-day of the start value
# is kept on every entry; with normalize=True the entries fall on midnight.
print(pd.date_range(start='2017/02/18 12:13:14', periods=10))
print(pd.date_range(start='2017/02/18 12:13:14', periods=10, normalize=True))

DatetimeIndex(['2017-02-18 12:13:14', '2017-02-19 12:13:14',
'2017-02-20 12:13:14', '2017-02-21 12:13:14',
'2017-02-22 12:13:14', '2017-02-23 12:13:14',
'2017-02-24 12:13:14', '2017-02-25 12:13:14',
'2017-02-26 12:13:14', '2017-02-27 12:13:14'],
dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21',
'2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25',
'2017-02-26', '2017-02-27'],
dtype='datetime64[ns]', freq='D')

 

频率与偏移量

# freq='2D': one timestamp every two days between the start and end dates.
print(pd.date_range('2017/02/18', '2017/03/18', freq='2D'))

DatetimeIndex(['2017-02-18', '2017-02-20', '2017-02-22', '2017-02-24',
'2017-02-26', '2017-02-28', '2017-03-02', '2017-03-04',
'2017-03-06', '2017-03-08', '2017-03-10', '2017-03-12',
'2017-03-14', '2017-03-16', '2017-03-18'],
dtype='datetime64[ns]', freq='2D')


# Offsets can be combined with addition: two weeks plus twelve hours.
sum_offset = pd.tseries.offsets.Week(2) + pd.tseries.offsets.Hour(12)
print(sum_offset)

# Use the combined offset as the step between generated timestamps.
print(pd.date_range('2017/02/18', '2017/03/18', freq=sum_offset))

14 days 12:00:00
DatetimeIndex(['2017-02-18 00:00:00', '2017-03-04 12:00:00'], dtype='datetime64[ns]', freq='348H')

 

移动数据

# Series of five random values indexed by consecutive Saturdays starting at
# 2017-02-18. The assignment and the print were fused on one line, which is a
# SyntaxError; they are separate statements here.
ts = pd.Series(np.random.randn(5),
               index=pd.date_range('20170218', periods=5, freq='W-SAT'))
print(ts)

2017-02-18   -0.208622
2017-02-25    0.616093
2017-03-04   -0.424725
2017-03-11   -0.361475
2017-03-18    0.761274
Freq: W-SAT, dtype: float64




# shift(1) moves the values one position later while the index stays fixed,
# so the first entry becomes NaN.
print(ts.shift(1))
# Alternative left disabled in the original: shift with a negative count.
#print(ts.shift(-1))

2017-02-18         NaN
2017-02-25   -0.208622
2017-03-04    0.616093
2017-03-11   -0.424725
2017-03-18   -0.361475
Freq: W-SAT, dtype: float64

 

 

三、重采样

import pandas as pd
import numpy as np

# Sample data for resampling: 100 consecutive daily timestamps starting at
# 2017-01-01, with the values 0..99.
date_rng = pd.date_range(start='20170101', periods=100, freq='D')
ser_obj = pd.Series(range(date_rng.size), index=date_rng)
print(ser_obj.head(n=10))

2017-01-01    0
2017-01-02    1
2017-01-03    2
2017-01-04    3
2017-01-05    4
2017-01-06    5
2017-01-07    6
2017-01-08    7
2017-01-09    8
2017-01-10    9
Freq: D, dtype: int32

 

 

# Group the daily series into calendar-month bins, then compute each month's
# total and each month's average from the same resampler.
monthly = ser_obj.resample('M')
resample_month_sum = monthly.sum()
resample_month_mean = monthly.mean()

print('按月求和:', resample_month_sum)
print('按月求均值:', resample_month_mean)


按月求和: 2017-01-31     465
2017-02-28    1246
2017-03-31    2294
2017-04-30     945
Freq: M, dtype: int32
按月求均值: 2017-01-31    15.0
2017-02-28    44.5
2017-03-31    74.0
2017-04-30    94.5
Freq: M, dtype: float64

 

降采样

# Downsample the daily series into 5-day bins and aggregate each bin three
# ways: sum, mean, and open/high/low/close.
five_day = ser_obj.resample('5D')
five_day_sum_sample = five_day.sum()
five_day_mean_sample = five_day.mean()
five_day_ohlc_sample = five_day.ohlc()

print('降采样,sum')
print(five_day_sum_sample)


降采样,sum
2017-01-01     10
2017-01-06     35
2017-01-11     60
2017-01-16     85
2017-01-21    110
2017-01-26    135
2017-01-31    160
2017-02-05    185
2017-02-10    210
2017-02-15    235
2017-02-20    260
2017-02-25    285
2017-03-02    310
2017-03-07    335
2017-03-12    360
2017-03-17    385
2017-03-22    410
2017-03-27    435
2017-04-01    460
2017-04-06    485
Freq: 5D, dtype: int32

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值