datetime基础

'''
【课程2.8】  时间模块:datetime

datetime模块,主要掌握:datetime.date(), datetime.datetime(), datetime.timedelta()

日期解析方法:parser.parse

'''
# datetime.date :date 对象
import datetime 
today=datetime.date.today()
print(today)
# datetime.date.today() 返回今天日期 格式为date
2019-05-25
# 直接转换日期 年,月,日
# datetime.date
print(datetime.date(2016,6,2))
2016-06-02
# datetime.datetime :datetime对象
# 返回现在的时间 datetime.datetime.now()
now=datetime.datetime.now()
print(now)
# 可以通过str转成字符串
2019-05-25 08:38:20.732982
# 转换日期,最少输入年月日
print(datetime.datetime(2016,5,6))
print(datetime.datetime(2015,5,6,5,22,22))
2016-05-06 00:00:00
2015-05-06 05:22:22
# 时间可以算时间差
t1 = datetime.datetime(2016,5,6)
t2 = datetime.datetime(2015,5,6,5,22,22)
print(t1-t2)
365 days, 18:37:38
# datetime.timedelta:时间差对象,可与日期/时间做加减运算
today=datetime.datetime.now()
yes=today-datetime.timedelta(1)
print(yes)
print(today-datetime.timedelta(10))
2019-05-24 08:38:21.237705
2019-05-15 08:38:21.237705
# 日期字符串转换
# parser.parse
from dateutil.parser import parse
date='21-12-2017'
print(parse(date))
print(parse('12-11-2017'))
print(parse('12/11/2017'))
print(parse('2017-5-6'))
print(parse('Jan 31, 1997 10:45 PM'))
# 参数dayfirst=True  按国际通用格式解析:日在月之前
print('===================')
print(parse('5/1/2019'))
print(parse('5/1/2019',dayfirst=True))
2017-12-21 00:00:00
2017-12-11 00:00:00
2017-12-11 00:00:00
2017-05-06 00:00:00
1997-01-31 22:45:00
===================
2019-05-01 00:00:00
2019-01-05 00:00:00
'''
【课程2.9】  Pandas时刻数据:Timestamp

时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据

pandas.Timestamp()

'''
'\n【课程2.9】  Pandas时刻数据:Timestamp\n\n时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据\n\npandas.Timestamp()\n\n'
import pandas as pd
import numpy as np
# pd.Timestamp()
print(pd.Timestamp('2017-12-21'))
print(pd.Timestamp('2018-5-8 1:1:1'))
print(pd.Timestamp(datetime.datetime(2015,5,6)))
2017-12-21 00:00:00
2018-05-08 01:01:01
2015-05-06 00:00:00
# pd.to_datetime()
from datetime import datetime
print(pd.to_datetime(datetime(2016,12,11,1,1,1)))
print(pd.to_datetime('2017-2-3'))
# 也可以传入时间字符串组成的列表,返回DatetimeIndex
print(pd.to_datetime(['2017-2-3','2018-3-4','2019-4-5']))
2016-12-11 01:01:01
2017-02-03 00:00:00
DatetimeIndex(['2017-02-03', '2018-03-04', '2019-04-05'], dtype='datetime64[ns]', freq=None)
# pd.to_datetime转成时间戳索引
print(pd.to_datetime(['2018-2-3','2019-2-6','2017-5-9']))
a = datetime(2016,2,2)
print(type(a))
print(pd.to_datetime([datetime(2016,5,6),datetime(2017,5,6)]))
DatetimeIndex(['2018-02-03', '2019-02-06', '2017-05-09'], dtype='datetime64[ns]', freq=None)
<class 'datetime.datetime'>
DatetimeIndex(['2016-05-06', '2017-05-06'], dtype='datetime64[ns]', freq=None)
# 如果夹杂其他格式 可以采用errors参数
# ignore 产生一般数组,coerce 返回NaT
date=['2018-2-3','2019-2-6','hello','2017-5-9']
print(pd.to_datetime(date,errors='ignore'))
print(pd.to_datetime(date,errors='coerce'))
['2018-2-3' '2019-2-6' 'hello' '2017-5-9']
DatetimeIndex(['2018-02-03', '2019-02-06', 'NaT', '2017-05-09'], dtype='datetime64[ns]', freq=None)
'''
【课程2.10】  Pandas时间戳索引:DatetimeIndex

核心:pd.date_range()

'''
'\n【课程2.10】  Pandas时间戳索引:DatetimeIndex\n\n核心:pd.date_range()\n\n'
# pd.DatetimeIndex() 与 TimeSeries时间序列
# pd.DatetimeIndex()
print(pd.DatetimeIndex(['2019/5/4','5/4/2018','3/5/2016']))
DatetimeIndex(['2019-05-04', '2018-05-04', '2016-03-05'], dtype='datetime64[ns]', freq=None)
rng = pd.DatetimeIndex(['2019/5/4','5/4/2018','3/5/2016'])
print(pd.Series(np.random.rand(len(rng)),index=rng))
# 以DatetimeIndex为index的Series
2019-05-04    0.747358
2018-05-04    0.484367
2016-03-05    0.666840
dtype: float64
# pd.date_range()-日期范围:生成日期范围
# 2种生成方式:①start + end; ②start/end + periods
# 默认频率:day
print(pd.date_range('1/1/2017','30/1/2017'))
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
               '2017-01-13', '2017-01-14', '2017-01-15', '2017-01-16',
               '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20',
               '2017-01-21', '2017-01-22', '2017-01-23', '2017-01-24',
               '2017-01-25', '2017-01-26', '2017-01-27', '2017-01-28',
               '2017-01-29', '2017-01-30'],
              dtype='datetime64[ns]', freq='D')
# periods周期
print(pd.date_range('1/1/2017',periods=10)) 
print(pd.date_range(end='2017/1/1',periods=10))
# pd.date_range(start,end,periods,freq,tz,normalize,name,closed)
'''
start:开始时间
end:结束时间
periods:生成的时间点个数
freq:频率 默认天 pd.date_range()默认日历日  pd.bdate_range()默认频率为工作日
tz:时区
'''
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2016-12-23', '2016-12-24', '2016-12-25', '2016-12-26',
               '2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30',
               '2016-12-31', '2017-01-01'],
              dtype='datetime64[ns]', freq='D')





'\nstart:开始时间\nend:结束时间\nperiods:偏移量\nfreq:频率 默认天 pd.date_range()默认日历日  pd.bdate_range()默认频率为工作日\ntz:时区\n'
print(pd.date_range('2017/2/2 15:00:00',periods=10))
print(pd.date_range('2017/2/2 15:00:00',periods=10,normalize=True))
DatetimeIndex(['2017-02-02 15:00:00', '2017-02-03 15:00:00',
               '2017-02-04 15:00:00', '2017-02-05 15:00:00',
               '2017-02-06 15:00:00', '2017-02-07 15:00:00',
               '2017-02-08 15:00:00', '2017-02-09 15:00:00',
               '2017-02-10 15:00:00', '2017-02-11 15:00:00'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-02-02', '2017-02-03', '2017-02-04', '2017-02-05',
               '2017-02-06', '2017-02-07', '2017-02-08', '2017-02-09',
               '2017-02-10', '2017-02-11'],
              dtype='datetime64[ns]', freq='D')
# closed 默认左闭右闭,left左闭右开,right左开右闭
print(pd.date_range('2017/1/1','2017/1/4'))
print(pd.date_range('2017/1/1','2017/1/4',closed='left'))
print(pd.date_range('2017/1/1','2017/1/4',closed='right'))
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
# 可以直接转换为list
print(list(pd.date_range('2017/1/1','2017/1/4')))
[Timestamp('2017-01-01 00:00:00', offset='D'), Timestamp('2017-01-02 00:00:00', offset='D'), Timestamp('2017-01-03 00:00:00', offset='D'), Timestamp('2017-01-04 00:00:00', offset='D')]
# pd.date_range()-日期范围:频率(1)
print(pd.date_range('2017/1/1','2017/1/4')) # 默认为天
print(pd.date_range('2017/1/1','2017/1/30',freq='B'))   # B,每工作日
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
               '2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
               '2017-01-12', '2017-01-13', '2017-01-16', '2017-01-17',
               '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-23',
               '2017-01-24', '2017-01-25', '2017-01-26', '2017-01-27',
               '2017-01-30'],
              dtype='datetime64[ns]', freq='B')
print(pd.date_range('2017/1/1','2017/1/4',freq='H'))   # 每小时
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
               '2017-01-01 02:00:00', '2017-01-01 03:00:00',
               '2017-01-01 04:00:00', '2017-01-01 05:00:00',
               '2017-01-01 06:00:00', '2017-01-01 07:00:00',
               '2017-01-01 08:00:00', '2017-01-01 09:00:00',
               '2017-01-01 10:00:00', '2017-01-01 11:00:00',
               '2017-01-01 12:00:00', '2017-01-01 13:00:00',
               '2017-01-01 14:00:00', '2017-01-01 15:00:00',
               '2017-01-01 16:00:00', '2017-01-01 17:00:00',
               '2017-01-01 18:00:00', '2017-01-01 19:00:00',
               '2017-01-01 20:00:00', '2017-01-01 21:00:00',
               '2017-01-01 22:00:00', '2017-01-01 23:00:00',
               '2017-01-02 00:00:00', '2017-01-02 01:00:00',
               '2017-01-02 02:00:00', '2017-01-02 03:00:00',
               '2017-01-02 04:00:00', '2017-01-02 05:00:00',
               '2017-01-02 06:00:00', '2017-01-02 07:00:00',
               '2017-01-02 08:00:00', '2017-01-02 09:00:00',
               '2017-01-02 10:00:00', '2017-01-02 11:00:00',
               '2017-01-02 12:00:00', '2017-01-02 13:00:00',
               '2017-01-02 14:00:00', '2017-01-02 15:00:00',
               '2017-01-02 16:00:00', '2017-01-02 17:00:00',
               '2017-01-02 18:00:00', '2017-01-02 19:00:00',
               '2017-01-02 20:00:00', '2017-01-02 21:00:00',
               '2017-01-02 22:00:00', '2017-01-02 23:00:00',
               '2017-01-03 00:00:00', '2017-01-03 01:00:00',
               '2017-01-03 02:00:00', '2017-01-03 03:00:00',
               '2017-01-03 04:00:00', '2017-01-03 05:00:00',
               '2017-01-03 06:00:00', '2017-01-03 07:00:00',
               '2017-01-03 08:00:00', '2017-01-03 09:00:00',
               '2017-01-03 10:00:00', '2017-01-03 11:00:00',
               '2017-01-03 12:00:00', '2017-01-03 13:00:00',
               '2017-01-03 14:00:00', '2017-01-03 15:00:00',
               '2017-01-03 16:00:00', '2017-01-03 17:00:00',
               '2017-01-03 18:00:00', '2017-01-03 19:00:00',
               '2017-01-03 20:00:00', '2017-01-03 21:00:00',
               '2017-01-03 22:00:00', '2017-01-03 23:00:00',
               '2017-01-04 00:00:00'],
              dtype='datetime64[ns]', freq='H')
print(pd.date_range('2017/1/1','2017/1/4',freq='T'))   # 每分钟
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00',
               '2017-01-01 00:02:00', '2017-01-01 00:03:00',
               '2017-01-01 00:04:00', '2017-01-01 00:05:00',
               '2017-01-01 00:06:00', '2017-01-01 00:07:00',
               '2017-01-01 00:08:00', '2017-01-01 00:09:00',
               ...
               '2017-01-03 23:51:00', '2017-01-03 23:52:00',
               '2017-01-03 23:53:00', '2017-01-03 23:54:00',
               '2017-01-03 23:55:00', '2017-01-03 23:56:00',
               '2017-01-03 23:57:00', '2017-01-03 23:58:00',
               '2017-01-03 23:59:00', '2017-01-04 00:00:00'],
              dtype='datetime64[ns]', length=4321, freq='T')
print(pd.date_range('2017/1/1','2017/1/4',freq='H'))   # 每小时
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
               '2017-01-01 02:00:00', '2017-01-01 03:00:00',
               '2017-01-01 04:00:00', '2017-01-01 05:00:00',
               '2017-01-01 06:00:00', '2017-01-01 07:00:00',
               '2017-01-01 08:00:00', '2017-01-01 09:00:00',
               '2017-01-01 10:00:00', '2017-01-01 11:00:00',
               '2017-01-01 12:00:00', '2017-01-01 13:00:00',
               '2017-01-01 14:00:00', '2017-01-01 15:00:00',
               '2017-01-01 16:00:00', '2017-01-01 17:00:00',
               '2017-01-01 18:00:00', '2017-01-01 19:00:00',
               '2017-01-01 20:00:00', '2017-01-01 21:00:00',
               '2017-01-01 22:00:00', '2017-01-01 23:00:00',
               '2017-01-02 00:00:00', '2017-01-02 01:00:00',
               '2017-01-02 02:00:00', '2017-01-02 03:00:00',
               '2017-01-02 04:00:00', '2017-01-02 05:00:00',
               '2017-01-02 06:00:00', '2017-01-02 07:00:00',
               '2017-01-02 08:00:00', '2017-01-02 09:00:00',
               '2017-01-02 10:00:00', '2017-01-02 11:00:00',
               '2017-01-02 12:00:00', '2017-01-02 13:00:00',
               '2017-01-02 14:00:00', '2017-01-02 15:00:00',
               '2017-01-02 16:00:00', '2017-01-02 17:00:00',
               '2017-01-02 18:00:00', '2017-01-02 19:00:00',
               '2017-01-02 20:00:00', '2017-01-02 21:00:00',
               '2017-01-02 22:00:00', '2017-01-02 23:00:00',
               '2017-01-03 00:00:00', '2017-01-03 01:00:00',
               '2017-01-03 02:00:00', '2017-01-03 03:00:00',
               '2017-01-03 04:00:00', '2017-01-03 05:00:00',
               '2017-01-03 06:00:00', '2017-01-03 07:00:00',
               '2017-01-03 08:00:00', '2017-01-03 09:00:00',
               '2017-01-03 10:00:00', '2017-01-03 11:00:00',
               '2017-01-03 12:00:00', '2017-01-03 13:00:00',
               '2017-01-03 14:00:00', '2017-01-03 15:00:00',
               '2017-01-03 16:00:00', '2017-01-03 17:00:00',
               '2017-01-03 18:00:00', '2017-01-03 19:00:00',
               '2017-01-03 20:00:00', '2017-01-03 21:00:00',
               '2017-01-03 22:00:00', '2017-01-03 23:00:00',
               '2017-01-04 00:00:00'],
              dtype='datetime64[ns]', freq='H')
print(pd.date_range('2017/1/1','2017/1/4',freq='T'))   #  每分钟
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00',
               '2017-01-01 00:02:00', '2017-01-01 00:03:00',
               '2017-01-01 00:04:00', '2017-01-01 00:05:00',
               '2017-01-01 00:06:00', '2017-01-01 00:07:00',
               '2017-01-01 00:08:00', '2017-01-01 00:09:00',
               ...
               '2017-01-03 23:51:00', '2017-01-03 23:52:00',
               '2017-01-03 23:53:00', '2017-01-03 23:54:00',
               '2017-01-03 23:55:00', '2017-01-03 23:56:00',
               '2017-01-03 23:57:00', '2017-01-03 23:58:00',
               '2017-01-03 23:59:00', '2017-01-04 00:00:00'],
              dtype='datetime64[ns]', length=4321, freq='T')
print(pd.date_range('2017/1/1','2017/1/4',freq='S'))   # 每秒
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:00:01',
               '2017-01-01 00:00:02', '2017-01-01 00:00:03',
               '2017-01-01 00:00:04', '2017-01-01 00:00:05',
               '2017-01-01 00:00:06', '2017-01-01 00:00:07',
               '2017-01-01 00:00:08', '2017-01-01 00:00:09',
               ...
               '2017-01-03 23:59:51', '2017-01-03 23:59:52',
               '2017-01-03 23:59:53', '2017-01-03 23:59:54',
               '2017-01-03 23:59:55', '2017-01-03 23:59:56',
               '2017-01-03 23:59:57', '2017-01-03 23:59:58',
               '2017-01-03 23:59:59', '2017-01-04 00:00:00'],
              dtype='datetime64[ns]', length=259201, freq='S')
# L 每毫秒,U每微秒
# 指定每周的星期几
print(pd.date_range('2017/1/1','2017/2/1',freq='W-MON'))
DatetimeIndex(['2017-01-02', '2017-01-09', '2017-01-16', '2017-01-23',
               '2017-01-30'],
              dtype='datetime64[ns]', freq='W-MON')
# 每月第2个周一
print(pd.date_range('2017/1/1','2017/5/1',freq='WOM-2MON'))
DatetimeIndex(['2017-01-09', '2017-02-13', '2017-03-13', '2017-04-10'], dtype='datetime64[ns]', freq='WOM-2MON')
# pd.date_range()-日期范围:频率(2)
# 每月最后一个日历日  M
print(pd.date_range('2017/1/1','2018/1/1',freq='M'))
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],
              dtype='datetime64[ns]', freq='M')
# 指定月为季度末,每个季度末的最后一个日历日  Q-月
print(pd.date_range('2017','2018',freq='Q-DEC'))
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-30', '2017-12-31'], dtype='datetime64[ns]', freq='Q-DEC')
# 每年指定月份的最后一个日历日   A-月
print(pd.date_range('2017','2018',freq='A-DEC'))
DatetimeIndex(['2017-12-31'], dtype='datetime64[ns]', freq='A-DEC')
# 每月最后一个工作日
print(pd.date_range('2017','2018',freq='BM'))
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-28',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29'],
              dtype='datetime64[ns]', freq='BM')
# 指定月为季度末,每个季度末最后一个工作日  BQ-月
print(pd.date_range('2017','2018',freq='BQ-DEC'))
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29'], dtype='datetime64[ns]', freq='BQ-DEC')
# 指定月的最后一个工作日  BA-月
print(pd.date_range('2017','2018',freq='BA-DEC'))
DatetimeIndex(['2017-12-29'], dtype='datetime64[ns]', freq='BA-DEC')
# 每个月的第一个日历日  MS
print(pd.date_range('2017','2018',freq='MS'))
DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',
               '2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01',
               '2017-09-01', '2017-10-01', '2017-11-01', '2017-12-01',
               '2018-01-01'],
              dtype='datetime64[ns]', freq='MS')
# 指定月为季度末,每个季度最后一个月的第一个日历日
print(pd.date_range('2017','2018',freq='QS-DEC'))
DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='QS-DEC')
# 每年指定月份的第一个日历日  AS-月
print(pd.date_range('2017','2018',freq='AS-DEC'))
DatetimeIndex(['2017-12-01'], dtype='datetime64[ns]', freq='AS-DEC')
# 每个月的第一个工作日
print(pd.date_range('2017','2018',freq='BMS'))
DatetimeIndex(['2017-01-02', '2017-02-01', '2017-03-01', '2017-04-03',
               '2017-05-01', '2017-06-01', '2017-07-03', '2017-08-01',
               '2017-09-01', '2017-10-02', '2017-11-01', '2017-12-01',
               '2018-01-01'],
              dtype='datetime64[ns]', freq='BMS')
# 指定月为季度末,每个季度最后一个月的第一个工作日  BQS-月
print(pd.date_range('2017','2018',freq='BQS-DEC'))
DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='BQS-DEC')
# 指定月的第一个工作日 
print(pd.date_range('2017','2020', freq = 'BAS-DEC')) 
DatetimeIndex(['2017-12-01', '2018-12-03', '2019-12-02'], dtype='datetime64[ns]', freq='BAS-DEC')
# pd.date_range()-日期范围:复合频率
# 7天一个频率
print(pd.date_range('2017/1/1','2017/4/1',freq='7D'))
DatetimeIndex(['2017-01-01', '2017-01-08', '2017-01-15', '2017-01-22',
               '2017-01-29', '2017-02-05', '2017-02-12', '2017-02-19',
               '2017-02-26', '2017-03-05', '2017-03-12', '2017-03-19',
               '2017-03-26'],
              dtype='datetime64[ns]', freq='7D')
print(pd.date_range('2017/1/1','2017/1/2', freq = '2h30min'))  # 2小时30分钟
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 02:30:00',
               '2017-01-01 05:00:00', '2017-01-01 07:30:00',
               '2017-01-01 10:00:00', '2017-01-01 12:30:00',
               '2017-01-01 15:00:00', '2017-01-01 17:30:00',
               '2017-01-01 20:00:00', '2017-01-01 22:30:00'],
              dtype='datetime64[ns]', freq='150T')
print(pd.date_range('2017','2018', freq = '2M'))  # 2月,每月最后一个日历日
DatetimeIndex(['2017-01-31', '2017-03-31', '2017-05-31', '2017-07-31',
               '2017-09-30', '2017-11-30'],
              dtype='datetime64[ns]', freq='2M')
# asfreq:时间序列频率转换(新频率下没有对应值的位置为NaN)
ts=pd.Series(np.random.rand(4),
            index=pd.date_range('2017/1/1','2017/1/4'))
print(ts)
print(ts.asfreq('4H'))
2017-01-01    0.121601
2017-01-02    0.813354
2017-01-03    0.258622
2017-01-04    0.816194
Freq: D, dtype: float64
2017-01-01 00:00:00    0.121601
2017-01-01 04:00:00         NaN
2017-01-01 08:00:00         NaN
2017-01-01 12:00:00         NaN
2017-01-01 16:00:00         NaN
2017-01-01 20:00:00         NaN
2017-01-02 00:00:00    0.813354
2017-01-02 04:00:00         NaN
2017-01-02 08:00:00         NaN
2017-01-02 12:00:00         NaN
2017-01-02 16:00:00         NaN
2017-01-02 20:00:00         NaN
2017-01-03 00:00:00    0.258622
2017-01-03 04:00:00         NaN
2017-01-03 08:00:00         NaN
2017-01-03 12:00:00         NaN
2017-01-03 16:00:00         NaN
2017-01-03 20:00:00         NaN
2017-01-04 00:00:00    0.816194
Freq: 4H, dtype: float64
# 填充 method
print(ts.asfreq('4H',method='ffill'))  
# ffill 用之前的填充
2017-01-01 00:00:00    0.121601
2017-01-01 04:00:00    0.121601
2017-01-01 08:00:00    0.121601
2017-01-01 12:00:00    0.121601
2017-01-01 16:00:00    0.121601
2017-01-01 20:00:00    0.121601
2017-01-02 00:00:00    0.813354
2017-01-02 04:00:00    0.813354
2017-01-02 08:00:00    0.813354
2017-01-02 12:00:00    0.813354
2017-01-02 16:00:00    0.813354
2017-01-02 20:00:00    0.813354
2017-01-03 00:00:00    0.258622
2017-01-03 04:00:00    0.258622
2017-01-03 08:00:00    0.258622
2017-01-03 12:00:00    0.258622
2017-01-03 16:00:00    0.258622
2017-01-03 20:00:00    0.258622
2017-01-04 00:00:00    0.816194
Freq: 4H, dtype: float64
print(ts.asfreq('4H',method='bfill'))  
# bfill 用之后的填充
2017-01-01 00:00:00    0.121601
2017-01-01 04:00:00    0.813354
2017-01-01 08:00:00    0.813354
2017-01-01 12:00:00    0.813354
2017-01-01 16:00:00    0.813354
2017-01-01 20:00:00    0.813354
2017-01-02 00:00:00    0.813354
2017-01-02 04:00:00    0.258622
2017-01-02 08:00:00    0.258622
2017-01-02 12:00:00    0.258622
2017-01-02 16:00:00    0.258622
2017-01-02 20:00:00    0.258622
2017-01-03 00:00:00    0.258622
2017-01-03 04:00:00    0.816194
2017-01-03 08:00:00    0.816194
2017-01-03 12:00:00    0.816194
2017-01-03 16:00:00    0.816194
2017-01-03 20:00:00    0.816194
2017-01-04 00:00:00    0.816194
Freq: 4H, dtype: float64
# pd.date_range()-日期范围:超前/滞后数据
ts=pd.Series(np.random.rand(4),
            index=pd.date_range('20170101','20170104'))
print(ts)
2017-01-01    0.764262
2017-01-02    0.894735
2017-01-03    0.302417
2017-01-04    0.765669
Freq: D, dtype: float64
# shift 数据滞后
print(ts.shift(2))
2017-01-01         NaN
2017-01-02         NaN
2017-01-03    0.764262
2017-01-04    0.894735
Freq: D, dtype: float64
# shift 数据超前
print(ts.shift(-2))
2017-01-01    0.302417
2017-01-02    0.765669
2017-01-03         NaN
2017-01-04         NaN
Freq: D, dtype: float64
# 这个一般用来计算变化百分比
per=ts/ts.shift(1)-1
print(per)
2017-01-01         NaN
2017-01-02    0.170716
2017-01-03   -0.662003
2017-01-04    1.531832
Freq: D, dtype: float64
# 对时间戳进行位移  添加参数freq
# freq=D 时间向后
print(ts.shift(2,freq='D'))
2017-01-03    0.764262
2017-01-04    0.894735
2017-01-05    0.302417
2017-01-06    0.765669
Freq: D, dtype: float64
# freq=T  时间戳整体向后移动2分钟(数值不变)
print(ts.shift(2,freq='T'))
2017-01-01 00:02:00    0.764262
2017-01-02 00:02:00    0.894735
2017-01-03 00:02:00    0.302417
2017-01-04 00:02:00    0.765669
Freq: D, dtype: float64
'''
【课程2.11】  Pandas时期:Period

核心:pd.Period()

'''
'\n【课程2.11】  Pandas时期:Period\n\n核心:pd.Period()\n\n'
# 生成一个以2017-01开始、以月为频率的时期(Period)对象
p=pd.Period('2017',freq='M')
print(p)
2017-01
print(p+1)
print(p-1)
2017-02
2016-12
print(pd.Period('2012',freq='A-DEC')-1)
2011
# 创建时期范围 pd.period_range()
pr=pd.period_range('1/1/2017','1/1/2018',freq='M')
print(pr)
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='int64', freq='M')
# 时间序列  时期序列
s=pd.Series(np.random.rand(len(pr)),index=pr)
print(s)
2017-01    0.395536
2017-02    0.867729
2017-03    0.727422
2017-04    0.194098
2017-05    0.897134
2017-06    0.119327
2017-07    0.481213
2017-08    0.343418
2017-09    0.474208
2017-10    0.271213
2017-11    0.621978
2017-12    0.723095
2018-01    0.234442
Freq: M, dtype: float64
# 频率转换 asfreq
p=pd.Period('2017','A-DEC')
print(p)
2017
print(p.asfreq('M',how='start'))
print(p.asfreq('D',how='end'))
# 通过 Period.asfreq(freq, how='start'/'end') 转换成别的频率,how指定取时期的开始还是结束
2017-01
2017-12-31
#asfreq也可以转换timeSeries的index
pr=pd.period_range('2017','2018',freq='M')
print(pr)
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='int64', freq='M')
s1=pd.Series(np.random.rand(len(pr)),index=pr)
print(s1)
print(pd.Series(np.random.rand(len(pr)),index=pr.asfreq('D',how='start')))
2017-01    0.027461
2017-02    0.351679
2017-03    0.026355
2017-04    0.130524
2017-05    0.323551
2017-06    0.418091
2017-07    0.252305
2017-08    0.159283
2017-09    0.863181
2017-10    0.113546
2017-11    0.834609
2017-12    0.404364
2018-01    0.248782
Freq: M, dtype: float64
2017-01-01    0.942848
2017-02-01    0.787942
2017-03-01    0.898672
2017-04-01    0.838851
2017-05-01    0.687743
2017-06-01    0.835100
2017-07-01    0.971016
2017-08-01    0.433730
2017-09-01    0.984731
2017-10-01    0.197288
2017-11-01    0.530533
2017-12-01    0.734869
2018-01-01    0.652892
Freq: D, dtype: float64
# 时间戳与时期之间的转换 : pd.to_period()   pd.to_timestamp()
dr=pd.date_range('2017/1/1',periods=10,freq='M')
pr=pd.period_range('2017','2018',freq='M')
print(dr)
print(pr)
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31'],
              dtype='datetime64[ns]', freq='M')
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='int64', freq='M')
# 每月最后一天转换为每月
ts1=pd.Series(np.random.rand(len(dr)),index=dr)
print(ts1)
print(ts1.to_period())
2017-01-31    0.158806
2017-02-28    0.483492
2017-03-31    0.372044
2017-04-30    0.034802
2017-05-31    0.283967
2017-06-30    0.344219
2017-07-31    0.322696
2017-08-31    0.896927
2017-09-30    0.209306
2017-10-31    0.904640
Freq: M, dtype: float64
2017-01    0.158806
2017-02    0.483492
2017-03    0.372044
2017-04    0.034802
2017-05    0.283967
2017-06    0.344219
2017-07    0.322696
2017-08    0.896927
2017-09    0.209306
2017-10    0.904640
Freq: M, dtype: float64
# 每月转换为每月第一天
ts2=pd.Series(np.random.rand(len(pr)),index=pr)
print(ts2)
print(ts2.to_timestamp())
2017-01    0.045885
2017-02    0.254705
2017-03    0.010104
2017-04    0.184321
2017-05    0.080500
2017-06    0.306121
2017-07    0.186046
2017-08    0.915024
2017-09    0.612428
2017-10    0.348309
2017-11    0.969925
2017-12    0.769598
2018-01    0.482902
Freq: M, dtype: float64
2017-01-01    0.045885
2017-02-01    0.254705
2017-03-01    0.010104
2017-04-01    0.184321
2017-05-01    0.080500
2017-06-01    0.306121
2017-07-01    0.186046
2017-08-01    0.915024
2017-09-01    0.612428
2017-10-01    0.348309
2017-11-01    0.969925
2017-12-01    0.769598
2018-01-01    0.482902
Freq: MS, dtype: float64
'''
【课程2.12】  时间序列 - 索引及切片

TimeSeries是Series的一个子类,所以Series索引及数据选取方面的方法基本一样

同时TimeSeries通过时间序列有更便捷的方法做索引和切片
 
'''
# 索引
from datetime import datetime
dr=pd.date_range('2017/1','2017/3')
s=pd.Series(np.random.rand(len(dr)),index=dr)
print(s.head())
2017-01-01    0.965767
2017-01-02    0.092477
2017-01-03    0.101372
2017-01-04    0.518738
2017-01-05    0.512767
Freq: D, dtype: float64
# 下标位置索引
print(s[0])
print(s[:5])
print(s[::2])
0.965766677911
2017-01-01    0.965767
2017-01-02    0.092477
2017-01-03    0.101372
2017-01-04    0.518738
2017-01-05    0.512767
Freq: D, dtype: float64
2017-01-01    0.965767
2017-01-03    0.101372
2017-01-05    0.512767
2017-01-07    0.397813
2017-01-09    0.044512
2017-01-11    0.440636
2017-01-13    0.576041
2017-01-15    0.661706
2017-01-17    0.413142
2017-01-19    0.061130
2017-01-21    0.921432
2017-01-23    0.268689
2017-01-25    0.305809
2017-01-27    0.859973
2017-01-29    0.420669
2017-01-31    0.488450
2017-02-02    0.427845
2017-02-04    0.277361
2017-02-06    0.649949
2017-02-08    0.347356
2017-02-10    0.147578
2017-02-12    0.133765
2017-02-14    0.751505
2017-02-16    0.516944
2017-02-18    0.703469
2017-02-20    0.085836
2017-02-22    0.374159
2017-02-24    0.711476
2017-02-26    0.748231
2017-02-28    0.595617
Freq: 2D, dtype: float64
# 时间序列标签索引
print(s['2017-01-15'])
print(s[datetime(2017,1,1)])
0.66170600133
0.965766677911
# 切片
dr = pd.date_range('2017/1','2017/3',freq='12H')
s=pd.Series(np.random.rand(len(dr)),index=dr)
print(s.head())
2017-01-01 00:00:00    0.318529
2017-01-01 12:00:00    0.648512
2017-01-02 00:00:00    0.325831
2017-01-02 12:00:00    0.717528
2017-01-03 00:00:00    0.802091
Freq: 12H, dtype: float64
print(s['2017-01-01':'2017-01-02'])
2017-01-01 00:00:00    0.318529
2017-01-01 12:00:00    0.648512
2017-01-02 00:00:00    0.325831
2017-01-02 12:00:00    0.717528
Freq: 12H, dtype: float64
# 重复索引的时间序列
dates=pd.DatetimeIndex(['1/1/2015','1/2/2015','1/3/2015','1/4/2015','1/1/2015','1/2/2015'])
ts=pd.Series(np.random.rand(6),index=dates)
print(ts)
2015-01-01    0.428691
2015-01-02    0.589544
2015-01-03    0.160050
2015-01-04    0.652073
2015-01-01    0.219171
2015-01-02    0.733021
dtype: float64
print(ts.is_unique)
print(ts.index.is_unique)
True
False
print(ts['2015-01-01'])
2015-01-01    0.428691
2015-01-01    0.219171
dtype: float64
# 通过group分组,重复的值用平均值处理
print(ts.groupby(level=0).mean())
2015-01-01    0.323931
2015-01-02    0.661282
2015-01-03    0.160050
2015-01-04    0.652073
dtype: float64
'''
【课程2.13】  时间序列 - 重采样

将时间序列从一个频率转换为另一个频率的过程,且会有数据的结合

降采样:高频数据 → 低频数据,eg.以天为频率的数据转为以月为频率的数据
升采样:低频数据 → 高频数据,eg.以年为频率的数据转为以月为频率的数据
 
'''
# 重采样: .resample()
# 创建一个以天为频率的TimeSeries 重采样按5天为频率
dr=pd.date_range('20170101',periods=12)
ts=pd.Series(np.arange(12),index=dr)
print(ts)
2017-01-01     0
2017-01-02     1
2017-01-03     2
2017-01-04     3
2017-01-05     4
2017-01-06     5
2017-01-07     6
2017-01-08     7
2017-01-09     8
2017-01-10     9
2017-01-11    10
2017-01-12    11
Freq: D, dtype: int32
ts_re=ts.resample('5D')
print(ts_re)
DatetimeIndexResampler [freq=<5 * Days>, axis=0, closed=left, label=left, convention=start, base=0]
print(ts.resample('5D').sum())
2017-01-01    10
2017-01-06    35
2017-01-11    21
Freq: 5D, dtype: int32
print(ts.resample('5D').mean()) # 平均值
2017-01-01     2.0
2017-01-06     7.0
2017-01-11    10.5
Freq: 5D, dtype: float64
print(ts.resample('5D').max())   # 最大值
2017-01-01     4
2017-01-06     9
2017-01-11    11
Freq: 5D, dtype: int32
print(ts.resample('5D').median())   # 求中值
2017-01-01     2.0
2017-01-06     7.0
2017-01-11    10.5
Freq: 5D, dtype: float64
print(ts.resample('5D').first())   # 返回第一个值
2017-01-01     0
2017-01-06     5
2017-01-11    10
Freq: 5D, dtype: int32
print(ts.resample('5D').ohlc(),'→ OHLC重采样\n')
            open  high  low  close
2017-01-01     0     4    0      4
2017-01-06     5     9    5      9
2017-01-11    10    11   10     11 → OHLC重采样
# 降采样
dr=pd.date_range('20170101',periods=12)
ts=pd.Series(np.arange(1,13),index=dr)
print(ts)
2017-01-01     1
2017-01-02     2
2017-01-03     3
2017-01-04     4
2017-01-05     5
2017-01-06     6
2017-01-07     7
2017-01-08     8
2017-01-09     9
2017-01-10    10
2017-01-11    11
2017-01-12    12
Freq: D, dtype: int32
print(ts.resample('5D').sum())
# 默认closed='left',即区间左闭右开  [1,2,3,4,5][6,7,8,9,10][11,12]
2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32
print(ts.resample('5D',closed='left').sum())
# closed='left':区间左闭右开(包含左端点)  [1,2,3,4,5][6,7,8,9,10][11,12]
2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32
print(ts.resample('5D',closed='right').sum())
# closed='right':区间左开右闭(包含右端点)  [1][2,3,4,5,6][7,8,9,10,11][12]
2016-12-27     1
2017-01-01    20
2017-01-06    45
2017-01-11    12
Freq: 5D, dtype: int32
print(ts.resample('5D', label = 'left').sum(),'→ leftlabel\n')
print(ts.resample('5D', label = 'right').sum(),'→ rightlabel\n')
# label:聚合值的index取区间的哪一端,默认取左端
# 分组方式不受label影响,仍由closed决定(这里closed取默认值left)
2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32 → leftlabel

2017-01-06    15
2017-01-11    40
2017-01-16    23
Freq: 5D, dtype: int32 → rightlabel
# 升采样及插值
dr=pd.date_range('2017/1/1 0:0:0',periods=5,freq='H')
ts=pd.DataFrame(np.arange(15).reshape(5,3),index=dr,columns=list('abc'))
print(ts)
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 01:00:00   3   4   5
2017-01-01 02:00:00   6   7   8
2017-01-01 03:00:00   9  10  11
2017-01-01 04:00:00  12  13  14
print(ts.resample('15T').asfreq())
print(ts.resample('15T').ffill())
print(ts.resample('15T').bfill())
# .asfreq():不做填充,返回NaN
# .ffill():向前填充,用前一个有效值填充
# .bfill():向后填充,用后一个有效值填充
                        a     b     c
2017-01-01 00:00:00   0.0   1.0   2.0
2017-01-01 00:15:00   NaN   NaN   NaN
2017-01-01 00:30:00   NaN   NaN   NaN
2017-01-01 00:45:00   NaN   NaN   NaN
2017-01-01 01:00:00   3.0   4.0   5.0
2017-01-01 01:15:00   NaN   NaN   NaN
2017-01-01 01:30:00   NaN   NaN   NaN
2017-01-01 01:45:00   NaN   NaN   NaN
2017-01-01 02:00:00   6.0   7.0   8.0
2017-01-01 02:15:00   NaN   NaN   NaN
2017-01-01 02:30:00   NaN   NaN   NaN
2017-01-01 02:45:00   NaN   NaN   NaN
2017-01-01 03:00:00   9.0  10.0  11.0
2017-01-01 03:15:00   NaN   NaN   NaN
2017-01-01 03:30:00   NaN   NaN   NaN
2017-01-01 03:45:00   NaN   NaN   NaN
2017-01-01 04:00:00  12.0  13.0  14.0
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 00:15:00   0   1   2
2017-01-01 00:30:00   0   1   2
2017-01-01 00:45:00   0   1   2
2017-01-01 01:00:00   3   4   5
2017-01-01 01:15:00   3   4   5
2017-01-01 01:30:00   3   4   5
2017-01-01 01:45:00   3   4   5
2017-01-01 02:00:00   6   7   8
2017-01-01 02:15:00   6   7   8
2017-01-01 02:30:00   6   7   8
2017-01-01 02:45:00   6   7   8
2017-01-01 03:00:00   9  10  11
2017-01-01 03:15:00   9  10  11
2017-01-01 03:30:00   9  10  11
2017-01-01 03:45:00   9  10  11
2017-01-01 04:00:00  12  13  14
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 00:15:00   3   4   5
2017-01-01 00:30:00   3   4   5
2017-01-01 00:45:00   3   4   5
2017-01-01 01:00:00   3   4   5
2017-01-01 01:15:00   6   7   8
2017-01-01 01:30:00   6   7   8
2017-01-01 01:45:00   6   7   8
2017-01-01 02:00:00   6   7   8
2017-01-01 02:15:00   9  10  11
2017-01-01 02:30:00   9  10  11
2017-01-01 02:45:00   9  10  11
2017-01-01 03:00:00   9  10  11
2017-01-01 03:15:00  12  13  14
2017-01-01 03:30:00  12  13  14
2017-01-01 03:45:00  12  13  14
2017-01-01 04:00:00  12  13  14
# 时期重采样 - Period

prng = pd.period_range('2016','2017',freq = 'M')
ts = pd.Series(np.arange(len(prng)), index = prng)
print(ts)

print(ts.resample('3M').sum())  # 降采样
print(ts.resample('15D').ffill())  # 升采样
2016-01     0
2016-02     1
2016-03     2
2016-04     3
2016-05     4
2016-06     5
2016-07     6
2016-08     7
2016-09     8
2016-10     9
2016-11    10
2016-12    11
2017-01    12
Freq: M, dtype: int32
2016-01-31     0
2016-04-30     6
2016-07-31    15
2016-10-31    24
2017-01-31    33
Freq: 3M, dtype: int32
2016-01-01     0
2016-01-16     0
2016-01-31     0
2016-02-15     1
2016-03-01     2
2016-03-16     2
2016-03-31     2
2016-04-15     3
2016-04-30     3
2016-05-15     4
2016-05-30     4
2016-06-14     5
2016-06-29     5
2016-07-14     6
2016-07-29     6
2016-08-13     7
2016-08-28     7
2016-09-12     8
2016-09-27     8
2016-10-12     9
2016-10-27     9
2016-11-11    10
2016-11-26    10
2016-12-11    11
2016-12-26    11
Freq: 15D, dtype: int32
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值