import numpy as np
import pandas as pd
一、时间模块处理
# Cast the raw order-time column to strings so it can be parsed uniformly.
place_order_time = data['place_order_time'].astype(str)
1. 将字符串类型转成标准时间格式
# Parse the string timestamps into pandas datetime values and store them
# back in the same column.
data['place_order_time'] = pd.to_datetime(place_order_time)
# Preview the first five parsed timestamps.
print(data['place_order_time'].head(5))
输出:
0 2016-08-01 11:05:36
1 2016-08-01 11:07:07
2 2016-08-01 11:07:40
3 2016-08-01 11:11:11
4 2016-08-01 11:11:30
Name: place_order_time, dtype: datetime64[ns]
2. 时间类型提取
ser.dt常用时间属性:
year/month/day/hour/minute/second/date
# Extract the calendar year of each order into its own column.
data['year'] = data['place_order_time'].dt.year
# ISO week number of the year. `Series.dt.week` was deprecated in pandas 1.1
# and removed in 2.0; `dt.isocalendar().week` is the supported replacement.
# NOTE: the resulting Series is named 'week' and has dtype UInt32 rather than
# the int64 Series (named after the source column) the old accessor returned.
data['place_order_time'].dt.isocalendar().week.head()
0 31
1 31
2 31
3 31
4 31
Name: place_order_time, dtype: int64
3. 时间运算
(1)标准时间加减运算
time1 = data['place_order_time'].head()
# Series arithmetic aligns on index labels, so the tail is re-labelled from 0
# to pair up with the head; labels that do not match would yield NaT.
# drop=True discards the old index instead of keeping it as an extra column.
time2 = data['place_order_time'].tail().reset_index(drop=True)
print(time2 - time1)
0 9 days 10:50:48
1 9 days 10:49:41
2 9 days 10:54:12
3 9 days 10:52:47
4 9 days 10:53:00
Name: place_order_time, dtype: timedelta64[ns]
(2)整体加上时间差(Timedelta)
# Shift every order time forward by one day. The result is not assigned, so
# it is only visible when evaluated interactively.
data['place_order_time'].add(pd.Timedelta(days=1))
# Shift every order time forward by one hour and preview the first rows.
data['place_order_time'].add(pd.Timedelta(hours=1)).head()
0 2016-08-01 12:05:36
1 2016-08-01 12:07:07
2 2016-08-01 12:07:40
3 2016-08-01 12:11:11
4 2016-08-01 12:11:30
Name: place_order_time, dtype: datetime64[ns]
<