做数据处理时,经常需要对日期进行处理或计算。
下面给出几种常见的日期处理和运算方法(基于 pandas)。
日期数据类型转换
有时候我们拿到的日期列是 object 类型(通常是字符串),这时需要先用 pd.to_datetime 把它转换成 datetime64 类型,之后才能使用 .dt 访问器
# Parse the raw `comment_date` column into datetime64 and store it as `comment_Date`.
df['comment_Date'] = pd.to_datetime(df['comment_date'])
# Inspect the dtype of the converted column (there is no `Date` column to look at).
df['comment_Date'].dtypes
提取日期中的信息(输出为int类型)
# Extract year / quarter / month / day / hour / minute from the date column.
# Convert once and reuse the result instead of re-parsing the column on every line.
comment_ts = pd.to_datetime(df['comment_Date'])
df['year'] = comment_ts.dt.year
# The accessor is `.dt.quarter`; the 'Quauter' column name is kept as originally spelled.
df['Quauter'] = comment_ts.dt.quarter
df['month'] = comment_ts.dt.month
df['day'] = comment_ts.dt.day
df['date'] = comment_ts.dt.date  # datetime.date objects, so the column dtype is object
df['dayofweek'] = comment_ts.dt.dayofweek  # Monday=0 ... Sunday=6
df['hour'] = comment_ts.dt.hour
df['minute'] = comment_ts.dt.minute
提取日期中的信息(输出为datetime64类型)
1. 向下取整
# Round each timestamp DOWN to the start of its second / minute / hour / day.
# Lowercase "s" and "h" are used because the uppercase "S"/"H" frequency
# aliases are deprecated since pandas 2.2.
comment_ts = pd.to_datetime(df['comment_Date'])
df['second'] = comment_ts.dt.floor("s")
df['miniute'] = comment_ts.dt.floor("min")
df['hour'] = comment_ts.dt.floor("h")
df['day'] = comment_ts.dt.floor("D")
2. 向上取整
# Round each timestamp UP to the next second / minute / hour / day boundary.
# Lowercase "s" and "h" are used because the uppercase "S"/"H" frequency
# aliases are deprecated since pandas 2.2.
comment_ts = pd.to_datetime(df['comment_Date'])
df['second'] = comment_ts.dt.ceil("s")
df['miniute'] = comment_ts.dt.ceil("min")
df['hour'] = comment_ts.dt.ceil("h")
df['day'] = comment_ts.dt.ceil("D")
3. 四舍五入
# Round each timestamp to the NEAREST second / minute / hour / day.
# Lowercase "s" and "h" are used because the uppercase "S"/"H" frequency
# aliases are deprecated since pandas 2.2.
comment_ts = pd.to_datetime(df['comment_Date'])
df['second'] = comment_ts.dt.round("s")
df['miniute'] = comment_ts.dt.round("min")
df['hour'] = comment_ts.dt.round("h")
df['day'] = comment_ts.dt.round("D")
日期计算
1. 计算时间差
# Both operands are expected to be datetime64 columns already.
df['longevity'] = df['comment_date'] - df['upload_date']
2. 提取时间差
# Express the elapsed time as a whole number of months / days / hours /
# minutes / seconds.
# pandas 2.x only supports s/ms/us/ns timedelta resolutions, so
# astype('timedelta64[M]'), '[D]', '[h]' and '[m]' raise. Dividing by a unit
# Timedelta yields a float count instead, which astype(int) truncates.
elapsed = df['longevity']
# A month has no fixed length; the mean Gregorian month (365.2425 / 12 days)
# is used here, so the month count is an approximation.
df['month'] = (elapsed / pd.Timedelta(days=30.436875)).astype(int)
df['day'] = (elapsed / pd.Timedelta(days=1)).astype(int)
df['hour'] = (elapsed / pd.Timedelta(hours=1)).astype(int)
df['minute'] = (elapsed / pd.Timedelta(minutes=1)).astype(int)
df['second'] = (elapsed / pd.Timedelta(seconds=1)).astype(int)
3. 对日期做加减法
# NOTE(review): these lines assume `comment_date` is already datetime64 — confirm,
# or use the converted `comment_Date` column instead.
# Add one day.
df['next_day'] = df['comment_date'] + pd.Timedelta(days=1)
# Subtract one day.
df['last_day'] = df['comment_date'] - pd.Timedelta(days=1)
# Largest unit accepted as a pd.Timedelta keyword: weeks.
df['next_weeks'] = df['comment_date'] + pd.Timedelta(weeks=1)
# Smallest unit accepted as a pd.Timedelta keyword: nanoseconds.
# Written to `df` like every other column (the original targeted an undefined `df2`).
df['next_nanoseconds'] = df['comment_date'] + pd.Timedelta(nanoseconds=1)
PS:pd.Timedelta 支持的关键字参数(allowed keywords)为 [weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds]