在做预测时,需要获取一天中的第几小时、一周中的第几天、是工作日还是周末等信息,将其进行one-hot编码并输入到模型中。
最终输出格式为(时间片数,时间特征维度),其中时间特征维度为32(24+7+1)。
import pandas as pd
import numpy as np
import time
def timestamp2vec(timestamps):
    """Encode hourly timestamps as 32-dimensional time-feature vectors.

    Each timestamp is parsed with the format ``'%Y%m%d%H'`` and mapped to one
    row laid out as:

    * columns 0-23  -- one-hot hour of day (``tm_hour``)
    * columns 24-30 -- one-hot day of week (``tm_wday``; Monday is 0 and
      Sunday is 6)
    * column 31     -- 1 for a weekday (Mon-Fri), 0 for a weekend day

    Args:
        timestamps: iterable of timestamps such as ``b'2015020113'``. Both
            UTF-8 encoded bytes and plain ``str`` values are accepted.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(timestamps), 32)``. An empty
        input yields an array of shape ``(0, 32)``.

    Raises:
        ValueError: if a timestamp does not match ``'%Y%m%d%H'``.
    """
    parsed = []
    for t in timestamps:
        # str(bytes, encoding=...) decodes bytes-like input; an input that is
        # already text must be passed through, because str(text, encoding=...)
        # raises TypeError.
        text = t if isinstance(t, str) else str(t, encoding='utf-8')
        parsed.append(time.strptime(text, '%Y%m%d%H'))

    # Pre-allocating the full matrix keeps the second dimension at 32 even
    # when there are no timestamps at all.
    features = np.zeros((len(parsed), 32))
    for row, tm in zip(features, parsed):
        row[tm.tm_hour] = 1          # time of day
        row[24 + tm.tm_wday] = 1     # day of week
        if tm.tm_wday < 5:
            row[31] = 1              # weekday; stays 0 on Sat/Sun
    return features
def complete_time(dt):
    """Return ``dt`` as a string, prefixed with ``'0'`` when ``dt`` is below 10.

    Used to render month, day and hour numbers as two-character fields.
    """
    text = str(dt)
    return '0' + text if dt < 10 else text
def extrac_date_hour(date_range):
    """Convert datetime-like objects into encoded ``YYYYMMDDHH`` timestamps.

    Args:
        date_range: iterable of objects exposing ``year``, ``month``, ``day``
            and ``hour`` attributes (for example a pandas ``DatetimeIndex``).

    Returns:
        A list of ``bytes`` objects, one per element, built from the year
        followed by the zero-padded month, day and hour.
    """
    return [
        (
            str(stamp.year)
            + complete_time(stamp.month)
            + complete_time(stamp.day)
            + complete_time(stamp.hour)
        ).encode()
        for stamp in date_range
    ]
if __name__ == '__main__':
    # Input date range: hourly steps starting at 2015-02-01 00:00 and ending
    # just before 2015-07-01 00:00 (left-closed interval).
    range_start = pd.Timestamp('2015-02-01 00:00:00')
    range_end = pd.Timestamp('2015-07-01 00:00:00')
    date_range = pd.date_range(start=range_start, end=range_end, freq='h')
    # Drop the right endpoint explicitly rather than passing closed='left':
    # that keyword was removed from pd.date_range in pandas 2.0, and its
    # replacement (inclusive=) does not exist in older releases.
    date_range = date_range[date_range < range_end]
    filename = 'cd_time.npz'
    num_nodes = 225
    # Extract the dates as encoded 'YYYYMMDDHH' timestamps.
    result = extrac_date_hour(date_range)
    # Build the one-hot encoding.
    timestamps = timestamp2vec(result)
    # (timesteps, 32); 32 = 24 + 7 + 1: 24 hours, 7 days of the week and
    # 1 weekday flag.
    print(timestamps.shape)
    # Insert a singleton axis at position 1: (timesteps, 1, 32).
    timestamps = np.expand_dims(timestamps, axis=1)
    print(timestamps.shape)
    # Optional follow-up steps, left disabled: repeat the features along
    # axis 1 for every node and save the result to disk.
    # timestamps = np.repeat(timestamps, num_nodes, axis=1)
    # print(timestamps.shape)
    # np.savez(filename, data=timestamps)
直接对pandas中的日期字段进行one-hot编码
def get_time_embed(df):
    """Build one-hot time features from the ``dt`` and ``hour`` columns.

    The result has 24 + 7 + 1 = 32 integer columns:

    * ``hour_0`` .. ``hour_23`` -- one-hot hour of day, taken from ``hour``
    * ``week_0`` .. ``week_6``  -- one-hot day of week of ``dt`` (Monday is 0)
    * ``is_workday``            -- 1 for Monday-Friday, 0 for Saturday/Sunday

    Args:
        df: DataFrame with a ``dt`` column parseable by ``pd.to_datetime``
            with format ``'%Y-%m-%d'`` and an ``hour`` column. An ``hour`` of
            ``-9999.0`` (presumably a missing-value sentinel -- confirm with
            the data source) is treated as hour 0.

    Returns:
        A new DataFrame sharing the index of ``df``. All 32 columns are always
        present, even when some hours or weekdays never occur in the data.
        The input ``df`` is not modified.
    """
    dt = pd.to_datetime(df['dt'], format='%Y-%m-%d')
    dayofweek = dt.dt.dayofweek
    # Assign the result of replace() instead of using inplace=True on a
    # column selection, which does not update the frame under Copy-on-Write.
    hour = df['hour'].replace(-9999.0, 0)

    # Comparing against every expected value (rather than relying on
    # pd.get_dummies) guarantees a fixed set of columns regardless of which
    # hours/weekdays appear in the data.
    columns = {}
    for i in range(24):
        columns['hour_' + str(i)] = (hour == i).astype(int)
    for i in range(7):
        columns['week_' + str(i)] = (dayofweek == i).astype(int)
    # Workdays are Monday (0) through Friday (4).
    columns['is_workday'] = (dayofweek < 5).astype(int)

    df_time = pd.DataFrame(columns, index=df.index, columns=list(columns))
    return df_time