特征工程之获取timeofday、dayofweek、isweekday,并进行one-hot编码

在做预测时,需要获取一天中的第几小时、一周中的第几天、是工作日还是周末等信息,将其进行one-hot编码并输入到模型中。
最终输出的格式为(时间片数,时间特征维度),其中时间特征维度为32(24小时 + 一周7天 + 1位是否工作日)。

import pandas as pd
import numpy as np
import time

def timestamp2vec(timestamps):
    """Encode hourly timestamps as 32-dimensional time-feature vectors.

    Each output row is the concatenation of:
      * columns 0-23: one-hot hour of day (``tm_hour``),
      * columns 24-30: one-hot day of week (``tm_wday``; Monday is 0 and
        Sunday is 6),
      * column 31: 1 for a weekday (Mon-Fri), 0 for a weekend day.

    Args:
        timestamps: iterable of timestamps in ``'%Y%m%d%H'`` form, e.g.
            ``b'2015020113'``. Items may be ``str`` or bytes-like objects
            (decoded as UTF-8).

    Returns:
        A float ``numpy.ndarray`` of shape ``(number_of_timestamps, 32)``.
        An empty input yields an array of shape ``(0, 32)``.

    Raises:
        ValueError: if a timestamp does not match ``'%Y%m%d%H'``.
    """
    parsed = []
    for t in timestamps:
        # Bytes-like items are decoded first; plain strings are parsed as-is.
        if not isinstance(t, str):
            t = str(t, encoding='utf-8')
        parsed.append(time.strptime(t, '%Y%m%d%H'))

    # Pre-allocating the matrix keeps the trailing dimension at 32 even when
    # there are no timestamps at all.
    ret = np.zeros((len(parsed), 32))
    for row, tm in enumerate(parsed):
        ret[row, tm.tm_hour] = 1          # time of day
        ret[row, 24 + tm.tm_wday] = 1     # day of week
        if tm.tm_wday < 5:
            ret[row, 31] = 1              # weekday flag (stays 0 on weekends)
    return ret

def complete_time(dt):
    """Return ``dt`` as a string, prefixed with ``'0'`` when ``dt`` is below 10.

    Used to build two-character month/day/hour fields such as ``'03'``.
    """
    text = str(dt)
    return '0' + text if dt < 10 else text

def extrac_date_hour(date_range):
    """Convert datetime-like objects into encoded ``YYYYMMDDHH`` byte strings.

    Args:
        date_range: iterable of objects exposing ``year``, ``month``, ``day``
            and ``hour`` attributes.

    Returns:
        A list of ``bytes``, one per input item. Month, day and hour are
        padded through ``complete_time``; the year is written as ``str(year)``.
    """
    encoded = []
    for moment in date_range:
        pieces = [str(moment.year)]
        pieces += [
            complete_time(part)
            for part in (moment.month, moment.day, moment.hour)
        ]
        encoded.append(''.join(pieces).encode())
    return encoded

if __name__ == '__main__':
    # Input date range: one entry per hour. Both bounds are normalized to
    # midnight and the end bound is excluded, so the last entry is 23:00 of
    # the day before the end date. This is built without date_range's
    # ``closed`` keyword, which pandas 2.0 removed, so it runs on old and new
    # pandas alike.
    range_start = pd.Timestamp('2015-02-01 00:00:00').normalize()
    range_end = pd.Timestamp('2015-07-01 23:00:00').normalize()
    date_range = pd.date_range(start=range_start, end=range_end, freq='h')
    date_range = date_range[date_range < range_end]
    filename = 'cd_time.npz'
    num_nodes = 225
    # Extract the date/hour strings.
    result = extrac_date_hour(date_range)
    # Build the one-hot encoding.
    timestamps = timestamp2vec(result)

    # (timesteps, 32); 32 = 24 + 7 + 1: 24 hours, 7 days of week, 1 weekday flag
    print(timestamps.shape)
    # Insert a singleton axis at position 1: (timesteps, 1, 32).
    timestamps = np.expand_dims(timestamps, axis=1)
    print(timestamps.shape)
    # Optional: repeat the features along axis 1, once per node.
    #timestamps = np.repeat(timestamps,num_nodes,axis=1)
    #print(timestamps.shape)

    # Optional: save the result to disk.
    # np.savez(filename,data=timestamps)

直接对pandas中的日期字段进行one-hot编码

def get_time_embed(df):
    """Build a 32-column one-hot time embedding from a DataFrame.

    The result has 24 + 7 + 1 = 32 columns:
      * ``hour_0`` .. ``hour_23``: one-hot hour of day, from ``df['hour']``,
      * ``week_0`` .. ``week_6``: one-hot day of week, from ``df['dt']``
        (Monday is 0, Sunday is 6),
      * ``is_workday``: 1 for Monday-Friday, 0 for Saturday/Sunday.

    Args:
        df: DataFrame with a ``'dt'`` column parseable as ``'%Y-%m-%d'`` and
            an ``'hour'`` column holding values 0-23. The value ``-9999.0``
            in ``'hour'`` is treated as hour 0. ``df`` is not modified.

    Returns:
        A DataFrame with the 32 columns above, sharing ``df``'s index.
    """
    dates = pd.to_datetime(df['dt'], format='%Y-%m-%d')
    dayofweek = dates.dt.dayofweek
    # dayofweek 5 and 6 are Saturday and Sunday, so a workday is anything below 5.
    is_workday = (dayofweek < 5).astype(int).rename('is_workday')

    # Assign the result instead of replacing in place on a column selection:
    # a chained in-place replace does not propagate under pandas Copy-on-Write.
    hour = df['hour'].replace(-9999.0, 0)

    # Fixed categories guarantee all 24 / 7 columns exist, in order, even
    # when some hours or weekdays never occur in the data.
    hour_dtype = pd.CategoricalDtype(categories=list(range(24)))
    df_timeofday = pd.get_dummies(hour.astype(hour_dtype))
    df_timeofday.columns = ['hour_' + str(i) for i in range(24)]

    week_dtype = pd.CategoricalDtype(categories=list(range(7)))
    df_dayofweek = pd.get_dummies(dayofweek.astype(week_dtype))
    df_dayofweek.columns = ['week_' + str(i) for i in range(7)]

    df_time = pd.concat([df_timeofday, df_dayofweek, is_workday], axis=1)

    return df_time
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值