# 将时间变成多个特征 (derive multiple features from a time column)

import os
import numpy as np
import pandas as pd

from conf.path_config import resource_dir
from conf.const import TIME_COL
from common.common_tool import make_dir_if_not_exists


class LoadData(object):
    """Load one node's feature table and append calendar / cyclical time features.

    The table is read from a CSV or Excel file, the time column is parsed, and
    the enriched table is written to ``<node_code>__features.csv`` inside the
    node's data directory.
    """

    # (output prefix, source column, cycle length) for the sin/cos encodings,
    # in the order the columns are appended.
    # The 'day' cycle length is 30, so day 31 lands on the same angle as day 1.
    # The 'ts' cycle is one Julian year expressed in seconds.
    _CYCLES = (
        ('season', 'season', 4),
        ('month', 'month', 12),
        ('weekday', 'weekday', 7),
        ('day', 'day', 30),
        ('hour', 'hour', 24),
        ('minute', 'minute', 60),
        ('ts', 'timestamp', 365.25 * 24 * 60 * 60),
    )

    def __init__(self, node_code, file_type='csv', sheet=None):
        """
        Set up the input path for a node.

        :param node_code:   node identifier; also the name of the node's data directory
        :param file_type:   'csv' selects the CSV input; any other value selects the Excel input
        :param sheet:   sheet name/index for Excel input; ``None`` means the first sheet
        """
        self.file_type = file_type
        self.data = None
        self.df = None
        self.node_code = node_code
        self.data_dir = os.path.join(resource_dir, str(node_code))
        if file_type == 'csv':
            self.input_path = os.path.join(self.data_dir, str(node_code) + '_features.csv')
        else:
            # NOTE(review): the Excel file is looked up directly under
            # resource_dir, not under the node's data_dir like the CSV --
            # kept as-is; confirm this matches the on-disk layout.
            self.input_path = os.path.join(resource_dir, str(node_code) + '_features.xlsx')
        self.sheet = sheet

    def execute(self):
        """Load the input table, add the time features and write the result to disk."""
        self.load_data()
        self.add_features()

    def load_data(self):
        """
        Read the input file into ``self.data``.

        :return: the loaded DataFrame
        """
        if self.file_type == 'csv':
            self.data = pd.read_csv(self.input_path)
        else:
            # Passing sheet_name=None to read_excel returns a dict of *all*
            # sheets, which the feature step cannot consume; treat "no sheet
            # given" as "first sheet" instead.
            sheet_name = 0 if self.sheet is None else self.sheet
            self.data = pd.read_excel(self.input_path, sheet_name=sheet_name)
        return self.data

    def add_features(self):
        """Build the enriched table in ``self.df`` and save it as CSV next to the input data."""
        self.df = self.add_features_algo(self.data)
        # The output name uses a double underscore, so it differs from the
        # '<node_code>_features.csv' input file name.
        self.df.to_csv(os.path.join(self.data_dir, str(self.node_code) + '__features.csv'), index=False)

    @staticmethod
    def add_features_algo(df, time_col=None):
        """
        Return a new DataFrame with calendar and cyclical features appended.

        The input is copied, so the caller's object is never modified.

        Added columns, in order: ``season`` (1-4), ``month``, ``weekday``
        (Monday = 1), ``day``, ``hour``, ``minute``, ``timestamp`` (POSIX
        seconds from ``Timestamp.timestamp()``), then a ``*_sin`` / ``*_cos``
        pair for each of season, month, weekday, day, hour, minute and
        timestamp (prefix ``ts``).

        :param df: a DataFrame, or anything ``pd.DataFrame`` accepts such as a list of dicts
        :param time_col: name of the time column; defaults to the project-wide ``TIME_COL``
        :return: the enriched DataFrame
        """
        if time_col is None:
            time_col = TIME_COL

        # copy=True keeps the caller's data untouched for DataFrame input.
        frame = pd.DataFrame(df, copy=True)

        times = pd.to_datetime(frame[time_col])
        frame[time_col] = times

        # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        frame['season'] = (times.dt.month + 2) // 3
        frame['month'] = times.dt.month
        frame['weekday'] = times.dt.weekday + 1
        frame['day'] = times.dt.day
        frame['hour'] = times.dt.hour
        frame['minute'] = times.dt.minute
        # Kept element-wise so the values are exactly what Timestamp.timestamp() yields.
        frame['timestamp'] = times.apply(lambda ts: ts.timestamp())

        # Map each periodic value onto the unit circle so that the end of a
        # cycle sits next to its start.
        for prefix, column, period in LoadData._CYCLES:
            angle = frame[column] * 2 * np.pi / period
            frame[prefix + '_sin'] = np.sin(angle)
            frame[prefix + '_cos'] = np.cos(angle)
        return frame


#
if __name__ == '__main__':
    # Script entry point: run the full load -> add features -> save pipeline
    # for a single node, using the default CSV input.
    # Alternative node codes the author left commented out:
    #   '冯记烤鱼', '思朗食品', '杏林春', '深科五金', '总的数据'
    loader = LoadData('百嘉宜食品')
    loader.execute()