import os
import numpy as np
import pandas as pd
from conf.path_config import resource_dir
from conf.const import TIME_COL
from common.common_tool import make_dir_if_not_exists
class LoadData(object):
    """Load a node's feature table and enrich it with time-derived features.

    Typical use is ``LoadData(node_code).execute()``: the input file is read,
    calendar and cyclical (sin/cos) features are derived from the time column,
    and the enriched table is written to
    ``<resource_dir>/<node_code>/<node_code>__features.csv``.
    """

    # Cycle length used to project each calendar field onto the unit circle.
    # NOTE(review): 'day' uses 30 although day-of-month goes up to 31, so day 31
    # and day 1 get the same encoding. Kept as-is so existing feature values do
    # not change.
    _PERIODS = {
        'season': 4,
        'month': 12,
        'weekday': 7,
        'day': 30,
        'hour': 24,
        'minute': 60,
        'second': 60,
    }
    # Period applied to any other type (i.e. raw POSIX timestamps): one year in seconds.
    _SECONDS_PER_YEAR = 365.25 * 24 * 60 * 60

    def __init__(self, node_code, file_type='csv', sheet=None):
        """
        Set up paths and empty state; no file is read here.

        :param node_code: node ID, used as sub-directory name and file-name prefix
        :param file_type: input file type, one of {'csv', 'excel'}; any value
            other than 'csv' is handled as excel
        :param sheet: sheet name to read when the file type is excel; the first
            sheet is used when omitted
        """
        self.file_type = file_type
        self.sheet = sheet
        self.data = None  # table as loaded by load_data()
        self.df = None  # table with the derived features (same object as self.data)
        self.node_code = node_code
        self.data_dir = os.path.join(resource_dir, str(node_code))
        if file_type == 'csv':
            self.input_path = os.path.join(self.data_dir, str(node_code) + '_features.csv')
        else:
            # NOTE(review): the excel file is looked up directly under
            # resource_dir, not under the per-node data_dir like the csv file.
            # Left unchanged; confirm whether this asymmetry is intended.
            self.input_path = os.path.join(resource_dir, str(node_code) + '_features.xlsx')

    def execute(self):
        """Run the whole pipeline: load the input file, then derive and save features."""
        self.load_data()
        self.add_features()

    def load_data(self):
        """
        Read the input file into ``self.data``.

        :return: the loaded DataFrame
        """
        if self.file_type == 'csv':
            self.data = pd.read_csv(self.input_path)
        else:
            # sheet_name=None would make pandas return a dict of *all* sheets,
            # which the feature step cannot handle; fall back to the first sheet.
            sheet_name = 0 if self.sheet is None else self.sheet
            self.data = pd.read_excel(self.input_path, sheet_name=sheet_name)
        return self.data

    def add_features(self):
        """
        Derive the time features and write the result to
        ``<data_dir>/<node_code>__features.csv`` (without the index).

        Loads the input first if ``load_data`` has not been called yet.
        """
        if self.data is None:
            self.load_data()
        self.df = self.add_features_algo(self.data)
        # For excel input the per-node directory may not exist yet.
        os.makedirs(self.data_dir, exist_ok=True)
        output_path = os.path.join(self.data_dir, str(self.node_code) + '__features.csv')
        self.df.to_csv(output_path, index=False)

    @staticmethod
    def _cyclic_transform(values, ts_type, is_sin=True):
        """
        Encode calendar values as a point on the unit circle.

        :param values: scalar or array-like of numeric calendar values
        :param ts_type: kind of value, one of {'season', 'month', 'weekday',
            'day', 'hour', 'minute', 'second'}; anything else is treated as a
            POSIX timestamp with a period of one year
        :param is_sin: sine transform when True (default), cosine otherwise
        :return: sin or cos of ``values * 2 * pi / period``
        """
        period = LoadData._PERIODS.get(ts_type, LoadData._SECONDS_PER_YEAR)
        angle = values * 2 * np.pi / period
        return np.sin(angle) if is_sin else np.cos(angle)

    @staticmethod
    def add_features_algo(df, time_col=None):
        """
        Add calendar and cyclical time features to ``df``.

        The frame is modified in place: the time column is converted to
        datetime and the columns ``season`` (calendar quarter, 1-4), ``month``,
        ``weekday`` (Monday=1 ... Sunday=7), ``day``, ``hour``, ``minute``,
        ``timestamp`` (POSIX seconds) plus a ``*_sin`` / ``*_cos`` pair for each
        of them (``ts_sin`` / ``ts_cos`` for the timestamp) are appended.

        :param df: input DataFrame containing the time column
        :param time_col: name of the time column; defaults to ``TIME_COL``
        :return: the same DataFrame object, with the feature columns added
        """
        if time_col is None:
            time_col = TIME_COL

        df[time_col] = pd.to_datetime(df[time_col])
        when = df[time_col].dt

        month = when.month
        # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        df['season'] = (month - 1) // 3 + 1
        df['month'] = month
        df['weekday'] = when.weekday + 1
        df['day'] = when.day
        df['hour'] = when.hour
        df['minute'] = when.minute
        # Per-element call keeps pandas' Timestamp.timestamp() semantics
        # (including time-zone handling) exactly as before.
        df['timestamp'] = np.array([ts.timestamp() for ts in df[time_col]], dtype='float64')

        for field in ('season', 'month', 'weekday', 'day', 'hour', 'minute'):
            df[field + '_sin'] = LoadData._cyclic_transform(df[field], field, True)
            df[field + '_cos'] = LoadData._cyclic_transform(df[field], field, False)
        df['ts_sin'] = LoadData._cyclic_transform(df['timestamp'], 'timestamp', True)
        df['ts_cos'] = LoadData._cyclic_transform(df['timestamp'], 'timestamp', False)
        return df
#
if __name__ == '__main__':
    # Script entry point: build the feature file for a single node.
    # Node codes used in earlier runs:
    #   '冯记烤鱼', '思朗食品', '杏林春', '深科五金', '总的数据'
    loader = LoadData('百嘉宜食品')
    loader.execute()