在按时间段提取数据的过程中,我暂时没有找到合适的现成方法。
要求:
提取数据中 9:00–10:15、10:30–12:00、13:30–15:00、21:00–23:00 这四个时间段的数据。(注意:下面代码中实际使用的时间段为 9:00–10:00、10:30–11:30、13:30–14:59:59、21:00–次日 02:30,与此处的要求并不完全一致。)
因此我先用一个笨办法实现,计算效率留待后续优化。
import pandas as pd
import connectorx as cx
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")
class time_slice:
    """Extract fixed intraday sessions from a datetime-indexed DataFrame.

    Each session window is cut out of the frame for every calendar day the
    data covers, resampled to one row per second (mean of the rows falling
    in that second, gaps forward-filled) and the pieces are stacked together.

    The wrapped frame is expected to carry a ``DatetimeIndex`` and numeric
    columns (``resample(...).mean()`` is applied to every column).
    """

    # Default (start, end) session bounds as "HH:MM:SS" offsets from midnight.
    # An end that is not later than its start means the session runs past
    # midnight and ends on the following calendar day.
    # NOTE(review): the write-up accompanying this code asks for 9:00-10:15,
    # 10:30-12:00, 13:30-15:00 and 21:00-23:00; the defaults below keep the
    # bounds the code has always used. Pass ``sessions`` to override them.
    DEFAULT_SESSIONS = (
        ("09:00:00", "10:00:00"),
        ("10:30:00", "11:30:00"),
        ("13:30:00", "14:59:59"),
        ("21:00:00", "02:30:00"),
    )

    # Slice bounds are passed to pandas as second-resolution strings so that
    # an index with sub-second stamps keeps the whole closing second.
    _BOUND_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, dataframe):
        """Store the datetime-indexed frame that sessions are cut from."""
        self.dataframe = dataframe

    def time_slice_number(self):
        """Return the day and month of the first and last index entries.

        Returns:
            tuple: ``(begin_day, end_day, begin_month, end_month)`` taken from
            the first and last row labels in their stored order.

        Raises:
            IndexError: if the frame has no rows.
        """
        first_stamp = self.dataframe.index[0]
        last_stamp = self.dataframe.index[-1]
        return first_stamp.day, last_stamp.day, first_stamp.month, last_stamp.month

    def time_slice_need(self, sessions=None):
        """Return the per-second resampled rows that fall inside the sessions.

        Args:
            sessions: optional iterable of ``(start, end)`` pairs of
                "HH:MM:SS" strings. Defaults to ``DEFAULT_SESSIONS``. A pair
                whose end is not later than its start wraps to the next day.

        Returns:
            pandas.DataFrame: the stacked session rows with an extra ``date``
            column holding the index values. As before, the first stacked row
            is dropped and the rows are returned in reverse (latest first).
        """
        if sessions is None:
            sessions = self.DEFAULT_SESSIONS

        pieces = []
        if len(self.dataframe.index) > 0:
            # Label slicing needs a sorted index; a stable sort keeps the
            # relative order of rows that share a timestamp.
            frame = self.dataframe.sort_index(kind="mergesort")
            first_day = frame.index[0].normalize()
            last_day = frame.index[-1].normalize()
            one_day = pd.Timedelta(days=1)

            # Walk real calendar days taken from the data itself, so any
            # year, month boundary or month length is handled correctly.
            for day in pd.date_range(first_day, last_day, freq="D"):
                for start_text, end_text in sessions:
                    start = day + pd.Timedelta(start_text)
                    end = day + pd.Timedelta(end_text)
                    if end <= start:
                        end += one_day  # overnight session
                    window = frame.loc[
                        start.strftime(self._BOUND_FORMAT):
                        end.strftime(self._BOUND_FORMAT)
                    ]
                    # The first row of every window is discarded, exactly as
                    # the original implementation did.
                    window = window.iloc[1:]
                    if window.empty:
                        continue
                    pieces.append(window.resample("s").mean().ffill())

        # Concatenate once at the end instead of growing a frame in the loop.
        if pieces:
            result = pd.concat(pieces)
        else:
            result = self.dataframe.iloc[0:0].copy()
        result["date"] = result.index
        # Kept from the original: drop the first stacked row, newest first.
        return result.iloc[1:].iloc[::-1]
如果有更好的办法,欢迎各位朋友指点,^_^