量化交易之One Piece篇 - temp_one_piece_data.py - 新增根据品种的session时间段清洗数据的功能

import os.path
import re

import pandas

from tqsdk.tafunc import time_to_datetime

from tqz_extern.json_operator import TQZJsonOperator

import warnings
# Ignore every warning raised after this point (all categories, all modules).
warnings.filterwarnings('ignore')

# pandas.set_option('display.max_columns', None)
# pandas.set_option('display.max_rows', None)

class TQZTempOnePieceData:
    """Split raw CTP tick dumps into one CSV per instrument, keeping in-session rows only.

    For every trade date the night-session and day-session CSV files are
    concatenated, grouped by ``InstrumentID``, filtered against the trading
    sessions configured for the instrument's product, and written to
    ``D:/ctp_data/level2_data/<date>/<instrument>.csv``.
    """

    # Session table loaded once at class creation. As used below, every key is
    # expected to look like '<prefix>.<product>' and every value to carry 'day'
    # and 'night' lists of [start, end] time strings.
    __trading_time_json = TQZJsonOperator.tqz_load_jsonfile(jsonfile='./config/trading_time.json')

    @classmethod
    def dump_format_data(cls, date_str_list=None) -> None:
        """Export the cleaned per-instrument CSV files for each trade date.

        Args:
            date_str_list: Trade dates formatted as ``YYYYMMDD``. When omitted,
                the original fixed range (2023-04-17 .. 2023-04-28, trading
                days only) is processed.
        """
        if date_str_list is None:
            date_str_list = ['20230417', '20230418', '20230419', '20230420', '20230421',
                             '20230424', '20230425', '20230426', '20230427', '20230428']

        for date_str in date_str_list:
            target_fold = f'D:/ctp_data/level2_data/{date_str}'
            # makedirs also creates missing parents and tolerates an existing folder.
            os.makedirs(target_fold, exist_ok=True)

            # Raw f-strings: the backslashes are literal path separators, not escapes.
            day_path = rf'D:\ctp_data\CTP_{date_str}.csv'
            night_path = rf'D:\ctp_data\CTP_{date_str}_night.csv'

            single_day_df = cls.__get_single_trade_day_df(day_path=day_path, night_path=night_path)

            # One pass over the frame instead of one full boolean scan per instrument.
            # Rows whose InstrumentID is missing are skipped by groupby.
            for instrument, instrument_rows in single_day_df.groupby('InstrumentID', sort=False):
                cleaned = cls.__clean_format_data(
                    instrument=instrument,
                    format_data=instrument_rows.reset_index(drop=True),
                )
                cleaned.to_csv(f'{target_fold}/{instrument}.csv', index=False)

    @classmethod
    def __clean_format_data(cls, instrument: str, format_data: pandas.DataFrame) -> pandas.DataFrame:
        """Keep only the rows of ``format_data`` that fall inside the instrument's sessions.

        The product code is the leading 1-3 letters of ``instrument``; it is
        matched against the part after the first '.' of each session-table key.
        If no product code or no matching key is found, no session applies and
        an empty frame is returned.

        NOTE(review): the ``.str`` accessor requires the 'datetime' column to
        hold strings shaped like '<date> <time>'; confirm that is what
        ``time_to_datetime`` produces for this data.

        Args:
            instrument: Instrument id, e.g. a product code followed by digits.
            format_data: Rows of a single instrument; must have a 'datetime'
                column. The frame is not modified.

        Returns:
            A new frame with a fresh 0..n-1 index, without the helper columns
            and without the 'Unnamed: 0' column left over from the CSV index.
        """
        product_match = re.match(r'^[a-zA-Z]{1,3}', instrument)
        product = product_match.group() if product_match else None

        session_list = []
        if product is not None:
            for key, value in cls.__trading_time_json.items():
                key_parts = key.split('.')
                if len(key_parts) > 1 and key_parts[1] == product:
                    session_list = value['day'] + value['night']
                    break

        # Time-of-day text, compared lexicographically against the session bounds.
        time_of_day = format_data['datetime'].str.split(" ", expand=True)[1]

        in_session = pandas.Series(False, index=format_data.index)
        for session in session_list:
            start, end = session[0], session[1]
            if start < end:  # session within a single calendar day
                in_session |= (start <= time_of_day) & (time_of_day < end)
            else:  # session wraps past midnight (night session)
                in_session |= (start <= time_of_day) | (time_of_day < end)

        cleaned = format_data[in_session].reset_index(drop=True)

        # 'time' / 'in_session' are listed so the output columns stay the same
        # as before even if the input happens to contain columns with those names.
        return cleaned.drop(columns=['Unnamed: 0', 'in_session', 'time'], errors='ignore')

    @classmethod
    def __get_single_trade_day_df(cls, day_path: str, night_path: str) -> pandas.DataFrame:
        """Load one trade day: night-session rows first, then day-session rows.

        Args:
            day_path: CSV file with the day-session ticks.
            night_path: CSV file with the night-session ticks.

        Returns:
            The concatenated frame with a fresh 0..n-1 index and an added
            'datetime' column derived from 'Timestamp_in' via ``time_to_datetime``.
        """
        night_content = pandas.read_csv(night_path)
        day_content = pandas.read_csv(day_path)

        content = pandas.concat([night_content, day_content], axis=0, ignore_index=True)
        content['datetime'] = content['Timestamp_in'].apply(time_to_datetime)

        return content


if __name__ == "__main__":
    # Script entry point: run the export only when executed directly.
    TQZTempOnePieceData.dump_format_data()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值