import os.path
import re
import pandas
from tqsdk.tafunc import time_to_datetime
from tqz_extern.json_operator import TQZJsonOperator
import warnings
warnings.filterwarnings('ignore')
# pandas.set_option('display.max_columns', None)
# pandas.set_option('display.max_rows', None)
class TQZTempOnePieceData:
    """Split raw CTP tick dumps into per-instrument CSV files limited to trading sessions.

    For every trade date the night and day CSV dumps are merged, a ``datetime``
    column is derived from ``Timestamp_in``, the rows are grouped by
    ``InstrumentID`` and only the rows whose time of day lies inside one of the
    instrument's configured sessions are written out.
    """

    # Session table, loaded once when the class body is executed.
    # The code below reads it as a mapping whose keys contain a '.' (the second
    # dot-separated part is compared with the instrument's letter prefix) and
    # whose values hold a 'day' and a 'night' sequence of [start, end] pairs.
    __trading_time_json = TQZJsonOperator.tqz_load_jsonfile(jsonfile='./config/trading_time.json')

    # Leading 1-3 ASCII letters of an instrument id; compiled once instead of
    # being re-evaluated for every key of the session table.
    __product_pattern = re.compile(r'^[a-zA-Z]{1,3}')

    @classmethod
    def dump_format_data(cls, date_str_list=None):
        """Write one session-filtered CSV per instrument for every trade date.

        Args:
            date_str_list: Iterable of trade dates formatted as ``YYYYMMDD``.
                When omitted, the original fixed list of dates
                (2023-04-17 .. 2023-04-28) is processed.

        Output goes to ``D:/ctp_data/level2_data/<date>/<instrument>.csv``;
        the folder is created (including missing parents) when necessary.
        """
        if date_str_list is None:
            date_str_list = ['20230417', '20230418', '20230419', '20230420', '20230421',
                             '20230424', '20230425', '20230426', '20230427', '20230428']

        for date_str in date_str_list:
            target_fold = f'D:/ctp_data/level2_data/{date_str}'
            # makedirs also creates a missing 'level2_data' parent and does not
            # fail when the folder already exists.
            os.makedirs(target_fold, exist_ok=True)

            # Raw f-strings: same path text as before, but '\c' / '\C' are no
            # longer parsed as (invalid) escape sequences.
            day_path = rf'D:\ctp_data\CTP_{date_str}.csv'
            night_path = rf'D:\ctp_data\CTP_{date_str}_night.csv'

            single_day_df = cls.__get_single_trade_day_df(day_path=day_path, night_path=night_path)

            # One pass over the frame instead of one boolean scan per instrument.
            # Row order inside each group is the original row order; rows with a
            # missing InstrumentID are skipped by groupby.
            for instrument, single_instrument_df in single_day_df.groupby('InstrumentID', sort=False):
                cls.__clean_format_data(
                    instrument=instrument,
                    format_data=single_instrument_df.reset_index(drop=True),
                ).to_csv(f'{target_fold}/{instrument}.csv', index=False)

    @classmethod
    def __clean_format_data(cls, instrument, format_data):
        """Clean the data according to the session times of ``instrument``.

        Args:
            instrument: Instrument id; its leading letters select the sessions.
            format_data: DataFrame with a ``datetime`` column. It is not modified.

        Returns:
            A new DataFrame with a fresh 0..n-1 index holding only the rows whose
            time of day lies in ``[start, end)`` of at least one session. The
            helper columns ``'Unnamed: 0'``, ``'in_session'`` and ``'time'`` are
            removed when present. With no matching sessions the result is empty.
        """
        time_of_day = cls.__extract_time_of_day(format_data['datetime'])

        in_session = pandas.Series(False, index=format_data.index)
        for session in cls.__find_sessions(instrument):
            start, end = session[0], session[1]
            if start < end:  # session within one calendar day
                in_session |= (start <= time_of_day) & (time_of_day < end)
            else:  # start is not before end: session wraps past midnight
                in_session |= (start <= time_of_day) | (time_of_day < end)

        cleaned = format_data[in_session].reset_index(drop=True)
        return cleaned.drop(columns=['Unnamed: 0', 'in_session', 'time'], errors='ignore')

    @classmethod
    def __find_sessions(cls, instrument):
        """Return the 'day' + 'night' session pairs configured for ``instrument``.

        An empty list is returned when the instrument id has no letter prefix or
        when no key of the session table matches that prefix.
        """
        matched = cls.__product_pattern.match(str(instrument))
        if matched is None:
            return []
        product = matched.group()

        for key, value in cls.__trading_time_json.items():
            key_parts = key.split('.')
            # Keys without a '.' cannot name a product; skip them instead of
            # failing with an IndexError.
            if len(key_parts) > 1 and key_parts[1] == product:
                return value['day'] + value['night']
        return []

    @staticmethod
    def __extract_time_of_day(datetime_column):
        """Return the time-of-day part of ``datetime_column`` as strings.

        String values are split on the space between date and time. Columns that
        pandas stores as datetime64 (where the ``.str`` accessor is unavailable)
        are formatted directly.
        NOTE(review): the comparison in the caller is lexicographic, so the
        session boundaries are presumably zero-padded 'HH:MM:SS' strings - confirm
        against config/trading_time.json.
        """
        if pandas.api.types.is_datetime64_any_dtype(datetime_column):
            return datetime_column.dt.strftime('%H:%M:%S.%f')
        # Rows without a second piece yield NaN, which never counts as in-session.
        return datetime_column.str.split(' ').str[1]

    @classmethod
    def __get_single_trade_day_df(cls, day_path: str, night_path: str):
        """Load one trade day: night-session rows first, then day-session rows.

        Args:
            day_path: Path of the day-session CSV.
            night_path: Path of the night-session CSV.

        Returns:
            The concatenated DataFrame with a fresh 0..n-1 index and an added
            ``datetime`` column computed from ``Timestamp_in`` with
            ``time_to_datetime``.
        """
        night_content = pandas.read_csv(night_path)
        day_content = pandas.read_csv(day_path)

        content = pandas.concat([night_content, day_content], axis=0, ignore_index=True)
        content['datetime'] = content['Timestamp_in'].apply(time_to_datetime)
        return content
# Script entry point: export the per-instrument session data only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    TQZTempOnePieceData.dump_format_data()
量化交易之One Piece篇 - temp_one_piece_data.py - 新增根据品种的session时间段清洗数据的功能
于 2023-05-04 22:22:57 首次发布