// base_dict.py
import sys
def price_lis(mktdata, key, horizon=10):
    """Return the per-day forward change of column ``key`` over ``horizon`` rows.

    Rows of ``mktdata`` are grouped by the ``'mktdate'`` column. Inside each
    group the value for a row is ``x[t + horizon] - x[t]``. The last
    ``horizon`` rows of a group have no row that far ahead, so for them the
    group's final value is used instead: ``x[last] - x[t]``.

    Note that ``fillna`` fills every NaN produced by the shifted difference,
    so NaNs that come from missing input values are replaced by the same
    fallback as the tail rows.

    Args:
        mktdata: pandas DataFrame containing a ``'mktdate'`` column and ``key``.
        key: name of the column holding the price series.
        horizon: positive number of rows to look ahead (default 10, the
            value that used to be hard-coded).

    Returns:
        numpy.ndarray with the forward changes, as produced by
        ``groupby(...).apply(...).values``.
    """

    def _forward_change(prices):
        # diff(-horizon) yields x[t] - x[t + horizon]; the fallback is
        # x[t] - x[last]. Negating turns both into "future minus current".
        return -prices.diff(-horizon).fillna(prices - prices.iloc[-1])

    grouped = mktdata.groupby('mktdate', group_keys=False, sort=False)[key]
    return grouped.apply(_forward_change).values
# Price tick per instrument code.
# Presumably the minimum price increment of each product - TODO confirm.
# The values repeat the 'price_tick' fields of instrument_dic for the codes
# both tables share; this table has 'ni00' (absent from instrument_dic) and
# lacks 'eb' (present there). Literals mix ints and floats (e.g. 'ru00': 5).
# NOTE(review): rows look grouped by exchange/sector - confirm before relying on it.
price_tick_dic = {
    'IF00': 0.2, 'IH00': 0.2, 'IC00': 0.2, 'TF00': 0.005, 'T00': 0.005,
    'cu00': 10.0, 'al00': 5.0, 'zn00': 5.0, 'ni00': 10.0, 'sn00': 10.0, 'au00': 0.05, 'ag00': 1.0,
    'rb00': 1.0, 'fu00': 1.0, 'bu00': 2.0, 'ru00': 5,
    'm00': 1.0, 'y00': 2.0, 'a00': 1.0, 'p00': 2.0, 'c00': 1.0, 'cs00': 1.0, 'l00': 5.0, 'v00': 1.0,
    'eg00': 1.0, 'pp00': 1.0, 'j00': 0.5, 'jm00': 0.5, 'i00': 0.5, 'pg00': 1.0,
    'SR00': 1.0, 'CF00': 5, 'ZC00': 0.2, 'FG00': 1.0, 'TA00': 2.0, 'MA00': 1.0, 'SA00': 1.0,
    'OI00': 1.0, 'RM00': 1.0, 'AP00': 1.0, 'CJ00': 5.0, 'PF00': 2.0,
    'sc00': 0.1, 'sc01': 0.1,
    'AgTD': 1.0, 'hc00': 1,
}
# Factor per instrument code.
# Presumably the contract multiplier (units per lot) - TODO confirm.
# Uses the same key set and row layout as price_tick_dic, and repeats the
# 'factor' fields of instrument_dic for the codes both tables share.
# Literals mix ints and floats.
factor_dic = {
    'IF00': 300, 'IH00': 300, 'IC00': 200, 'TF00': 10000, 'T00': 10000,
    'cu00': 5.0, 'al00': 5.0, 'zn00': 5.0, 'ni00': 1.0, 'sn00': 1.0, 'au00': 1000, 'ag00': 15,
    'rb00': 10.0, 'fu00': 10.0, 'bu00': 10.0, 'ru00': 10,
    'm00': 10, 'y00': 10, 'a00': 10, 'p00': 10, 'c00': 10, 'cs00': 10, 'l00': 5, 'v00': 5,
    'eg00': 10, 'pp00': 5.0, 'j00': 100, 'jm00': 60, 'i00': 100, 'pg00': 20,
    'SR00': 10, 'CF00': 5, 'ZC00': 100, 'FG00': 20, 'TA00': 5.0, 'MA00': 10.0, 'SA00': 20,
    'OI00': 10.0, 'RM00': 10.0, 'AP00': 10.0, 'CJ00': 5.0, 'PF00': 5.0,
    'sc00': 1000, 'sc01': 1000,
    'AgTD': 1, 'hc00': 10,
}
# Fee setting per instrument code; only a subset of the instruments is listed.
# NOTE(review): the entries mix small positive fractions (e.g. 0.23/10000)
# with negative numbers (e.g. -0.75). The convention behind the two forms
# (ratio of turnover vs. fixed amount per lot, and the meaning of the sign)
# is not visible in this file - confirm with the code that consumes it.
# The arithmetic is kept unevaluated so the derivation of each value stays visible.
fees_dic = {
    'IF00': 0.23/10000,
    'rb00': 0.5 * 1/10000,
    'ru00': -0.75,
    'ag00': 0.05 * 1/1000 * 1/2 * 1,
    'v00': -1 * 0.3,
    'TA00': -2.1,
    'CF00': -3.01,
    'i00': 1/10000,
    'sn00': -1.5,
    'p00': -2.5 * 0.6,
    'zn00': -3 * 0.5 * 0.5,
    'eg00': -3 * 0.3,
}
# Per-instrument reference data, keyed by instrument code. Each entry holds:
#   'price_tick'   - same value as price_tick_dic for the shared codes
#   'factor'       - same value as factor_dic for the shared codes
#   'date_list'    - [start, end] pairs of YYYYMMDD integers; dateToContract
#                    treats both bounds as inclusive
#   'leading_list' - contract codes; element i belongs to date_list[i]
# dateToContract exits the process when the two lists differ in length and
# returns the FIRST range that contains the date, so overlapping ranges
# resolve to the earlier contract. Dates not covered by any range (and every
# instrument whose lists are empty) make dateToContract return False.
instrument_dic = {
    'IF00': {
        'price_tick': 0.2,
        'factor': 300,
        'date_list': [[20200601, 20200617],
                      [20200618, 20200715],
                      [20200716, 20200819],
                      [20200820, 20200916],
                      [20200917, 20201015],
                      [20201016, 20201118],
                      [20201119, 20201216],
                      [20201217, 20210114],
                      [20210115, 20210210],
                      [20210218, 20210318],
                      [20210319, 20210414],
                      [20210415, 20210519],
                      [20210520, 20210617],
                      [20210618, 20210714],
                      [20210715, 20210818],
                      [20210819, 20210915],
                      [20210916, 20211014],
                      [20211015, 20211117],
                      [20211118, 20211216],
                      [20211217, 20220120],
                      [20220121, 20220216],
                      [20220217, 20220316],
                      [20220317, 20220414],
                      # NOTE(review): 20220414 is also the end of the previous range, so that
                      # day resolves to IF2204; one of the two bounds is probably off by a day.
                      [20220414, 20220518],
                      [20220519, 20220616],
                      [20220617, 20220714],
                      [20220715, 20220817],
                      [20220818, 20220908],
                      ],
        'leading_list': ['IF2006', 'IF2007', 'IF2008', 'IF2009', 'IF2010', 'IF2011', 'IF2012', 'IF2101',
                         'IF2102', 'IF2103', 'IF2104', 'IF2105', 'IF2106', 'IF2107', 'IF2108', 'IF2109',
                         'IF2110', 'IF2111', 'IF2112', 'IF2201', 'IF2202', 'IF2203', 'IF2204', 'IF2205', 'IF2206', 'IF2207', 'IF2208', 'IF2209']
    },
    'IH00': {
        'price_tick': 0.2,
        'factor': 300,
        'date_list': [],
        'leading_list': [],
    },
    'IC00': {
        'price_tick': 0.2,
        'factor': 200,
        'date_list': [
            [20210319, 20210414],
            [20210415, 20210519],
            [20210520, 20210617],
            [20210618, 20210714],
            [20210715, 20210818],
            [20210819, 20210915],
            [20210916, 20211014],
            [20211015, 20211117],
            [20211118, 20211215],
            [20211216, 20220119],
            [20220120, 20220216],
            [20220217, 20220316],
            [20220317, 20220413],
            [20220414, 20220517],
            [20220518, 20220615],
            [20220616, 20220714],
            [20220715, 20220817],
            [20220818, 20220908],
        ],
        'leading_list': ['IC2104', 'IC2105', 'IC2106', 'IC2107', 'IC2108', 'IC2109', 'IC2110', 'IC2111', 'IC2112', 'IC2201', 'IC2202', 'IC2203', 'IC2204', 'IC2205', 'IC2206', 'IC2207', 'IC2208', 'IC2209'],
    },
    'hc00': {
        'price_tick': 1,
        'factor': 10,
        'date_list': [
            [20210409, 20210813],
            [20210816, 20211201],
            [20211202, 20220329],
            [20220330, 20220830],
            [20220831, 20221120],
        ],
        'leading_list': ['hc2110', 'hc2201', 'hc2205', 'hc2210', 'hc2301'],
    },
    'TF00': {
        'price_tick': 0.005,
        'factor': 10000,
        'date_list': [],
        'leading_list': [],
    },
    'T00': {
        'price_tick': 0.005,
        'factor': 10000,
        'date_list': [],
        'leading_list': [],
    },
    'cu00': {
        'price_tick': 10.0,
        'factor': 5.0,
        'date_list': [],
        'leading_list': [],
    },
    'al00': {
        'price_tick': 5.0,
        'factor': 5.0,
        'date_list': [],
        'leading_list': [],
    },
    'zn00': {
        'price_tick': 5.0,
        'factor': 5.0,
        'date_list': [
            [20210115, 20210222],
            [20210223, 20210318],
            [20210319, 20210419],
            [20210420, 20210520],
            [20210521, 20210623],
            [20210624, 20210723],
            [20210726, 20210820],
            [20210823, 20210923],
            [20210924, 20211022],
            [20211025, 20211111],
            [20211112, 20211221],
            [20211222, 20220118],
            [20220119, 20220218],
            [20220221, 20220317],
            [20220318, 20220420],
            [20220421, 20220524],
            [20220525, 20220623],
            [20220624, 20220725],
            [20220726, 20220824],
            [20220825, 20220926],
            [20220927, 20221025],
            [20221026, 20221125],
        ],
        'leading_list': ['zn2103', 'zn2104', 'zn2105', 'zn2106', 'zn2107', 'zn2108', 'zn2109', 'zn2110', 'zn2111', 'zn2112', 'zn2201', 'zn2202', 'zn2203', 'zn2204', 'zn2205', 'zn2206', 'zn2207', 'zn2208', 'zn2209', 'zn2210', 'zn2211', 'zn2212'],
    },
    'rb00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [
            [20200601, 20200824],
            [20200825, 20201204],
            [20201207, 20210406],
            # NOTE(review): 20210406 is also the end of the previous range, so that
            # day resolves to rb2105; one of the two bounds is probably off by a day.
            [20210406, 20210809],
            [20210810, 20211124],
            [20211125, 20220327],
            [20220328, 20220829],
            [20220830, 20221129],
        ],
        'leading_list': ['rb2010', 'rb2101', 'rb2105', 'rb2110', 'rb2201', 'rb2205', 'rb2210', 'rb2301'],
    },
    'sn00': {
        'price_tick': 10.0,
        'factor': 1.0,
        'date_list': [
            [20210308, 20210331],
            [20210401, 20210506],
            [20210507, 20210616],
            [20210617, 20210722],
            [20210723, 20210826],
            [20210827, 20210924],
            [20210927, 20211022],
            [20211025, 20211123],
            [20211124, 20211221],
            [20211222, 20220122],
            # NOTE(review): this range starts on 20220120, before the previous range ends
            # (20220122); 20220120-20220122 therefore resolve to sn2202. Looks like a typo.
            [20220120, 20220222],
            [20220223, 20220425],
            [20220426, 20220529],
            [20220530, 20220627],
            [20220628, 20220725],
            [20220726, 20220817],
            [20220818, 20220923],
            [20220924, 20221012],
            [20221013, 20221130],
        ],
        'leading_list': ['sn2105', 'sn2106', 'sn2107', 'sn2108', 'sn2109', 'sn2110', 'sn2111', 'sn2112', 'sn2201', 'sn2202', 'sn2203', 'sn2205', 'sn2206', 'sn2207', 'sn2208', 'sn2209', 'sn2210', 'sn2211', 'sn2212'],
    },
    'au00': {
        'price_tick': 0.05,
        'factor': 1000,
        'date_list': [],
        'leading_list': [],
    },
    'ag00': {
        'price_tick': 1.0,
        'factor': 15,
        'date_list': [
            [20200601, 20201125],
            [20201126, 20210125],
            [20210126, 20210525],
            [20210526, 20211126],
            [20211129, 20220525],
            [20220526, 20221025],
        ],
        'leading_list': ['ag2012', 'ag2102', 'ag2106', 'ag2112', 'ag2206', 'ag2212'],
    },
    'fu00': {
        'price_tick': 1.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'bu00': {
        'price_tick': 2.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'ru00': {
        'price_tick': 5,
        'factor': 10,
        'date_list': [
            [20200601, 20200804],
            [20200805, 20201130],
            [20201201, 20210401],
            [20210402, 20210808],
            [20210809, 20211123],
            [20211124, 20220330],
            [20220331, 20220808],
            [20220809, 20221030],
        ],
        'leading_list': ['ru2009', 'ru2101', 'ru2105', 'ru2109', 'ru2201', 'ru2205', 'ru2209', 'ru2301'],
    },
    'm00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [
            [20200601, 20200803],
            [20200804, 20201111],
            [20201112, 20210330],
            [20210331, 20210809],
            [20210810, 20211202],
            [20211203, 20220329],
            [20220330, 20220506],
        ],
        'leading_list': ['m2009', 'm2101', 'm2105', 'm2109', 'm2201', 'm2205', 'm2209'],
    },
    'y00': {
        'price_tick': 2.0,
        'factor': 10,
        'date_list': [
            [20200601, 20200804],
            [20200805, 20201210],
            [20201211, 20210408],
            [20210409, 20210813],
            [20210816, 20211208],
            [20211209, 20220323],
            [20220324, 20220506],
        ],
        'leading_list': ['y2009', 'y2101', 'y2105', 'y2109', 'y2201', 'y2205', 'y2209'],
    },
    'a00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [
            [20201217, 20210420],
            [20210421, 20210813],
            [20210816, 20211019],
            [20211020, 20211207],
            [20211208, 20220222],
            [20220223, 20220614],
            [20220615, 20220824],
            [20220825, 20221014],
        ],
        'leading_list': ['a2105', 'a2109', 'a2111', 'a2201', 'a2203', 'a2207', 'a2209', 'a2211'],
    },
    'p00': {
        'price_tick': 2.0,
        'factor': 10,
        'date_list': [
            [20200601, 20200812],
            [20200813, 20201208],
            [20201209, 20201210],
            [20201211, 20210411],
            [20210412, 20210816],
            [20210817, 20211209],
            [20211210, 20220331],
            [20220401, 20220816],
            [20220817, 20221124],
        ],
        'leading_list': ['p2009', 'p2101', 'p2102', 'p2105', 'p2109', 'p2201', 'p2205', 'p2209', 'p2301'],
    },
    'c00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [],
        'leading_list': [],
    },
    'cs00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [],
        'leading_list': [],
    },
    'l00': {
        'price_tick': 5.0,
        'factor': 5,
        'date_list': [],
        'leading_list': [],
    },
    'v00': {
        'price_tick': 1.0,
        'factor': 5,
        'date_list': [
            [20200601, 20200814],
            [20200817, 20201216],
            [20201217, 20210413],
            [20210414, 20210818],
            [20210819, 20211207],
            [20211208, 20220420],
            [20220421, 20220821],
            [20220822, 20221124],
        ],
        'leading_list': ['v2009', 'v2101', 'v2105', 'v2109', 'v2201', 'v2205', 'v2209', 'v2301'],
    },
    'eg00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [
            [20201211, 20210414],
            [20210415, 20210823],
            [20210824, 20211214],
            [20211215, 20220422],
            [20220425, 20220819],
            [20220820, 20221120],
        ],
        'leading_list': ['eg2105', 'eg2109', 'eg2201', 'eg2205', 'eg2209', 'eg2301'],
    },
    # NOTE(review): unlike every other key this one has no '00' suffix, and it has
    # no counterpart in price_tick_dic / factor_dic - confirm whether 'eb00' was meant.
    'eb': {
        'price_tick': 1.0,
        'factor': 5,
        'date_list': [],
        'leading_list': [],
    },
    'pp00': {
        'price_tick': 1.0,
        'factor': 5.0,
        'date_list': [],
        'leading_list': [],
    },
    'j00': {
        'price_tick': 0.5,
        'factor': 100,
        'date_list': [],
        'leading_list': [],
    },
    'jm00': {
        'price_tick': 0.5,
        'factor': 60,
        'date_list': [],
        'leading_list': [],
    },
    'i00': {
        'price_tick': 0.5,
        'factor': 100,
        'date_list': [
            [20210331, 20210803],
            [20210804, 20211130],
            [20211201, 20220323],
            [20220324, 20220803],
            [20220804, 20220809],
        ],
        'leading_list': ['i2109', 'i2201', 'i2205', 'i2209', 'i2301'],
    },
    'pg00': {
        'price_tick': 1.0,
        'factor': 20,
        'date_list': [],
        'leading_list': [],
    },
    'SR00': {
        'price_tick': 1.0,
        'factor': 10,
        'date_list': [],
        'leading_list': [],
    },
    'CF00': {
        'price_tick': 5,
        'factor': 5,
        'date_list': [
            [20200601, 20200810],
            [20200811, 20201207],
            [20201208, 20210408],
            [20210409, 20210805],
            [20210806, 20211201],
            [20211202, 20220414],
            [20220415, 20220711],
        ],
        'leading_list': ['CF009', 'CF101', 'CF105', 'CF109', 'CF201', 'CF205', 'CF209'],
    },
    'ZC00': {
        'price_tick': 0.2,
        'factor': 100,
        'date_list': [],
        'leading_list': [],
    },
    'FG00': {
        'price_tick': 1.0,
        'factor': 20,
        'date_list': [],
        'leading_list': [],
    },
    'TA00': {
        'price_tick': 2.0,
        'factor': 5.0,
        'date_list': [
            [20211210, 20220411],
            [20220412, 20220620],
        ],
        'leading_list': ['TA205', 'TA209'],
    },
    'MA00': {
        'price_tick': 1.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'SA00': {
        'price_tick': 1.0,
        'factor': 20,
        'date_list': [],
        'leading_list': [],
    },
    'OI00': {
        'price_tick': 1.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'RM00': {
        'price_tick': 1.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'AP00': {
        'price_tick': 1.0,
        'factor': 10.0,
        'date_list': [],
        'leading_list': [],
    },
    'CJ00': {
        'price_tick': 5.0,
        'factor': 5.0,
        'date_list': [],
        'leading_list': [],
    },
    'PF00': {
        'price_tick': 2.0,
        'factor': 5.0,
        'date_list': [],
        'leading_list': [],
    },
    'sc00': {
        'price_tick': 0.1,
        'factor': 1000,
        'date_list': [],
        'leading_list': [],
    },
    'sc01': {
        'price_tick': 0.1,
        'factor': 1000,
        'date_list': [],
        'leading_list': [],
    },
    'AgTD': {
        'price_tick': 1.0,
        'factor': 1,
        'date_list': [],
        'leading_list': [],
    }
}
def dateToContract(instrument, date):
    """Resolve the leading contract of ``instrument`` on ``date``.

    The instrument's ``'date_list'`` and ``'leading_list'`` in
    ``instrument_dic`` are walked in parallel; the contract paired with the
    first range whose inclusive bounds contain ``int(date)`` is returned.

    Args:
        instrument: key of ``instrument_dic`` (an unknown key raises KeyError).
        date: YYYYMMDD date as an int or a string accepted by ``int()``.

    Returns:
        The contract code, or ``False`` when no range contains the date
        (including instruments whose lists are empty).

    Exits the process via ``sys.exit('dismatch')`` when the two lists have
    different lengths.
    """
    entry = instrument_dic[instrument]
    windows = entry['date_list']
    contracts = entry['leading_list']

    if len(windows) != len(contracts):
        sys.exit('dismatch')

    for window, contract in zip(windows, contracts):
        if window[0] <= int(date) <= window[1]:
            return contract
    return False
import numpy as np
def clear_off_trading(df, axis, off_range=((23000000, 90000000),  # (230000000, 240000000),
                                           (113000000, 133000000), (150000000, 210000000))):
    """Drop, in place, the rows of ``df`` whose ``axis`` value lies in an off-trading window.

    A row is removed when ``lower < df[axis] < upper`` for any
    ``(lower, upper)`` pair in ``off_range``; values equal to a bound are
    kept. Rows are dropped by index label and the index is not reset
    afterwards.

    Args:
        df: pandas DataFrame; it is modified in place.
        axis: name of the column to test.
            # presumably HHMMSSmmm integers, as produced by the 'timestamp'
            # column written elsewhere in this file - TODO confirm for other callers
        off_range: iterable of ``(lower, upper)`` pairs. The default is an
            immutable tuple so it cannot be altered between calls. The
            ``(230000000, 240000000)`` window is intentionally left disabled.

    Returns:
        None.
    """
    column = df[axis]
    mask = np.zeros(len(df), dtype=bool)
    for window in off_range:
        # Combine as plain numpy arrays so the in-place OR never depends on
        # pandas' handling of ufuncs with an ``out`` argument.
        mask |= ((column > window[0]) & (column < window[1])).to_numpy()
    df.drop(df.index[mask], axis=0, inplace=True)
if __name__ == '__main__':
    # Manual smoke check: resolve both sample instruments first, then print
    # the leading contract found for each on 2022-10-20.
    _resolved = {
        _code: dateToContract(instrument=_code, date='20221020')
        for _code in ('v00', 'eg00')
    }
    for _code, _contract in _resolved.items():
        print(f"{_code}_main_contract: {_contract}")
// constant.py
from enum import Enum
class Exchange(Enum):
    """Identifiers of the exchanges the data tools distinguish.

    Every member's value is the same text as its name, so a member can be
    looked up from its code with ``Exchange("DCE")``.
    """

    CFFEX = "CFFEX"
    DCE = "DCE"
    SHFE = "SHFE"
    CZCE = "CZCE"
// main.py
import datetime
import os
import re
import shutil
from constant import Exchange
import pandas as pd
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
from base_dict import dateToContract, clear_off_trading
class HftDataPath:
    """Builds the locations of the raw log files and of the generated output folders.

    All paths hang off the hard-coded directory returned by ``source_data_dir``.
    """

    @classmethod
    def data_path(cls, date_str: str, exchange: "Exchange") -> str:
        """Return the raw log file path for ``date_str`` on ``exchange``.

        Args:
            date_str: date text inserted verbatim into the file name
                (callers in this file pass ``YYYY_MM_DD``).
            exchange: exchange whose file layout should be used.

        Returns:
            ``<source_data_dir>/ht_dce_L2-<date_str>.log`` for ``Exchange.DCE``;
            an empty string for every other value, because no layout is
            defined for them yet.
        """
        if exchange == Exchange.DCE:
            return f'{cls.source_data_dir()}/ht_dce_L2-{date_str}.log'
        # SHFE, CZCE, CFFEX and anything else: not supported yet.
        return ''

    @classmethod
    def source_data_dir(cls) -> str:
        """Return the directory that holds the raw ``ht_dce_L2-*.log`` files."""
        # Alternative local layout:
        # return "C:/Users/tqz_trader/Desktop/hft_data_parser"
        # Raw string: the backslashes are literal path separators. Written as a
        # normal string they form invalid escape sequences (SyntaxWarning on 3.12+).
        return r"E:\海通期货\大连L2\ITfuwu_dce_l2\ITfuwu_dce_l2_2021\ht_dce_L2-2021_05"

    @classmethod
    def output_data_dir(cls) -> str:
        """Return the directory that receives the per-contract CSV files."""
        # Alternative local layout:
        # return "C:/Users/tqz_trader/Desktop/hft_data_parser/output_data"
        return f'{cls.source_data_dir()}/output_data'

    @classmethod
    def output_main_data_dir(cls) -> str:
        """Return the directory that receives the main-contract CSV files."""
        # Alternative local layout:
        # return "C:/Users/tqz_trader/Desktop/hft_data_parser/output_main_data"
        return f'{cls.source_data_dir()}/output_main_data'
class HftDataOperator:
    """Loads one day's raw L2 log and converts it to the normalised tick layout.

    Typical use is the fluent chain
    ``HftDataOperator(date_str, exchange).split_by_symbol().dump_to_csv()``.
    """

    # Column names assigned, in order, to the header-less raw log file.
    __columns: list = [
        "ContractID",
        "UpdateTime",
        "LastPrice",
        "MatchTotQty",
        "LastOpenInterest",
        "OpenInterest",
        "InterestChg",
        "Turnover",
        "BidPrice1",
        "BidVolume1",
        "BidImplyVolume1",
        "BidPrice2",
        "BidVolume2",
        "BidImplyVolume2",
        "BidPrice3",
        "BidVolume3",
        "BidImplyVolume3",
        "BidPrice4",
        "BidVolume4",
        "BidImplyVolume4",
        "BidPrice5",
        "BidVolume5",
        "BidImplyVolume5",
        "AskPrice1",
        "AskVolume1",
        "AskImplyVolume1",
        "AskPrice2",
        "AskVolume2",
        "AskImplyVolume2",
        "AskPrice3",
        "AskVolume3",
        "AskImplyVolume3",
        "AskPrice4",
        "AskVolume4",
        "AskImplyVolume4",
        "AskPrice5",
        "AskVolume5",
        "AskImplyVolume5",
    ]

    # Columns, in order, of the normalised output frame.
    __format_columns: list = [
        'Date',
        'Time',
        'Symbol',
        'LastPrice',
        'AccVolume',
        'OpenInterest',
        'Turnover',
        'HighLimit',
        'LowLimit',
        'BidPrice1',
        'BidVolume1',
        'BidPrice2',
        'BidVolume2',
        'BidPrice3',
        'BidVolume3',
        'BidPrice4',
        'BidVolume4',
        'BidPrice5',
        'BidVolume5',
        'AskPrice1',
        'AskVolume1',
        'AskPrice2',
        'AskVolume2',
        'AskPrice3',
        'AskVolume3',
        'AskPrice4',
        'AskVolume4',
        'AskPrice5',
        'AskVolume5',
        'LocalTime',
        'LocalNS',
        'TotalBuyQty',
        'TotalSellQty',
        'AvgBuyPrice',
        'AvgSellPrice',
        'timestamp'
    ]

    def __init__(self, date_str: str, exchange: "Exchange", filter_option: bool = True):
        """Read and normalise the raw log of one trading date.

        Args:
            date_str: date in ``YYYY_MM_DD`` form; selects the source file.
            exchange: exchange whose file should be loaded.
            filter_option: when true, rows whose ContractID contains ``'-'``
                are discarded.
                # presumably these are combination/spread contracts - TODO confirm

        Raises:
            AssertionError: if the source file does not exist.
        """
        path = HftDataPath.data_path(date_str=date_str, exchange=exchange)
        assert os.path.exists(path), f'{path} not exists.'

        self.__date_str: str = date_str
        self.__exchange = exchange
        # Filled by split_by_symbol(); None until then.
        self.__rets_map = None

        raw = pd.read_csv(path, low_memory=False, header=None)
        raw.columns = self.__columns
        if filter_option:
            raw = raw[~raw.ContractID.str.contains('-')]
        # reset_index returns a fresh frame, so later column assignments never
        # write into a slice of the frame that was read from disk.
        self.__data: pd.DataFrame = raw.reset_index(drop=True)

        self.__reset_format()
        self.__data = self.__data.dropna()

    def split_by_symbol(self) -> "HftDataOperator":
        """Split the normalised data into one frame per Symbol.

        Each per-symbol frame gets a fresh 0-based index. The frames are
        available through ``rets_map()`` afterwards.

        Returns:
            ``self``, so that ``dump_to_csv()`` can be chained.
        """
        assert self.__data is not None, f'__data is None'

        self.__rets_map = {
            symbol: frame.reset_index(drop=True)
            for symbol, frame in self.__data.groupby('Symbol', sort=False)
        }
        return self

    def dump_to_csv(self):
        """Write every per-symbol frame to its own CSV file.

        Files go to
        ``<output_data_dir>/data<letters>00/<YYYYMMDD>/<symbol>_<YYYYMMDD>.csv``
        where ``<letters>`` are the leading (at most three) letters of the
        symbol. Missing directories are created.

        Raises:
            AssertionError: if ``split_by_symbol()`` has not been called.
        """
        assert self.__rets_map is not None, f'__rets_map is None'

        # Ensure the output root exists even when there is nothing to write.
        target_dir = HftDataPath.output_data_dir()
        os.makedirs(target_dir, exist_ok=True)

        date_str = self.__date_str.replace("_", "")

        # Ensure the per-product and per-date directories exist.
        symbol_dir_map = {}
        for symbol in self.rets_map().keys():
            product = re.match(r"^[a-zA-Z]{1,3}", symbol).group()
            symbol_date_dir = f'{target_dir}/data{product}00/{date_str}'
            os.makedirs(symbol_date_dir, exist_ok=True)
            symbol_dir_map[symbol] = symbol_date_dir

        # Write one CSV per symbol.
        for symbol, symbol_df in self.rets_map().items():
            target_path = f'{symbol_dir_map[symbol]}/{symbol}_{date_str}.csv'
            symbol_df.to_csv(target_path, index=False)

    def data(self) -> pd.DataFrame:
        """Return the normalised frame holding all symbols."""
        return self.__data

    def rets_map(self) -> dict:
        """Return the symbol -> frame mapping, or None before ``split_by_symbol()``."""
        return self.__rets_map

    def __reset_format(self):
        """Rebuild ``self.__data`` in the normalised column layout."""
        assert self.__data is not None, f'__data is None.'

        # Time, Symbol (rename returns a new frame; the raw one is left alone)
        data = self.__data.rename(columns={'UpdateTime': 'Time', 'ContractID': 'Symbol'})

        # Date
        data['Date'] = self.__date_str.replace('_', '')

        # HighLimit, LowLimit: constant placeholders
        data['HighLimit'] = 1000000
        data['LowLimit'] = 0

        # LocalTime, timestamp: the Time text with ':' and '.' stripped.
        # regex=False is required: as a regular expression '.' matches every
        # character, which would blank the whole string on pandas >= 2.0.
        compact_time = data['Time'].str.replace(':', '', regex=False).str.replace('.', '', regex=False)
        data['LocalTime'] = compact_time
        data['timestamp'] = compact_time

        # LocalNS: nanoseconds since the epoch of "<date> <Time>"
        data['LocalNS'] = pd.to_datetime(self.__date_str.replace('_', '-') + ' ' + data['Time']).apply(
            lambda x: x.value)

        # TotalBuyQty, TotalSellQty, AvgBuyPrice, AvgSellPrice: constant zero
        data['TotalBuyQty'] = 0
        data['TotalSellQty'] = 0
        data['AvgBuyPrice'] = 0
        data['AvgSellPrice'] = 0

        # AccVolume
        data['AccVolume'] = data['MatchTotQty']

        # Keep only the output columns, in output order.
        self.__data = data[self.__format_columns]
class HftDataManager:
    """Batch entry points that run the parsing steps over an inclusive date range."""

    @classmethod
    def parser(cls, start_date_str: str, end_date_str: str, exchange: Exchange):
        """
        Split every available raw log in the date range into per-contract CSV files.

        Dates without a source file are skipped silently.

        :param start_date_str: first date (YYYY_MM_DD) of the source files
        :param end_date_str: last date (YYYY_MM_DD) of the source files, inclusive
        :param exchange: exchange whose source files are parsed
        """
        first_day = datetime.datetime.strptime(start_date_str, '%Y_%m_%d').date()
        last_day = datetime.datetime.strptime(end_date_str, '%Y_%m_%d').date()

        # An end date before the start date yields an empty range.
        for offset in range((last_day - first_day).days + 1):
            day = first_day + datetime.timedelta(days=offset)
            day_token = str(day).replace("-", "_")

            raw_file = HftDataPath.data_path(date_str=day_token, exchange=exchange)
            if not os.path.exists(path=raw_file):
                continue

            HftDataOperator(date_str=day_token, exchange=exchange).split_by_symbol().dump_to_csv()
            print(f'date({day}) parser over.')

    @classmethod
    def refresh_main_contracts_dir(cls, target_contracts: list, start_date_str: str, end_date_str: str):
        """
        Rebuild the main-contract output directory from the per-contract output.

        The target directory is deleted and recreated. For every instrument
        and date, the CSV of that date's leading contract is copied over and
        then cleaned: off-trading rows are removed and only the first row per
        timestamp is kept.

        :param target_contracts: instrument codes to refresh
        :param start_date_str: first date (YYYY_MM_DD) of the source files
        :param end_date_str: last date (YYYY_MM_DD) of the source files, inclusive
        """
        source_dir = HftDataPath.output_data_dir()
        assert os.path.exists(path=source_dir), f'{source_dir} not exist.'

        # Always start from an empty target directory.
        target_dir = HftDataPath.output_main_data_dir()
        if os.path.exists(path=target_dir):
            shutil.rmtree(target_dir)
        os.mkdir(target_dir)

        for contract in target_contracts:
            first_day = datetime.datetime.strptime(start_date_str, '%Y_%m_%d').date()
            last_day = datetime.datetime.strptime(end_date_str, '%Y_%m_%d').date()

            source_sub_dir = f'{source_dir}/data{contract}'
            target_sub_dir = f'{target_dir}/data{contract}'
            if not os.path.exists(path=target_sub_dir):
                os.mkdir(target_sub_dir)

            for offset in range((last_day - first_day).days + 1):
                day = first_day + datetime.timedelta(days=offset)
                day_token = str(day).replace('-', '')
                main_contract = dateToContract(instrument=contract, date=day_token)

                source_day_dir = f'{source_sub_dir}/{day_token}'
                target_day_dir = f'{target_sub_dir}/{day_token}'
                if os.path.exists(path=source_day_dir):
                    os.mkdir(target_day_dir)

                csv_name = f'{main_contract}_{day_token}.csv'
                source_csv = f'{source_day_dir}/{csv_name}'
                target_csv = f'{target_day_dir}/{csv_name}'
                # Also covers dates with no leading contract: the file name
                # built from a False lookup result does not exist.
                if not os.path.exists(path=source_csv):
                    continue

                shutil.copy(src=source_csv, dst=target_csv)

                # Clean the copied main-contract data in place on disk.
                frame = pd.read_csv(target_csv)
                clear_off_trading(frame, "timestamp")
                frame = frame.drop_duplicates(subset='timestamp', keep='first', inplace=False)
                frame = frame.reset_index(drop=True)
                frame.to_csv(target_csv, index=False)
if __name__ == '__main__':
    # Inclusive date window (YYYY_MM_DD) of the raw logs to process.
    _start_date_str, _end_date_str = "2021_05_14", "2021_05_24"

    HftDataManager.parser(
        start_date_str=_start_date_str,
        end_date_str=_end_date_str,
        exchange=Exchange.DCE,
    )

    # Follow-up step, currently disabled:
    # HftDataManager.refresh_main_contracts_dir(
    #     target_contracts=['v00', 'eg00'],
    #     start_date_str=_start_date_str,
    #     end_date_str=_end_date_str
    # )