import os
import re
import gc
# import pandas
from tqz_extern.pandas_operator import pandas
from tqz_extern.json_operator import TQZJsonOperator
import warnings
warnings.filterwarnings("ignore")
class MarketDataParser:
    """
    Split one day's market depth csv into per-instrument csv files, then derive
    daily bars and the day's main-contract configuration from those files.

    Every method is a classmethod and all working state is stored on the class
    itself, so only one trading day can be processed at a time.
    """

    # Trading sessions per symbol; each symbol entry is read through its
    # 'night' and 'day' keys. Loaded once, when the class body is executed,
    # from a path relative to the current working directory.
    session_map = TQZJsonOperator.tqz_load_jsonfile(jsonfile='../trading_time/source_trading_time.json')

    # Raw market depth dataframe of the day being parsed (None when idle).
    source_content = None
    # Day being parsed, eg: 20230926.
    datetime_str: str = ''
    # Output dir of the per-instrument csv files for the day being parsed;
    # recomputed for every day from market_depth_format_root_dir.
    market_depth_format_data_dir: str = ''
    daily_bar_dir: str = 'E:/futures_market_data/market_daily_bar_data'
    daily_main_contracts_config_dir: str = 'E:/futures_market_data/daily_main_contracts_config'
    # symbol -> bar dictionary of the contract with the largest last-tick
    # Volume seen so far for that symbol.
    main_contract_map: dict = {}

    # Root dirs of the raw input and of the per-instrument output.
    market_depth_data_dir: str = 'E:/futures_market_data/market_depth_data'
    market_depth_format_root_dir: str = 'E:/futures_market_data/market_depth_format_data'

    # (daily bar column, market depth column) pairs, in output column order.
    _BAR_COLUMN_SOURCES = (
        ('Open', 'OpenPrice'),
        ('High', 'HighestPrice'),
        ('Low', 'LowestPrice'),
        ('Close', 'LastPrice'),
        ('Volume', 'Volume'),
        ('OpenInterest', 'OpenInterest'),
        ('PriceTick', 'PriceTick'),
        ('VolumeMultiple', 'VolumeMultiple'),
    )

    @classmethod
    def run(cls, parser_datetime_list):
        """
        Parse all market depth data of every day in parser_datetime_list.

        Days are processed in ascending order. The per-day class state is
        cleared after each day, including when that day raised, so a failed
        day cannot leak its main contracts into a later one.

        :param parser_datetime_list: iterable of day strings, eg: ['20230926'].
        """
        for parser_datetime in sorted(parser_datetime_list):
            try:
                (
                    cls.dump_all_format_csv(datetime_str=parser_datetime)
                    .with_daily_bar()
                    .with_daily_main_contracts_config()
                )
            finally:
                cls.source_content = None
                cls.main_contract_map = {}
                gc.collect()

    @classmethod
    def dump_all_format_csv(cls, datetime_str: str):
        """
        Dump all instruments' format csv of one day.

        Rows are grouped by their InstrumentID column; each group is cleaned
        and written to <market_depth_format_data_dir>/<InstrumentID>.csv.

        :param datetime_str: day to parse, eg: 20230926.
        :return: cls, so calls can be chained.
        """
        cls.__check_source_file(datetime_str=datetime_str)
        assert cls.source_content is not None, 'cls.source_content is None.'

        # One pass over the frame instead of one full boolean scan per instrument.
        for instrumentID, single_instrument_df in cls.source_content.groupby('InstrumentID', sort=False):
            cls.__get_format_market_data(
                instrument_source_df=single_instrument_df
            ).to_csv(f'{cls.market_depth_format_data_dir}/{instrumentID}.csv', index=False)

        return cls

    @classmethod
    def with_daily_bar(cls):
        """
        Dump daily bar.

        For every csv under market_depth_format_data_dir the last row with
        non-zero Volume and non-zero OpenInterest becomes the instrument's
        daily bar. The instrument with the largest such Volume per symbol is
        recorded in main_contract_map and also written to <symbol>888.csv.

        :return: cls, so calls can be chained.
        """
        assert os.path.exists(cls.market_depth_format_data_dir) is True, f'Bad path: {cls.market_depth_format_data_dir} is not exist.'
        os.makedirs(cls.daily_bar_dir, exist_ok=True)

        for root, _dirs, files in os.walk(cls.market_depth_format_data_dir):
            for file_name in files:
                # splitext copes with names that have no dot or several dots.
                if os.path.splitext(file_name)[1] != '.csv':
                    continue

                instrument_df = pandas.read_csv(os.path.join(root, file_name))
                traded_df = instrument_df[(instrument_df['Volume'] != 0) & (instrument_df['OpenInterest'] != 0)]
                if traded_df.empty:
                    continue

                bar = cls.__last_tick_bar(traded_df)
                exchange_instrument = bar['ExchangeInstrument']
                symbol = re.sub(r'\d+', '', exchange_instrument)

                instrument_dir = f'{cls.daily_bar_dir}/{symbol}'
                os.makedirs(instrument_dir, exist_ok=True)

                daily_row = {'date': cls.datetime_str}
                daily_row.update({column: bar[column] for column, _ in cls._BAR_COLUMN_SOURCES})
                cls.__upsert_daily_bar(f'{instrument_dir}/{exchange_instrument}.csv', daily_row)

                # Strictly greater: on equal Volume the first contract seen stays.
                current_main = cls.main_contract_map.get(symbol)
                if current_main is None or current_main['Volume'] < bar['Volume']:
                    cls.main_contract_map[symbol] = bar

        for exchange_instru, main_contract_info in cls.main_contract_map.items():
            main_row = {
                'date': cls.datetime_str,
                'instrumentID': main_contract_info['ExchangeInstrument'],
            }
            main_row.update({column: main_contract_info[column] for column, _ in cls._BAR_COLUMN_SOURCES})
            cls.__upsert_daily_bar(f'{cls.daily_bar_dir}/{exchange_instru}/{exchange_instru}888.csv', main_row)

        return cls

    @classmethod
    def with_daily_main_contracts_config(cls):
        """
        Dump daily main contracts config.

        Writes a symbol -> ExchangeInstrument json built from
        main_contract_map to
        <daily_main_contracts_config_dir>/<datetime_str>_main_contracts.json.

        :return: cls, so calls can be chained.
        """
        os.makedirs(cls.daily_main_contracts_config_dir, exist_ok=True)

        main_contract_format_map: dict = {
            exchange_instru: info['ExchangeInstrument']
            for exchange_instru, info in cls.main_contract_map.items()
        }

        TQZJsonOperator.tqz_write_jsonfile(
            content=main_contract_format_map,
            target_jsonfile=f'{cls.daily_main_contracts_config_dir}/{cls.datetime_str}_main_contracts.json'
        )

        return cls

    @classmethod
    def __check_source_file(cls, datetime_str: str):
        """
        Check single day's market depth data csv file and load it.

        Sets datetime_str, market_depth_format_data_dir (created when missing)
        and source_content on the class.

        :param datetime_str: datetime of need parse, eg: 20230926.
        """
        cls.datetime_str = datetime_str
        year, month = datetime_str[:4], datetime_str[4:6]

        source_path = f'{cls.market_depth_data_dir}/{year}/{month}/market_depth_data_{datetime_str}.csv'
        cls.market_depth_format_data_dir = f'{cls.market_depth_format_root_dir}/{year}/{month}/{datetime_str}'

        assert os.path.exists(source_path) is True, f'Bad source_path {source_path}.'
        os.makedirs(cls.market_depth_format_data_dir, exist_ok=True)

        cls.source_content = pandas.read_csv(source_path)

    @classmethod
    def __get_format_market_data(cls, instrument_source_df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Clean single instrument dataframe.

        Keeps only the rows whose UpdateTime falls inside one of the symbol's
        sessions (start inclusive, end exclusive) and sorts them by Timestamp.
        The input dataframe is not modified.

        :param instrument_source_df: source dataframe of single instrument
        :return: single instrument dataframe after clean.
        """
        exchange_instruments = instrument_source_df['ExchangeInstrument'].unique()
        assert len(exchange_instruments) == 1, f'Bad ExchangeInstrument {exchange_instruments}.'

        symbol = re.sub(r'\d+', '', exchange_instruments[0])
        assert symbol in cls.session_map.keys(), f'Bad symbol: {symbol}.'
        symbol_sessions = cls.session_map[symbol]['night'] + cls.session_map[symbol]['day']

        update_time = instrument_source_df['UpdateTime']
        in_session = pandas.Series(False, index=instrument_source_df.index)
        for single_session in symbol_sessions:
            if len(single_session) != 2:
                # Not a (start, end) pair: nothing to match against.
                continue

            session_start, session_end = single_session
            if session_start < session_end:
                in_session |= (update_time >= session_start) & (update_time < session_end)
            elif session_start > session_end:
                # The session wraps past midnight, so either side of it matches.
                in_session |= (update_time >= session_start) | (update_time < session_end)

        # A stable sort keeps the file order of rows sharing one Timestamp.
        instrument_format_df = instrument_source_df[in_session].sort_values(
            by='Timestamp', ascending=True, kind='mergesort'
        )
        return instrument_format_df.reset_index(drop=True)

    @classmethod
    def __last_tick_bar(cls, instrument_df: pandas.DataFrame) -> dict:
        """
        Build the bar dictionary stored in main_contract_map from the last row.

        :param instrument_df: non-empty dataframe of a single instrument.
        :return: dict with ExchangeInstrument, Date and the daily bar columns.
        """
        last_tick_df = instrument_df.tail(1)

        bar = {
            'ExchangeInstrument': last_tick_df['ExchangeInstrument'].tolist()[0],
            'Date': cls.datetime_str,
        }
        for bar_column, source_column in cls._BAR_COLUMN_SOURCES:
            bar[bar_column] = last_tick_df[source_column].tolist()[0]

        return bar

    @staticmethod
    def __upsert_daily_bar(csv_path: str, bar_row: dict):
        """
        Add one day's row to a daily bar csv, creating the file when missing.

        A file that already has a row for bar_row['date'] is left untouched;
        otherwise the row is added and the file is rewritten sorted by date.

        :param csv_path: path of the daily bar csv.
        :param bar_row: column -> value of the row, must contain 'date'.
        """
        new_row_df = pandas.DataFrame([bar_row])
        if not os.path.exists(csv_path):
            new_row_df.to_csv(csv_path, index=False)
            return

        history_df = pandas.read_csv(csv_path)
        # Dates are compared as strings because read_csv may parse them as ints.
        if bar_row['date'] in history_df['date'].astype(str).values.tolist():
            return

        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        history_df = pandas.concat([history_df, new_row_df], ignore_index=True)
        history_df['date'] = history_df['date'].astype(str)
        history_df.sort_values(by='date', ascending=True, inplace=True)
        history_df.reset_index(drop=True, inplace=True)
        history_df.to_csv(csv_path, index=False)
if __name__ == '__main__':
    # Days (YYYYMMDD) whose market depth dumps are parsed when run as a script.
    days_to_parse = ['20230926', '20230928']
    MarketDataParser.run(days_to_parse)
量化交易之One Piece篇 - onepiece_rsh - 全市场期货数据解析(新增dump全市场当日期货主力配置文件)
最新推荐文章于 2024-05-03 11:07:56 发布