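# 量化交易之One Piece篇 - onepiece_rsh - 全市场期货数据解析(新增dump全市场当日期货主力配置文件)
# (Quant trading, "One Piece" series - onepiece_rsh: whole-market futures data parsing;
#  adds a dump of the day's main-contract config file for the whole futures market.)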

import os
import re
import gc

# import pandas
from tqz_extern.pandas_operator import pandas
from tqz_extern.json_operator import TQZJsonOperator

import warnings
warnings.filterwarnings("ignore")

class MarketDataParser:
    """
    Parse whole-market futures depth data, one trading day at a time.

    For every requested day the pipeline
      1. splits the raw depth csv into one cleaned csv per instrument,
      2. appends one daily bar per instrument and one per product main contract,
      3. dumps the product -> main contract mapping of that day as a json file.

    All working state is kept in class attributes, so the class processes a single
    day at a time and the chained classmethods must be called in the order above.
    """

    # Trading sessions per product symbol, read once when the class is defined.
    # NOTE: the path is relative, so it is resolved against the current working directory.
    session_map = TQZJsonOperator.tqz_load_jsonfile(jsonfile='../trading_time/source_trading_time.json')

    # Raw depth dataframe of the day currently being parsed (None when idle).
    source_content = None

    datetime_str: str = ''
    market_depth_format_data_dir: str = ''
    # Root directories; override these class attributes to relocate the data.
    market_depth_data_dir: str = 'E:/futures_market_data/market_depth_data'
    market_depth_format_root_dir: str = 'E:/futures_market_data/market_depth_format_data'
    daily_bar_dir: str = 'E:/futures_market_data/market_daily_bar_data'
    daily_main_contracts_config_dir: str = 'E:/futures_market_data/daily_main_contracts_config'

    # product symbol -> bar info of the highest-volume contract of the current day.
    main_contract_map: dict = {}

    # Daily bar column -> depth csv column the value is taken from.
    _bar_source_columns = {
        'Open': 'OpenPrice',
        'High': 'HighestPrice',
        'Low': 'LowestPrice',
        'Close': 'LastPrice',
        'Volume': 'Volume',
        'OpenInterest': 'OpenInterest',
        'PriceTick': 'PriceTick',
        'VolumeMultiple': 'VolumeMultiple',
    }

    @classmethod
    def run(cls, parser_datetime_list):
        """
        Parse all market depth data of every day in parser_datetime_list.
        :param parser_datetime_list: iterable of day strings, eg: ['20230926', '20230928'];
                                     the days are processed in ascending order.
        """
        for parser_datetime in sorted(parser_datetime_list):
            try:
                cls.dump_all_format_csv(
                    datetime_str=parser_datetime
                ).with_daily_bar().with_daily_main_contracts_config()
            finally:
                # Drop the per-day state even when a day fails, so a later run
                # never starts from a half-processed day.
                cls.source_content = None
                cls.main_contract_map = {}
                gc.collect()

    @classmethod
    def dump_all_format_csv(cls, datetime_str: str):
        """
        Dump all instruments' format csv of one day.
        :param datetime_str: day to parse, eg: 20230926.
        :return: cls, so that the next step can be chained.
        """
        cls.__check_source_file(datetime_str=datetime_str)

        assert cls.source_content is not None, f'cls.source_content is None.'

        # One pass over the raw data instead of one full boolean filter per instrument.
        for instrument_id, single_instrument_df in cls.source_content.groupby('InstrumentID', sort=False):
            cls.__get_format_market_data(
                instrument_source_df=single_instrument_df
            ).to_csv(f'{cls.market_depth_format_data_dir}/{instrument_id}.csv', index=False)

        return cls

    @classmethod
    def with_daily_bar(cls):
        """
        Dump daily bar.

        Reads every format csv of the current day, appends the day's bar to
        <daily_bar_dir>/<product>/<instrument>.csv and, per product, appends the bar
        of the contract with the largest last-row Volume to <product>888.csv.
        :return: cls, so that the next step can be chained.
        """

        assert os.path.exists(cls.market_depth_format_data_dir) is True, f'Bad path: {cls.market_depth_format_data_dir} is not exist.'
        os.makedirs(cls.daily_bar_dir, exist_ok=True)

        # The map is rebuilt from the day's directory, so results of an earlier
        # day must not take part in today's volume comparison.
        cls.main_contract_map = {}

        for root, _dirs, files in os.walk(cls.market_depth_format_data_dir):
            for file in files:
                if not file.endswith('.csv'):
                    continue

                instrument_df = pandas.read_csv(os.path.join(root, file))

                # Rows without volume or open interest carry no usable bar data.
                traded_df = instrument_df[(instrument_df['Volume'] != 0) & (instrument_df['OpenInterest'] != 0)]
                if 0 == len(traded_df):
                    continue

                # The last remaining row provides the day's bar values.
                last_line_df = traded_df.tail(1)
                exchange_instrument = last_line_df['ExchangeInstrument'].tolist()[0]
                ret_exchange_instrument = re.sub(r'\d+', '', exchange_instrument)
                bar = cls.__build_daily_bar(last_line_df=last_line_df)

                instrument_dir = f'{cls.daily_bar_dir}/{ret_exchange_instrument}'
                os.makedirs(instrument_dir, exist_ok=True)
                cls.__append_daily_bar(
                    csv_path=f'{instrument_dir}/{exchange_instrument}.csv',
                    record={'date': cls.datetime_str, **bar}
                )

                # Keep, per product, the contract with the largest volume seen so far.
                pre_contract = cls.main_contract_map.get(ret_exchange_instrument)
                if pre_contract is None or pre_contract['Volume'] < bar['Volume']:
                    cls.main_contract_map[ret_exchange_instrument] = {
                        'ExchangeInstrument': exchange_instrument,
                        'Date': cls.datetime_str,
                        **bar,
                    }

        for exchange_instru, main_contract_info in cls.main_contract_map.items():
            record = {'date': cls.datetime_str, 'instrumentID': main_contract_info['ExchangeInstrument']}
            record.update({column: main_contract_info[column] for column in cls._bar_source_columns})
            cls.__append_daily_bar(
                csv_path=f'{cls.daily_bar_dir}/{exchange_instru}/{exchange_instru}888.csv',
                record=record
            )

        return cls

    @classmethod
    def with_daily_main_contracts_config(cls):
        """
        Dump daily main contracts config.

        Writes {product: main contract} of the current day to
        <daily_main_contracts_config_dir>/<datetime_str>_main_contracts.json.
        :return: cls, consistent with the other chainable steps.
        """
        os.makedirs(cls.daily_main_contracts_config_dir, exist_ok=True)

        main_contract_format_map = {
            exchange_instru: info['ExchangeInstrument']
            for exchange_instru, info in cls.main_contract_map.items()
        }

        TQZJsonOperator.tqz_write_jsonfile(
            content=main_contract_format_map,
            target_jsonfile=f'{cls.daily_main_contracts_config_dir}/{cls.datetime_str}_main_contracts.json'
        )

        return cls

    @classmethod
    def __build_daily_bar(cls, last_line_df) -> dict:
        """
        Build the daily bar values from a one-row dataframe.
        :param last_line_df: dataframe holding the single row the bar is taken from.
        :return: dict keyed by daily bar column (Open, High, ..., VolumeMultiple).
        """
        return {
            bar_column: last_line_df[source_column].tolist()[0]
            for bar_column, source_column in cls._bar_source_columns.items()
        }

    @classmethod
    def __append_daily_bar(cls, csv_path: str, record: dict):
        """
        Add one daily bar to a daily bar csv, creating the file when missing.
        :param csv_path: target csv file.
        :param record: bar to add; must contain a 'date' key.
        """
        new_bar_df = pandas.DataFrame([record])
        if os.path.exists(csv_path) is False:
            new_bar_df.to_csv(csv_path, index=False)
            return

        daily_bar_df = pandas.read_csv(csv_path)
        if str(record['date']) in daily_bar_df['date'].astype(str).values.tolist():
            # This day is already stored: keep the existing row untouched.
            return

        # DataFrame.append was removed in pandas 2.0; concat works on old and new versions.
        daily_bar_df = pandas.concat([daily_bar_df, new_bar_df], ignore_index=True)
        # read_csv parses dates like 20230926 as integers; compare and sort them as text.
        daily_bar_df['date'] = daily_bar_df['date'].astype(str)
        daily_bar_df.sort_values(by='date', ascending=True, inplace=True)
        daily_bar_df.reset_index(drop=True, inplace=True)
        daily_bar_df.to_csv(csv_path, index=False)

    @classmethod
    def __check_source_file(cls, datetime_str: str):
        """
        Check single day's market depth data csv file and load it into cls.source_content.
        :param datetime_str: datetime of need parse, eg: 20230926.
        """
        cls.datetime_str = datetime_str
        year, month = datetime_str[:4], datetime_str[4:6]

        source_path = f'{cls.market_depth_data_dir}/{year}/{month}/market_depth_data_{datetime_str}.csv'
        cls.market_depth_format_data_dir = f'{cls.market_depth_format_root_dir}/{year}/{month}/{datetime_str}'

        assert os.path.exists(source_path) is True, f'Bad source_path {source_path}.'
        os.makedirs(cls.market_depth_format_data_dir, exist_ok=True)

        cls.source_content = pandas.read_csv(source_path)

    @classmethod
    def __get_format_market_data(cls, instrument_source_df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Clean single instrument dataframe.

        Keeps only the rows whose UpdateTime lies inside one of the symbol's trading
        sessions ([start, end), night sessions first, then day sessions) and returns
        them sorted by Timestamp. The input dataframe is not modified.
        :param instrument_source_df: source dataframe of single instrument
        :return: single instrument dataframe after clean.
        """

        assert len(instrument_source_df['ExchangeInstrument'].unique()) == 1, f'Bad ExchangeInstrument {instrument_source_df["ExchangeInstrument"].unique()}.'
        symbol = re.sub(r'\d+', '', instrument_source_df['ExchangeInstrument'].unique()[0])

        assert symbol in cls.session_map.keys(), f'Bad symbol: {symbol}.'
        symbol_session_map = cls.session_map[symbol]['night'] + cls.session_map[symbol]['day']

        update_time = instrument_source_df['UpdateTime']
        in_trading_time = pandas.Series(False, index=instrument_source_df.index)
        for single_session in symbol_session_map:
            if 2 != len(single_session):
                continue

            session_begin, session_end = single_session
            if session_begin < session_end:
                in_trading_time = in_trading_time | ((update_time >= session_begin) & (update_time < session_end))
            elif session_begin > session_end:
                # The session crosses midnight: it covers [begin, 24:00) plus [00:00, end).
                in_trading_time = in_trading_time | ((update_time >= session_begin) | (update_time < session_end))

        instrument_format_df = instrument_source_df[in_trading_time].copy()
        # A stable sort keeps the file order of rows sharing one Timestamp.
        instrument_format_df.sort_values(by='Timestamp', ascending=True, inplace=True, kind='mergesort')
        instrument_format_df.reset_index(drop=True, inplace=True)

        return instrument_format_df


if __name__ == '__main__':
    # Script entry point: parse the two sample trading days end to end.
    target_days = ['20230926', '20230928']
    MarketDataParser.run(parser_datetime_list=target_days)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值