# 量化交易之数字货币篇 - 高频数据特征提取 (Quantitative trading, cryptocurrency series: high-frequency data feature extraction)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from tqz_data_parser.tqz_footPrint_data import TQZTimestampTool

class TQZBarFilter:
    """Helpers for restricting bar data to the time span covered by footprint data."""

    @classmethod
    def get_filter_bar_data(cls, bar_data: pd.DataFrame, footPrint_data: pd.DataFrame) -> pd.DataFrame:
        """
        keep only the bars whose 'datetime' lies inside the footprint time span.
        :param bar_data: bars with a 'datetime' column comparable to the footprint 'timestamp' values.
        :param footPrint_data: footprint rows with a 'timestamp' column.
        :return: new DataFrame holding the rows of bar_data with
                 min(timestamp) <= datetime <= max(timestamp) (both ends inclusive),
                 re-indexed from 0. bar_data itself is not modified.
        """
        timestamps = footPrint_data['timestamp']
        first_timestamp, last_timestamp = timestamps.min(), timestamps.max()

        in_span = (first_timestamp <= bar_data['datetime']) & (bar_data['datetime'] <= last_timestamp)

        # drop=True discards the old index outright. Round-tripping it through an
        # 'index' column fails for a named index and clobbers a genuine 'index'
        # column when the input has one.
        return bar_data[in_span].reset_index(drop=True)


class TQZFeatureEngineering:
    """Feature builders that enrich bar data with footprint (trade) and order-book statistics."""

    # Per-bar trade features written by trades_features_marker, in output column order.
    _TRADE_FEATURE_COLUMNS = (
        'poc', 'big_trades_ratio', 'poc_weight', 'pus_pls', 'pwus_pwls',
        'sas_sbs', 'ps_ss_ratio', 'sbar_trades_ratio',
    )

    # Suffixes of the ten order-book levels ("gears") in the source columns, e.g. 'Ask_Price_01'.
    _GEARS = ('01', '02', '03', '04', '05', '06', '07', '08', '09', '10')

    @staticmethod
    def _sum_in_order(series_list):
        """Add the given series strictly left to right (keeps float results reproducible)."""
        total = series_list[0]
        for series in series_list[1:]:
            total = total + series
        return total

    @staticmethod
    def _bar_trade_features(bar_trades, bar_open, bar_close, price_digital_points):
        """
        compute the trade features of one bar.
        :param bar_trades: footprint rows of a single bar with 'price', 'ask_size', 'bid_size', 'ab_size' columns.
        :param bar_open: open price of the bar (NaN when the bar is missing).
        :param bar_close: close price of the bar (NaN when the bar is missing).
        :param price_digital_points: decimals used to round poc_weight.
        :return: dict mapping every name in _TRADE_FEATURE_COLUMNS to its value.
        """
        prices = bar_trades['price']
        sizes = bar_trades['ab_size']
        total_size = sizes.sum()

        # poc: price level with the largest ask+bid size; tied levels are averaged.
        at_poc = sizes == sizes.max()
        poc = prices[at_poc].mean()
        poc_size = sizes[at_poc].mean()

        # big trades: levels whose size exceeds mean + 3 * std of the bar's sizes.
        is_big = sizes > (sizes.mean() + 3 * sizes.std())
        big_sizes = sizes[is_big]
        big_total = big_sizes.sum()

        # poc_weight: size-weighted mean price of the big-trade levels.
        if big_sizes.empty:
            # No big trades in this bar: the weighted mean is undefined (would be 0 / 0).
            poc_weight = np.nan
        else:
            poc_weight = round((prices[is_big] * big_sizes).sum() / big_total, price_digital_points)

        # Comparisons against a NaN poc_weight are all False, so both sums are 0 in that case.
        size_above_poc = sizes[prices > poc].sum()
        size_below_poc = sizes[prices < poc].sum()
        size_above_poc_weight = sizes[prices > poc_weight].sum()
        size_below_poc_weight = sizes[prices < poc_weight].sum()

        # Trades inside the bar body, i.e. between open and close (inclusive).
        min_price, max_price = min(bar_open, bar_close), max(bar_open, bar_close)
        in_body = (min_price <= prices) & (prices <= max_price)

        return {
            'poc': poc,
            'big_trades_ratio': big_total / total_size,
            'poc_weight': poc_weight,
            # f(x) = poc_upper_size - poc_lower_size
            'pus_pls': size_above_poc - size_below_poc,
            # f(x) = poc_weight_upper_size - poc_weight_lower_size
            'pwus_pwls': size_above_poc_weight - size_below_poc_weight,
            # f(x) = sum_ask_size - sum_bid_size
            'sas_sbs': bar_trades['ask_size'].sum() - bar_trades['bid_size'].sum(),
            # poc_size / sum(size)
            'ps_ss_ratio': poc_size / total_size,
            # trades(open-close) / trades(sum)
            'sbar_trades_ratio': sizes[in_body].sum() / total_size,
        }

    @classmethod
    def trades_features_marker(cls, bar_data, footPrint_data, price_digital_points: int = 1):
        """
        add trades features.
        :param price_digital_points: price digital point counts (decimals used to round poc_weight).
        :param bar_data: bars with 'datetime', 'open', 'high', 'low', 'close' columns; may span any
                         number of days, only bars inside the footprint time span are kept.
        :param footPrint_data: footprint rows with 'timestamp', 'price', 'ask_size', 'bid_size'
                               columns (single day per the original notes). It is not modified.
        :return: filtered bar data with the trades feature columns added; rows without footprint
                 data for their 'datetime' keep NaN in the per-bar feature columns.
        """

        bar_data_filter = TQZBarFilter.get_filter_bar_data(bar_data=bar_data, footPrint_data=footPrint_data)

        # Work on a copy so the caller's footprint frame does not silently gain an 'ab_size' column.
        trades = footPrint_data.assign(ab_size=footPrint_data['ask_size'] + footPrint_data['bid_size'])

        # Create the feature columns up front so the derived ratios below also work
        # when there is no footprint row at all.
        for column in cls._TRADE_FEATURE_COLUMNS:
            if column not in bar_data_filter.columns:
                bar_data_filter[column] = np.nan

        # One pass over the footprint rows, grouped per bar timestamp in ascending order.
        for timestamp, bar_trades in trades.groupby('timestamp', sort=True):
            bar_rows = bar_data_filter['datetime'] == timestamp
            bar_open = bar_data_filter.loc[bar_rows, 'open'].mean()
            bar_close = bar_data_filter.loc[bar_rows, 'close'].mean()

            features = cls._bar_trade_features(bar_trades, bar_open, bar_close, price_digital_points)
            for column, value in features.items():
                bar_data_filter.loc[bar_rows, column] = value

        high, low = bar_data_filter['high'], bar_data_filter['low']
        open_, close = bar_data_filter['open'], bar_data_filter['close']
        poc, poc_weight = bar_data_filter['poc'], bar_data_filter['poc_weight']

        # hp / pl ratio: f(x) = (high - poc) / (poc - low); f(x)↗ long_market↗
        bar_data_filter['hp_pl_ratio'] = (high - poc) / (poc - low)

        # hpw / pwl ratio: f(x) = (high - poc_weight) / (poc_weight - low); f(x)↗ long_market↗
        bar_data_filter['hpw_pwl_ratio'] = (high - poc_weight) / (poc_weight - low)

        # cp: f(x) = close - poc; f(x)↗ long_market↗
        bar_data_filter['cp'] = close - poc

        # cpw: f(x) = close - poc_weight; f(x)↗ long_market↗
        bar_data_filter['cpw'] = close - poc_weight

        # co / hl ratio: f(x) = (close - open) / (high - low); |f(x)|↗ trend_degree↗
        bar_data_filter['co_hl_ratio'] = (close - open_) / (high - low)

        # op+cp: f(x) = (open - poc) + (close - poc); f(x)↗ long_market↗
        bar_data_filter['op+cp'] = (open_ - poc) + (close - poc)

        # opw+cpw: f(x) = (open - poc_weight) + (close - poc_weight); f(x)↗ long_market↗
        bar_data_filter['opw+cpw'] = (open_ - poc_weight) + (close - poc_weight)

        return bar_data_filter

    @classmethod
    def orderbook_marker(cls, source_orderbook_data, price_digital_points: int = 1, size_digital_points: int = 3):
        """
        add base orderbook features.
        :param source_orderbook_data: source orderbook data with 'Ask_Price_NN', 'Bid_Price_NN',
                                      'Ask_Volume_NN', 'Bid_Volume_NN' columns for NN = 01..10.
                                      The feature columns are added to this frame in place.
        :param price_digital_points: price digital point counts.
        :param size_digital_points: size digital point counts.
        :return: the same source_orderbook_data object, with the 'o_*' feature columns added.
        """

        book = source_orderbook_data
        ask_prices = [book['Ask_Price_' + gear] for gear in cls._GEARS]
        bid_prices = [book['Bid_Price_' + gear] for gear in cls._GEARS]
        ask_sizes = [book['Ask_Volume_' + gear] for gear in cls._GEARS]
        bid_sizes = [book['Bid_Volume_' + gear] for gear in cls._GEARS]

        # Cross-weighted notional per gear: each side's price is weighted by the
        # opposite side's volume (ask price * bid volume, then bid price * ask volume).
        cross_terms = []
        for ask_price, bid_price, ask_size, bid_size in zip(ask_prices, bid_prices, ask_sizes, bid_sizes):
            cross_terms.append(ask_price * bid_size)
            cross_terms.append(bid_price * ask_size)

        # price, size: 1 gear
        book['o_s_1gear_price'] = round((ask_prices[0] + bid_prices[0]) * 0.5, price_digital_points)
        book['o_1gear_size'] = round(ask_sizes[0] + bid_sizes[0], size_digital_points)

        # price, size: 10 gears (simple mean of the 20 quoted prices)
        book['o_s_10gears_price'] = round(cls._sum_in_order(ask_prices + bid_prices) * 0.05, price_digital_points)

        book['o_10gears_ask_size'] = round(cls._sum_in_order(ask_sizes), size_digital_points)
        book['o_10gears_bid_size'] = round(cls._sum_in_order(bid_sizes), size_digital_points)
        book['o_10gears_size'] = round(book['o_10gears_ask_size'] + book['o_10gears_bid_size'], size_digital_points)

        # weight price: 1 gear
        book['o_w_1gear_price'] = round((cross_terms[0] + cross_terms[1]) / book['o_1gear_size'], price_digital_points)

        # weight price: 10 gears
        book['o_w_10gears_price'] = round(cls._sum_in_order(cross_terms) / book['o_10gears_size'], price_digital_points)

        # max ask_price_diff, bid_price_diff (distance between gear 10 and gear 1 on each side)
        book['o_max_ap_diff'] = ask_prices[-1] - ask_prices[0]
        book['o_max_bp_diff'] = bid_prices[0] - bid_prices[-1]

        return book

    @classmethod
    def orderbook_features_marker(cls, bar_data_filter, orderbook_data, price_digital_points: int = 1, size_digital_points: int = 3,
                                  start_timestamp=None, end_timestamp=None, timestamps_interval: int = 15 * 60000):
        """
        add orderbook features to bar_data.
        :param price_digital_points: price digital point counts.
        :param size_digital_points: size digital point counts.
        :param bar_data_filter: single day; needs a 'datetime' column and is modified in place.
        :param orderbook_data: source orderbook data with base features ('Datatime', 'o_1gear_size',
                               'o_10gears_size', 'o_max_ap_diff', 'o_max_bp_diff' columns).
        :param start_timestamp: start of the first window, in the units of 'Datatime'.
                                Defaults to TQZTimestampTool.get_timestamp(date_str='2022-07-01').
        :param end_timestamp: windows are processed until a window start reaches or passes this value
                              (that last window is still processed).
                              Defaults to TQZTimestampTool.get_timestamp(date_str='2022-07-02').
        :param timestamps_interval: window length / step; default 15 * 60000
                                    (presumably 15 minutes in milliseconds - confirm against the data).
        :return: bar_data_filter with orderbook features.
        :raises ValueError: if timestamps_interval is not positive (the scan would never terminate).
        """

        if timestamps_interval <= 0:
            raise ValueError("timestamps_interval must be positive")

        if start_timestamp is None:
            start_timestamp = TQZTimestampTool.get_timestamp(date_str='2022-07-01')
        if end_timestamp is None:
            end_timestamp = TQZTimestampTool.get_timestamp(date_str='2022-07-02')

        book_times = orderbook_data['Datatime']
        window_start = start_timestamp

        while True:
            # Snapshots falling in the half-open window [window_start, window_start + interval).
            window = orderbook_data[(window_start <= book_times) & (book_times < window_start + timestamps_interval)]
            bar_rows = bar_data_filter['datetime'] == window_start

            # orderbook counts
            bar_data_filter.loc[bar_rows, 'o_counts'] = len(window)

            # o_10gears_size_mean | o_1gear_size_mean
            bar_data_filter.loc[bar_rows, 'o_1gear_size_mean'] = round(window['o_1gear_size'].mean(), size_digital_points)
            bar_data_filter.loc[bar_rows, 'o_10gears_size_mean'] = round(window['o_10gears_size'].mean(), size_digital_points)

            # o_max_ap_diff_mean | o_max_bp_diff_mean
            bar_data_filter.loc[bar_rows, 'o_max_ap_diff_mean'] = round(window['o_max_ap_diff'].mean(), price_digital_points)
            bar_data_filter.loc[bar_rows, 'o_max_bp_diff_mean'] = round(window['o_max_bp_diff'].mean(), price_digital_points)

            if window_start >= end_timestamp:
                break
            window_start = window_start + timestamps_interval

        return bar_data_filter


from public_module.tqz_extern.tools.pandas_operator.pandas_operator import pandas
if __name__ == '__main__':
    # Demo run: load the 15m bars, the order-book dump and the 15m footprint data
    # for 2022-07-01 from ../source_data.
    _btc_bar_15m_data = pd.read_csv('../source_data/BTCUSDT_15m.csv')
    _btc_orderbook_data = pd.read_csv('../source_data/Order_Book_BTCUSDT_2022-07-01.csv')
    _btc_footPrint_15m_data = pd.read_csv('../source_data/BTCUSDT_15mBar_footPrint_2022-07-01.csv')

    # Step 1: trade (footprint) features on the bars covered by the footprint data.
    _bar_data_filter = TQZFeatureEngineering.trades_features_marker(
        footPrint_data=_btc_footPrint_15m_data,
        bar_data=_btc_bar_15m_data,
    )

    # Step 2 (currently disabled): order-book features on top of the trade features.
    # _bar_data_filter = TQZFeatureEngineering.orderbook_features_marker(
    #     bar_data_filter=_bar_data_filter,
    #     orderbook_data=TQZFeatureEngineering.orderbook_marker(source_orderbook_data=_btc_orderbook_data)
    # )

    print(f"_bar_data_filter: {_bar_data_filter}")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值