OGA code

# OGA + HDIC + Trim
'''
思路:
算法實現
OGA+ HDIC + Trim
HDIC = HDBIC , HDHQ
1.	進行Kn次的OGA的迭代,其中Kn是OGA的迭代上限
2.	獲得k個回歸因子後,計算高維信息準則HDIC;
3.	選擇OGA迭代結束時可以最小化HDIC的k, 1 <= k <= Kn,
4.	使用Trim獲得最小的相關數據集合

1. Kn計算  1
2. OGA算法實現
3. HDIC公式  1
4. Trim      1
5. chooseMinK
6. getTrim

Train:
1.

Test:

pre:

cv:

'''
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Arguments forwarded to make_sparse_coded_signal below.
n_components, n_features = 512, 100
n_nonzero_coefs = 17

# Generate the data:
#   y = Xw
#   ||w||_0 = n_nonzero_coefs
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)

# Put X and y side by side so that y ends up as the last column, then name
# the columns a_0, a_1, ... in order.
# NOTE(review): the orientation of X and y returned by
# make_sparse_coded_signal differs between scikit-learn releases -- confirm
# that the row counts agree for the installed version.
df = np.column_stack((X, y.T))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)

class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) with a high-dimensional IC helper.

    The last column of ``df`` is used as the response and every other column
    is a candidate predictor. Constructing the object performs one OGA step;
    call :meth:`OGA` again to select further predictors.

    Attributes:
        df: the input frame.
        row, col: number of rows and columns of ``df`` (``col`` counts the
            response column as well).
        sigma: mean squared residual of the current model (``inf`` before
            the first step).
        newproject: DataFrame of the selected predictors after each one has
            been orthogonalised against the previously selected ones.
        coef: coefficients of the orthogonalised predictors, in selection
            order.
        u: current residual, shape ``(row, 1)``.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = df.shape
        self.sigma = np.inf
        self.newproject = pd.DataFrame()
        self.coef = np.array([])
        # The residual starts out as the response itself; float so that the
        # in-place style updates below never truncate.
        self.u = df.iloc[:, -1].values.astype(float).reshape((-1, 1))
        self.OGA()

    def MaxIterKn(self, D=5):
        """Return the OGA iteration cap ``Kn = D * (n / log(p)) ** 0.5``.

        ``n`` is ``self.row`` and ``p`` is ``self.col``.

        :param D: multiplicative constant.
        :return: the cap, truncated to an int.
        """
        return int(D * (self.row / np.log(self.col)) ** 0.5)

    def HDIC(self, IC='HDBIC', c=2):
        """Return the high-dimensional information criterion of the model.

        ``HDIC = n * log(sigma) + #J * wn * log(p)`` where ``sigma`` is the
        stored mean squared residual, ``#J`` is the number of selected
        predictors, ``wn = log(n)`` for ``'HDBIC'`` and
        ``wn = c * log(log(n))`` for ``'HDHQ'``.

        :param IC: ``'HDBIC'`` or ``'HDHQ'``.
        :param c: constant used by ``'HDHQ'`` only.
        :return: the criterion value.
        :raises ValueError: if ``IC`` is not one of the two supported names.
        """
        if IC == 'HDBIC':
            wn = np.log(self.row)
        elif IC == 'HDHQ':
            wn = c * np.log(np.log(self.row))
        else:
            raise ValueError(f"unknown information criterion: {IC!r}")
        fir = self.row * np.log(self.sigma)
        sec = self.newproject.shape[1] * np.log(self.col)
        return fir + sec * wn

    def _x_project(self, x_new):
        """Remove from ``x_new`` its components along the selected columns.

        Each stored column ``x_k`` contributes
        ``(x_new . x_k) / (x_k . x_k) * x_k``; the sum of these is subtracted
        from ``x_new``. The stored columns are mutually orthogonal because
        every one of them went through this method before being stored.

        :param x_new: candidate column, shape ``(row, 1)``.
        :return: the orthogonalised column, same shape as ``x_new``.
        """
        basis = self.newproject.values
        if basis.size == 0:
            # Nothing selected yet, so there is nothing to project out.
            return x_new
        inner = np.sum(x_new * basis, axis=0)
        norms = np.sum(basis ** 2, axis=0)
        # A zero-norm stored column spans nothing; skip it instead of
        # dividing by zero.
        usable = norms > 0
        coef = np.zeros_like(norms, dtype=float)
        coef[usable] = inner[usable] / norms[usable]
        x_project = np.sum(coef * basis, axis=1).reshape((x_new.shape[0], -1))
        return x_new - x_project

    def OGA(self):
        """Perform one OGA step: select one predictor and update the state.

        1. Regress the current residual on every not-yet-selected predictor
           separately (no intercept) and keep the one with the smallest mean
           squared error.
        2. Orthogonalise that predictor against the already selected ones.
        3. Append the orthogonalised column and its coefficient, then update
           the residual ``u`` and ``sigma``.

        :return: the orthogonalised column, shape ``(row, 1)``, or ``None``
            when no usable candidate is left (state is left untouched).
        """
        best_col = None
        best_error = np.inf
        # The last column is the response and must never be a candidate.
        for col in self.df.columns[:-1]:
            if col in self.newproject.columns:
                continue
            x = self.df[col].values.astype(float).reshape((-1, 1))
            energy = float(np.sum(x ** 2))
            if energy == 0.0:
                continue
            # Closed-form single-predictor least squares without intercept.
            beta = float(np.sum(x * self.u)) / energy
            temp_error = float(np.sum((self.u - beta * x) ** 2)) / self.row
            if temp_error < best_error:
                best_col = col
                best_error = temp_error
        if best_col is None:
            return None

        x_raw = self.df[best_col].values.astype(float).reshape((-1, 1))
        x_new = self._x_project(x_raw)
        energy = float(np.sum(x_new ** 2))
        coef = float(np.sum(x_new * self.u)) / energy if energy > 0.0 else 0.0

        self.newproject[best_col] = pd.Series(x_new.ravel(), index=self.df.index)
        self.coef = np.append(self.coef, coef)
        # Keep the residual, not the fitted values, for the next step.
        self.u = self.u - coef * x_new
        self.sigma = float(np.sum(self.u ** 2)) / self.row
        return x_new


# Build the selector on the generated frame, then print the object, the
# iteration cap for D = 2 and the current value of its `u` attribute.
OGA = OGA_HDIC_Trim(df)
for shown in (OGA, OGA.MaxIterKn(D=2), OGA.u):
    print(shown)






```python
# OGA + HDIC + Trim
'''
思路:
算法實現
OGA+ HDIC + Trim
HDIC = HDBIC , HDHQ
1.	進行Kn次的OGA的迭代,其中Kn是OGA的迭代上限
2.	獲得k個回歸因子後,計算高維信息準則HDIC;
3.	選擇OGA迭代結束時可以最小化HDIC的k, 1 <= k <= Kn,
4.	使用Trim獲得最小的相關數據集合

1. Kn計算  1
2. OGA算法實現
3. HDIC公式  1
4. Trim      1
5. chooseMinK
6. getTrim

Train:
1.

Test:

pre:

cv:
'''
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Arguments forwarded to make_sparse_coded_signal below.
n_components, n_features = 512, 100
n_nonzero_coefs = 17

# Generate the data:
#   y = Xw
#   ||w||_0 = n_nonzero_coefs
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)

# Put X and y side by side so that y ends up as the last column, then name
# the columns a_0, a_1, ... in order.
# NOTE(review): the orientation of X and y returned by
# make_sparse_coded_signal differs between scikit-learn releases -- confirm
# that the row counts agree for the installed version.
df = np.column_stack((X, y.T))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)

class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) on numpy arrays, with an HDIC helper.

    The last column of ``df`` is used as the response and every other column
    is a candidate predictor. Constructing the object performs one OGA step;
    call :meth:`OGA` again to select further predictors.

    Attributes:
        df, columns: the input frame and its column index.
        row, col: number of rows and columns of ``df`` (``col`` counts the
            response column as well).
        u: current residual, float32, shape ``(row, 1)``.
        X_feat_np: predictor matrix (all columns but the last), float32.
        sigma: mean squared residual of the current model (``inf`` before
            the first step).
        columnsind: positions (into ``X_feat_np``) of the selected
            predictors, in selection order.
        newproject: array of shape ``(row, k)`` holding the selected
            predictors after orthogonalisation.
        coef: coefficients of the orthogonalised predictors.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = self.df.shape
        self.columns = self.df.columns
        self.u = self.df.iloc[:, -1].values.astype(np.float32).reshape((-1, 1))
        self.X_feat_np = self.df.iloc[:, :-1].values.astype(np.float32)
        self.sigma = np.inf
        self.columnsind = []
        # Two-dimensional from the start so that shape[1] is always the
        # number of selected predictors.
        self.newproject = np.empty((self.row, 0), dtype=np.float32)
        self.coef = np.array([])
        self.OGA()

    def MaxIterKn(self, D=5):
        """Return the OGA iteration cap ``Kn = D * (n / log(p)) ** 0.5``.

        ``n`` is ``self.row`` and ``p`` is ``self.col``.

        :param D: multiplicative constant.
        :return: the cap, truncated to an int.
        """
        return int(D * (self.row / np.log(self.col)) ** 0.5)

    def HDIC(self, IC='HDBIC', c=2):
        """Return the high-dimensional information criterion of the model.

        ``HDIC = n * log(sigma) + #J * wn * log(p)`` where ``sigma`` is the
        stored mean squared residual, ``#J`` is the number of selected
        predictors, ``wn = log(n)`` for ``'HDBIC'`` and
        ``wn = c * log(log(n))`` for ``'HDHQ'``.

        :param IC: ``'HDBIC'`` or ``'HDHQ'``.
        :param c: constant used by ``'HDHQ'`` only.
        :return: the criterion value.
        :raises ValueError: if ``IC`` is not one of the two supported names.
        """
        if IC == 'HDBIC':
            wn = np.log(self.row)
        elif IC == 'HDHQ':
            wn = c * np.log(np.log(self.row))
        else:
            raise ValueError(f"unknown information criterion: {IC!r}")
        fir = self.row * np.log(self.sigma)
        sec = self.newproject.shape[1] * np.log(self.col)
        return fir + sec * wn

    def _x_project(self, x_new):
        """Remove from ``x_new`` its components along the selected columns.

        Each stored column ``x_k`` contributes
        ``(x_new . x_k) / (x_k . x_k) * x_k``; the sum of these is subtracted
        from ``x_new``. The stored columns are mutually orthogonal because
        every one of them went through this method before being stored.

        :param x_new: candidate column, shape ``(row, 1)``.
        :return: the orthogonalised column, same shape as ``x_new``.
        """
        basis = self.newproject
        if basis.ndim < 2 or basis.shape[1] == 0:
            # Nothing selected yet, so there is nothing to project out.
            return x_new
        valprojectX = np.sum(x_new * basis, axis=0)  # numerators
        valprojextS = np.sum(basis ** 2, axis=0)     # denominators
        # A zero-norm stored column spans nothing; skip it instead of
        # dividing by zero.
        usable = valprojextS > 0
        coef = np.zeros_like(valprojextS)
        coef[usable] = valprojectX[usable] / valprojextS[usable]
        x_project = np.sum(coef * basis, axis=1).reshape((x_new.shape[0], -1))
        return x_new - x_project

    def OGA(self):
        """Perform one OGA step: select one predictor and update the state.

        1. Score every not-yet-selected predictor ``x`` by
           ``(x . u) ** 2 / (x . x)``, the drop in squared error of a
           no-intercept regression of the residual on ``x``; the largest
           score is the smallest regression error.
        2. Orthogonalise the winner against the already selected columns.
        3. Append the orthogonalised column, its position and coefficient,
           then update the residual ``u`` and ``sigma``.

        :return: the orthogonalised column, shape ``(row, 1)``, or ``None``
            when no usable candidate is left (state is left untouched).
        """
        X = self.X_feat_np
        energy = np.sum(X ** 2, axis=0)
        inner = (X.T @ self.u).ravel()
        gain = np.full(X.shape[1], -np.inf)
        usable = energy > 0
        gain[usable] = inner[usable] ** 2 / energy[usable]
        if self.columnsind:
            # Already selected predictors are not candidates any more.
            gain[self.columnsind] = -np.inf
        if gain.size == 0 or not np.isfinite(gain.max()):
            return None

        best = int(np.argmax(gain))
        x_new = self._x_project(X[:, best:best + 1])
        norm = float(np.sum(x_new ** 2))
        coef = float(np.sum(x_new * self.u)) / norm if norm > 0.0 else 0.0

        self.columnsind.append(best)
        self.newproject = np.hstack((self.newproject, x_new))
        self.coef = np.append(self.coef, coef)
        # Keep the residual, not the fitted values, for the next step.
        self.u = self.u - coef * x_new
        self.sigma = float(np.sum(self.u ** 2)) / self.row
        return x_new


# Build the selector on the generated frame, then print the object, the
# iteration cap for D = 2 and the current value of its `u` attribute.
OGA = OGA_HDIC_Trim(df)
for shown in (OGA, OGA.MaxIterKn(D=2), OGA.u):
    print(shown)

```python
import numpy as np
import pandas as pd
import ta
import random
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import Adam


def add_technical_indicators(data, window_size=10):
    """Append indicator columns to ``data`` in place and return it.

    Columns added, in this order: MA_5, MA_10, RSI, BB_HIGH, BB_MID, BB_LOW,
    Past_High, Past_Low. Rows containing any NaN are then dropped in place.

    :param data: frame with at least 'Close', 'High' and 'Low' columns.
    :param window_size: rolling window for Past_High / Past_Low only; the
        moving-average and RSI windows are fixed at 5, 10 and 14.
    :return: the same frame object that was passed in.
    """
    close = data['Close']

    for span in (5, 10):
        data[f'MA_{span}'] = ta.trend.sma_indicator(close, window=span)
    data['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()

    # One Bollinger indicator object is enough to read all three bands.
    bands = ta.volatility.BollingerBands(close)
    data['BB_HIGH'] = bands.bollinger_hband()
    data['BB_MID'] = bands.bollinger_mavg()
    data['BB_LOW'] = bands.bollinger_lband()

    data['Past_High'] = data['High'].rolling(window=window_size).max()
    data['Past_Low'] = data['Low'].rolling(window=window_size).min()

    data.dropna(inplace=True)
    return data


class LSTMTrader:
    """Epsilon-greedy deep-Q agent backed by a small Keras network.

    Despite the class name, the network built by :meth:`_build_model`
    consists of Dense layers only.
    """

    def __init__(self, input_dim, action_space):
        """Create the agent.

        :param input_dim: number of input features of the network.
        :param action_space: number of discrete actions (network outputs).
        """
        self.input_dim = input_dim
        self.action_space = action_space
        self.memory = deque(maxlen=2000)  # replay buffer, oldest dropped first
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (64 -> 32 -> action_space)."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.input_dim, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=self.action_space, activation="linear"))
        # `learning_rate` is the supported argument name; the old `lr`
        # alias is rejected by current Keras releases.
        model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action: random with probability epsilon, else greedy.

        :param state: network input for a single observation.
        :return: index of the chosen action.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Fit the network on a random minibatch of stored transitions.

        For each sampled transition the target for the taken action is the
        reward, plus the discounted best predicted value of the next state
        when the transition is not terminal. Epsilon is decayed once per
        call, after the whole minibatch has been processed.

        :param batch_size: number of transitions to sample; must not exceed
            the number of stored transitions (``random.sample`` raises
            ``ValueError`` otherwise).
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        # Decay once per replay call; decaying per sample would shrink
        # epsilon batch_size times faster than the decay rate suggests.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to ``name``."""
        self.model.save_weights(name)


def preprocess_data(data, window_size):
    """Return every length-``window_size`` sliding window over ``data``.

    Windows start at positions 0, 1, ... and are stacked along a new leading
    axis; when ``data`` is shorter than ``window_size`` the result is an
    empty array.

    :param data: sliceable sequence or array.
    :param window_size: number of consecutive items per window.
    :return: numpy array of the stacked windows.
    """
    last_start = len(data) - window_size
    windows = [data[start:start + window_size] for start in range(last_start + 1)]
    return np.array(windows)


def prepare_data(data, window_size):
    """Turn a raw price frame into an array of sliding windows.

    Adds the indicator columns, drops the 'Date' and 'Time' columns,
    converts the rest to a numpy array and cuts it into windows.

    :param data: frame accepted by ``add_technical_indicators`` that also
        has 'Date' and 'Time' columns.
    :param window_size: number of rows per window.
    :return: the array produced by ``preprocess_data``.
    """
    enriched = add_technical_indicators(data)
    numeric = enriched.drop(columns=['Date', 'Time'])
    return preprocess_data(numeric.to_numpy(), window_size)


def split_data(data, train_ratio):
    """Split ``data`` into a leading train part and a trailing test part.

    :param data: sliceable sequence or array.
    :param train_ratio: fraction of items that go to the train part; the
        cut position is ``int(train_ratio * len(data))``.
    :return: ``(train_data, test_data)``.
    """
    cut = int(train_ratio * len(data))
    return data[:cut], data[cut:]


def main():
    """Train the agent on windowed minute bars read from a CSV file.

    The CSV has no header; its columns are named Date, Time, Open, High,
    Low, Close, Volume here. The data is enriched and windowed by
    ``prepare_data`` and the first ``train_ratio`` share is used for
    training. For every bar the agent picks an action (1 opens a long,
    2 opens a short, anything else does nothing), an open position is
    closed when the close price reaches its target or stop level, and the
    transition is stored and replayed. Weights are saved every 10 episodes.
    """
    import os

    window_size = 10
    train_ratio = 0.8
    batch_size = 32
    episodes = 500

    # The CSV file has no header row.
    data = pd.read_csv(r'/work/historykdata/histdata_com/xagusd_mt5/DAT_MT_XAGUSD_M1_2022.csv', header=None)
    data.columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']

    data = prepare_data(data, window_size)
    train_data, _ = split_data(data, train_ratio)
    n_features = data.shape[2]

    # Column positions inside a bar once Date/Time are dropped and the
    # indicator columns are appended: Open, High, Low, Close, Volume, MA_5,
    # MA_10, RSI, BB_HIGH, BB_MID, BB_LOW, Past_High, Past_Low.
    close_idx, past_high_idx, past_low_idx = 3, 11, 12

    agent = LSTMTrader(input_dim=n_features, action_space=4)

    # save_weights does not create missing directories.
    os.makedirs("lstm_trader_tmp", exist_ok=True)

    for e in range(episodes):
        total_profit = 0
        position = None  # entry price of the open position, if any
        position_type = None  # "long" or "short" while a position is open

        for t in range(window_size, len(train_data) - 1):
            # The network takes one feature vector per sample, so the state
            # is the most recent bar of the window, shaped (1, n_features).
            bar = train_data[t][-1]
            state = np.asarray(bar, dtype=np.float32).reshape(1, n_features)
            next_state = np.asarray(train_data[t + 1][-1], dtype=np.float32).reshape(1, n_features)

            action = agent.act(state)

            price = bar[close_idx]
            past_high = bar[past_high_idx]
            past_low = bar[past_low_idx]

            reward = 0
            done = False

            if position is not None:
                if position_type == "long":
                    pnl = price - position
                    hit_target = price >= past_high
                    hit_stop = price <= past_low
                else:
                    # A short profits from falling prices, so its levels
                    # mirror the long ones.
                    pnl = position - price
                    hit_target = price <= past_low
                    hit_stop = price >= past_high
                # NOTE(review): Past_High / Past_Low are rolling extremes
                # that include the current bar, so the close can only touch
                # them, never cross -- confirm these are the intended levels.
                if hit_target or hit_stop:
                    # Realised P&L is signed already; always add it.
                    total_profit += pnl
                    reward = 1 if hit_target else -1
                    done = True
                    position = None
                    position_type = None

            if action == 1:
                position_type = "long"
                position = price
            elif action == 2:
                position_type = "short"
                position = price

            agent.remember(state, action, reward, next_state, done)
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        if e % 10 == 0:
            agent.save(f"lstm_trader_tmp/lstm_trader-{e}.h5")

        print("Episode: {}/{}, Total Profit: {}".format(e, episodes, total_profit))


# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
```

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值