OGA code

sophiaRachel

已于 2023-05-15 01:10:18 修改

阅读量70

点赞数

文章标签： python 机器学习人工智能

于 2023-05-12 17:36:18 首次发布

本文链接：https://blog.csdn.net/qq_40445763/article/details/130647241

版权

# OGA + HDIC + Trim
'''
思路：
算法實現
OGA+ HDIC + Trim
HDIC = HDBIC , HDHQ
1.	進行Kn次的OGA的迭代，其中Kn是OGA的迭代上限
2.	獲得k個回歸因子後，計算高維信息準則HDIC;
3.	選擇OGA迭代結束時可以最小化HDIC的k, 1 <= k <= Kn,
4.	使用Trim獲得最小的相關數據集合

1. Kn計算  1
2. OGA算法實現
3. HDIC公式  1
4. Trim      1
5. choosMink
6. getTrim

Train:
1.

Test:

pre:

cv:

'''
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Synthetic sparse-regression problem.  In sparse-coding terms `n_features`
# is the length of the signal (the number of observations / rows here) and
# `n_components` is the number of dictionary atoms (candidate regressors).
n_components, n_features = 512, 100
n_nonzero_coefs = 17

# Generate the data so that y = X w with ||w||_0 = n_nonzero_coefs.
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)

# The orientation of the returned dictionary depends on the scikit-learn
# version (observations-by-atoms in older releases, atoms-by-observations in
# newer ones).  Orient X so that it has one row per entry of y; otherwise
# the column_stack below fails on a shape mismatch.
y = np.ravel(y)
if X.shape[0] != y.shape[0]:
    X = X.T

# Regressors in the leading columns, response y in the last column.
df = np.column_stack((X, y))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)

class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) with the HDIC information criterion.

    The last column of ``df`` is treated as the response and every other
    column as a candidate regressor.  Constructing the object performs one
    OGA step; call :meth:`OGA` again to select further regressors.

    Attributes set by ``__init__`` / ``OGA``:
        df: the input frame.
        row, col: ``df.shape`` (``col`` counts the response column too).
        sigma: mean squared residual of the current fit (``inf`` before the
            first step).
        newproject: DataFrame holding the selected regressors after
            orthogonalisation, keyed by their original column names.
        coef: coefficient of the response on each orthogonalised regressor,
            in selection order.
        u: current residual, shape ``(row, 1)``.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = df.shape
        self.sigma = np.inf      # mean squared residual of the current model
        # Orthogonalised representation of the selected regressors.  Sharing
        # df's index lets columns be added by plain assignment.
        self.newproject = pd.DataFrame(index=df.index)
        self.coef = np.array([])
        # Current residual; before any regressor is selected it is y itself.
        self.u = df.iloc[:, -1].to_numpy(dtype=float).reshape((-1, 1))
        self.OGA()

    def MaxIterKn(self, D=5):
        '''
        Upper bound on the number of OGA iterations.

        Kn = O((n / log p)^0.5); this returns D * (n / log p)^0.5 truncated
        to an int, where n is the number of rows and p the number of
        candidate regressors (all columns except the response).  The result
        is clamped to [1, p] because at most p regressors can be selected.

        :param D: multiplicative constant in front of the bound
        :return: iteration cap as an int
        '''
        p = self.col - 1
        if p <= 1:
            # log(p) <= 0: the formula is undefined, and there is at most
            # one regressor to pick anyway.
            return max(p, 0)
        kn = int(D * (self.row / np.log(p)) ** 0.5)
        return max(1, min(kn, p))

    def HDIC(self, IC='HDBIC', c=2):
        '''
        High-dimensional information criterion of the current model.

        HDIC = n * log(sigma^2) + #J * wn * log(p)
        n      : number of samples
        p      : number of candidate regressors
        sigma^2: mean squared residual, stored in ``self.sigma``
        #J     : number of regressors currently in the model

        :param IC: 'HDBIC' uses wn = log(n); 'HDHQ' uses wn = c * log(log(n))
        :param c: constant used by 'HDHQ'
        :return: the HDIC value
        :raises ValueError: if ``IC`` is not one of the two criteria
        '''
        if IC == 'HDBIC':
            wn = np.log(self.row)
        elif IC == 'HDHQ':
            wn = c * np.log(np.log(self.row))
        else:
            raise ValueError(f"unknown information criterion: {IC!r}")
        n_selected = self.newproject.shape[1]
        fit_term = self.row * np.log(self.sigma)
        penalty = n_selected * wn * np.log(self.col - 1)
        return fit_term + penalty

    def _x_project(self, x_new):
        '''
        Remove from ``x_new`` its projection onto the selected regressors.

        ``self.newproject`` holds mutually orthogonal columns x1_, x2_, ...,
        so the projection is the sum over them of
        (x_new . xk_) / (xk_ . xk_) * xk_.

        :param x_new: candidate regressor, shape (n, 1)
        :return: the orthogonalised regressor, shape (n, 1)
        '''
        x_new = np.asarray(x_new, dtype=float).reshape((-1, 1))
        basis = self.newproject.to_numpy(dtype=float)
        if basis.size == 0:
            # Nothing selected yet: there is nothing to project out.
            return x_new.copy()
        inner = (basis.T @ x_new).ravel()
        sq_norms = np.sum(basis ** 2, axis=0)
        # All-zero basis columns contribute nothing instead of dividing by 0.
        weights = np.divide(inner, sq_norms,
                            out=np.zeros_like(inner), where=sq_norms > 0)
        return x_new - basis @ weights.reshape((-1, 1))

    def OGA(self):
        '''
        Perform one OGA step: select one regressor and update the fit.

        1. Among the regressors not yet selected, pick the one whose
           no-intercept least-squares fit to the current residual ``u`` has
           the smallest error (equivalently the largest (x.u)^2 / (x.x)).
        2. Orthogonalise it against the regressors already selected,
           x_new_ = x - x_project(x), and store x_new_ in ``newproject``.
        3. Regress ``u`` on x_new_, append the coefficient to ``coef``,
           subtract the fitted part from ``u`` and refresh ``sigma``.

        Does nothing when every regressor has already been selected.

        :return: None
        '''
        chosen = set(self.newproject.columns)
        # The last column is the response and must never be a candidate.
        candidates = [c for c in self.df.columns[:-1] if c not in chosen]
        if not candidates:
            return

        X = self.df[candidates].to_numpy(dtype=float)
        inner = (X.T @ self.u).ravel()
        sq_norms = np.sum(X ** 2, axis=0)
        # Reduction in squared error from fitting u on each single column.
        gain = np.divide(inner ** 2, sq_norms,
                         out=np.zeros_like(sq_norms), where=sq_norms > 0)
        best = int(np.argmax(gain))  # first maximum wins, as in a scan

        x_perp = self._x_project(X[:, [best]])
        denom = float(np.sum(x_perp ** 2))
        beta = float(np.sum(x_perp * self.u)) / denom if denom > 0 else 0.0

        self.newproject[candidates[best]] = x_perp.ravel()
        self.coef = np.append(self.coef, beta)
        self.u = self.u - beta * x_perp
        self.sigma = float(np.mean(self.u ** 2))


# Build the selector (its constructor already runs one OGA step), then show
# the object itself, the iteration cap for D=2, and its `u` attribute.
OGA = OGA_HDIC_Trim(df)
for shown in (OGA, OGA.MaxIterKn(D=2), OGA.u):
    print(shown)






```python
# OGA + HDIC + Trim
'''
思路：
算法實現
OGA+ HDIC + Trim
HDIC = HDBIC , HDHQ
1.	進行Kn次的OGA的迭代，其中Kn是OGA的迭代上限
2.	獲得k個回歸因子後，計算高維信息準則HDIC;
3.	選擇OGA迭代結束時可以最小化HDIC的k, 1 <= k <= Kn,
4.	使用Trim獲得最小的相關數據集合

1. Kn計算  1
2. OGA算法實現
3. HDIC公式  1
4. Trim      1
5. choosMink
6. getTrim

Train:
1.

Test:

pre:

cv:
'''
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Synthetic sparse-regression problem.  In sparse-coding terms `n_features`
# is the length of the signal (the number of observations / rows here) and
# `n_components` is the number of dictionary atoms (candidate regressors).
n_components, n_features = 512, 100
n_nonzero_coefs = 17

# Generate the data so that y = X w with ||w||_0 = n_nonzero_coefs.
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)

# The orientation of the returned dictionary depends on the scikit-learn
# version (observations-by-atoms in older releases, atoms-by-observations in
# newer ones).  Orient X so that it has one row per entry of y; otherwise
# the column_stack below fails on a shape mismatch.
y = np.ravel(y)
if X.shape[0] != y.shape[0]:
    X = X.T

# Regressors in the leading columns, response y in the last column.
df = np.column_stack((X, y))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)

class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) with the HDIC information criterion.

    The last column of ``df`` is treated as the response and every other
    column as a candidate regressor.  Constructing the object performs one
    OGA step; call :meth:`OGA` again to select further regressors.

    Attributes set by ``__init__`` / ``OGA``:
        df, columns: the input frame and its column index.
        row, col: ``df.shape`` (``col`` counts the response column too).
        u: current residual, float32, shape ``(row, 1)``.
        X_feat_np: candidate regressors, float32, shape ``(row, col - 1)``.
        sigma: mean squared residual of the current fit (``inf`` before the
            first step).
        columnsind: positions (into ``X_feat_np``) of the selected
            regressors, in selection order.
        newproject: float32 array of shape ``(row, k)`` holding the selected
            regressors after orthogonalisation.
        coef: coefficient of the response on each orthogonalised regressor,
            in selection order.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = self.df.shape
        self.columns = self.df.columns
        # Current residual; before any regressor is selected it is y itself.
        self.u = self.df.iloc[:, -1].values.astype(np.float32).reshape((-1, 1))
        self.X_feat_np = self.df.iloc[:, :-1].values.astype(np.float32)
        self.sigma = np.inf      # mean squared residual of the current model
        self.columnsind = []     # indices of the selected regressors
        # Kept 2-D (row, 0) while empty so that .shape[1] and column
        # stacking work from the very first step.
        self.newproject = np.empty((self.row, 0), dtype=np.float32)
        self.coef = np.array([])
        self.OGA()

    def MaxIterKn(self, D=5):
        '''
        Upper bound on the number of OGA iterations.

        Kn = O((n / log p)^0.5); this returns D * (n / log p)^0.5 truncated
        to an int, where n is the number of rows and p the number of
        candidate regressors (all columns except the response).  The result
        is clamped to [1, p] because at most p regressors can be selected.

        :param D: multiplicative constant in front of the bound
        :return: iteration cap as an int
        '''
        p = self.col - 1
        if p <= 1:
            # log(p) <= 0: the formula is undefined, and there is at most
            # one regressor to pick anyway.
            return max(p, 0)
        kn = int(D * (self.row / np.log(p)) ** 0.5)
        return max(1, min(kn, p))

    def HDIC(self, IC='HDBIC', c=2):
        '''
        High-dimensional information criterion of the current model.

        HDIC = n * log(sigma^2) + #J * wn * log(p)
        n      : number of samples
        p      : number of candidate regressors
        sigma^2: mean squared residual, stored in ``self.sigma``
        #J     : number of regressors currently in the model

        :param IC: 'HDBIC' uses wn = log(n); 'HDHQ' uses wn = c * log(log(n))
        :param c: constant used by 'HDHQ'
        :return: the HDIC value
        :raises ValueError: if ``IC`` is not one of the two criteria
        '''
        if IC == 'HDBIC':
            wn = np.log(self.row)
        elif IC == 'HDHQ':
            wn = c * np.log(np.log(self.row))
        else:
            raise ValueError(f"unknown information criterion: {IC!r}")
        n_selected = self.newproject.shape[1]
        fit_term = self.row * np.log(self.sigma)
        penalty = n_selected * wn * np.log(self.col - 1)
        return fit_term + penalty

    def _x_project(self, x_new):
        '''
        Remove from ``x_new`` its projection onto the selected regressors.

        ``self.newproject`` holds mutually orthogonal columns x1_, x2_, ...,
        so the projection is the sum over them of
        (x_new . xk_) / (xk_ . xk_) * xk_.

        :param x_new: candidate regressor, shape (n, 1)
        :return: the orthogonalised regressor, float32, shape (n, 1)
        '''
        x_new = np.asarray(x_new, dtype=np.float32).reshape((-1, 1))
        basis = np.asarray(self.newproject, dtype=np.float32)
        if basis.size == 0:
            # Nothing selected yet: there is nothing to project out.
            return x_new.copy()
        inner = (basis.T @ x_new).ravel()       # numerators
        sq_norms = np.sum(basis ** 2, axis=0)   # denominators
        # All-zero basis columns contribute nothing instead of dividing by 0.
        weights = np.divide(inner, sq_norms,
                            out=np.zeros_like(inner), where=sq_norms > 0)
        return x_new - basis @ weights.reshape((-1, 1))

    def OGA(self):
        '''
        Perform one OGA step: select one regressor and update the fit.

        1. Among the regressors not yet selected, pick the one whose
           no-intercept least-squares fit to the current residual ``u`` has
           the smallest error (equivalently the largest (x.u)^2 / (x.x)).
        2. Orthogonalise it against the regressors already selected,
           x_new_ = x - x_project(x), and append x_new_ to ``newproject``.
        3. Regress ``u`` on x_new_, append the coefficient to ``coef``,
           subtract the fitted part from ``u`` and refresh ``sigma``.

        Does nothing when every regressor has already been selected.

        :return: None
        '''
        n_feat = self.X_feat_np.shape[1]
        available = np.ones(n_feat, dtype=bool)
        available[np.asarray(self.columnsind, dtype=int)] = False
        if not available.any():
            return

        inner = (self.X_feat_np.T @ self.u).ravel()
        sq_norms = np.sum(self.X_feat_np ** 2, axis=0)
        # Reduction in squared error from fitting u on each single column.
        gain = np.divide(inner ** 2, sq_norms,
                         out=np.zeros_like(sq_norms), where=sq_norms > 0)
        gain[~available] = -np.inf   # never re-select a regressor
        best = int(np.argmax(gain))  # first maximum wins, as in a scan

        x_perp = self._x_project(self.X_feat_np[:, [best]])
        denom = float(np.sum(x_perp ** 2))
        beta = float(np.sum(x_perp * self.u)) / denom if denom > 0 else 0.0

        self.columnsind.append(best)
        self.newproject = np.hstack([self.newproject, x_perp])
        self.coef = np.append(self.coef, beta)
        self.u = self.u - beta * x_perp
        self.sigma = float(np.mean(self.u ** 2))


# Build the selector (its constructor already runs one OGA step), then show
# the object itself, the iteration cap for D=2, and its `u` attribute.
OGA = OGA_HDIC_Trim(df)
for shown in (OGA, OGA.MaxIterKn(D=2), OGA.u):
    print(shown)


```

```python
import numpy as np
import pandas as pd
import ta
import random
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import Adam


def add_technical_indicators(data, window_size=10):
    """Add moving-average, RSI, Bollinger-band and rolling high/low columns.

    The frame is modified in place: the indicator columns are added and every
    row containing a NaN (the warm-up rows of the rolling indicators) is
    dropped.

    Args:
        data: DataFrame with at least 'Close', 'High' and 'Low' columns.
        window_size: look-back length for the 'Past_High' / 'Past_Low'
            rolling extremes.

    Returns:
        The same ``data`` object, after modification.
    """
    close = data['Close']
    data['MA_5'] = ta.trend.sma_indicator(close, window=5)
    data['MA_10'] = ta.trend.sma_indicator(close, window=10)
    data['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()

    # Build the Bollinger indicator once and read all three bands from it
    # rather than recomputing it for every band.
    bands = ta.volatility.BollingerBands(close)
    data['BB_HIGH'] = bands.bollinger_hband()
    data['BB_MID'] = bands.bollinger_mavg()
    data['BB_LOW'] = bands.bollinger_lband()

    data['Past_High'] = data['High'].rolling(window=window_size).max()
    data['Past_Low'] = data['Low'].rolling(window=window_size).min()
    data.dropna(inplace=True)
    return data


class LSTMTrader:
    """Epsilon-greedy deep Q-learning agent with an experience-replay buffer.

    Despite the class name, the network built by ``_build_model`` is a plain
    feed-forward stack of Dense layers; it contains no LSTM layer.

    Args:
        input_dim: number of input features of one (flattened) state.
        action_space: number of discrete actions.
    """

    def __init__(self, input_dim, action_space):
        self.input_dim = input_dim
        self.action_space = action_space
        self.memory = deque(maxlen=2000)  # replay buffer, oldest dropped first
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network: input_dim -> 64 -> 32 -> actions."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.input_dim, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=self.action_space, activation="linear"))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by newer Keras releases.
        model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.  ``state``
        must carry a leading batch axis, i.e. have shape (1, input_dim).
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Train on a random minibatch of remembered transitions.

        At most ``len(self.memory)`` transitions are sampled, so calling this
        with a buffer smaller than ``batch_size`` (or empty) does not raise.
        After the minibatch, ``epsilon`` is decayed once, down to
        ``epsilon_min``.
        """
        batch_size = min(batch_size, len(self.memory))
        if batch_size == 0:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrapped one-step Q-learning target.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        # Decay once per replay call.  Decaying once per sample would shrink
        # epsilon by epsilon_decay ** batch_size on every call and end
        # exploration after only a few dozen replays.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)


def preprocess_data(data, window_size):
    """Stack every length-``window_size`` sliding window of ``data``.

    Windows advance one position at a time along the first axis.  When
    ``data`` is shorter than ``window_size`` the result is an empty array.
    """
    n_windows = len(data) - window_size + 1
    return np.array(
        [data[start:start + window_size] for start in range(n_windows)]
    )


def prepare_data(data, window_size):
    """Turn a raw price frame into an array of sliding feature windows.

    Indicators are added first, then the 'Date' and 'Time' columns are
    dropped, and the remaining columns are converted to a NumPy array and
    cut into windows of ``window_size`` consecutive rows.
    """
    enriched = add_technical_indicators(data)
    features = enriched.drop(columns=['Date', 'Time']).to_numpy()
    return preprocess_data(features, window_size)


def split_data(data, train_ratio):
    """Split ``data`` in order into a leading and a trailing part.

    The first ``int(train_ratio * len(data))`` items form the training part
    and the rest the test part; nothing is shuffled.
    """
    cut = int(train_ratio * len(data))
    return data[:cut], data[cut:]


def main():
    """Train the Q-learning agent on 1-minute XAG/USD bars.

    Reads the header-less price CSV, builds sliding feature windows, and
    runs ``episodes`` passes over the training windows.  Each step feeds the
    flattened current window to the agent, applies the take-profit /
    stop-loss bookkeeping on the newest bar of that window, stores the
    transition and replays a minibatch.  Weights are checkpointed every ten
    episodes and the realised profit of each episode is printed.

    Actions 1 and 2 open a long / short position at the current close;
    actions 0 and 3 do nothing.

    Raises:
        ValueError: if the data yields fewer than two training windows.
    """
    import os  # local import: only main() touches the checkpoint directory

    window_size = 10
    train_ratio = 0.8
    batch_size = 32
    episodes = 500
    checkpoint_dir = "lstm_trader_tmp"

    # Column positions inside one bar of the prepared feature array.
    # prepare_data() drops Date/Time, leaving Open, High, Low, Close, Volume
    # followed by the indicator columns, of which Past_High and Past_Low are
    # added last by add_technical_indicators().
    close_col = 3
    past_high_col = -2
    past_low_col = -1

    # The CSV file has no header row, so the column names are assigned here.
    data = pd.read_csv(r'/work/historykdata/histdata_com/xagusd_mt5/DAT_MT_XAGUSD_M1_2022.csv', header=None)
    column_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    data.columns = column_names

    # Shape after this call: (n_windows, window_size, n_features).
    data = prepare_data(data, window_size)
    train_data, test_data = split_data(data, train_ratio)
    if data.ndim != 3 or len(train_data) < 2:
        raise ValueError("need at least two training windows to form a transition")

    # One state is a whole window flattened to a single row, so the network
    # input width is window_size * n_features.
    agent = LSTMTrader(input_dim=data.shape[1] * data.shape[2], action_space=4)

    # save_weights() does not create missing directories.
    # NOTE(review): some Keras releases require weight files to end in
    # ".weights.h5" - confirm against the installed version.
    os.makedirs(checkpoint_dir, exist_ok=True)

    def to_state(window):
        # The network expects a leading batch axis: (1, input_dim).
        return np.asarray(window, dtype=np.float32).reshape(1, -1)

    for e in range(episodes):
        total_profit = 0
        position = None       # entry price of the open position, if any
        position_type = None  # "long" or "short" while a position is open
        state = to_state(train_data[0])

        for t in range(len(train_data) - 1):
            action = agent.act(state)
            next_state = to_state(train_data[t + 1])

            reward = 0
            done = False

            bar = train_data[t][-1]  # newest bar of the current window
            close = bar[close_col]
            past_high = bar[past_high_col]
            past_low = bar[past_low_col]

            # NOTE(review): the levels are re-read from the current bar on
            # every step rather than frozen at entry - confirm this is the
            # intended strategy.
            if position is not None:
                if position_type == "long":
                    hit_take_profit = close >= past_high
                    hit_stop_loss = close <= past_low
                    pnl = close - position
                else:
                    # A short gains when the price falls, so its take-profit
                    # sits at the low and its stop-loss at the high.
                    hit_take_profit = close <= past_low
                    hit_stop_loss = close >= past_high
                    pnl = position - close

                if hit_take_profit or hit_stop_loss:
                    # Realised PnL is signed already: a stopped-out trade
                    # lowers the total.
                    total_profit += pnl
                    reward = 1 if hit_take_profit else -1
                    done = True
                    position = None
                    position_type = None

            # NOTE(review): opening while a position is already open
            # replaces it without realising its PnL - confirm intended.
            if action == 1:
                position_type = "long"
                position = close
            elif action == 2:
                position_type = "short"
                position = close

            agent.remember(state, action, reward, next_state, done)
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

            state = next_state

        if e % 10 == 0:
            agent.save(f"{checkpoint_dir}/lstm_trader-{e}.h5")

        print("Episode: {}/{}, Total Profit: {}".format(e, episodes, total_profit))


# Run the training loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
```