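'''
Outline:
Algorithm implementation
OGA + HDIC + Trim
HDIC = HDBIC or HDHQ
1. Run Kn iterations of OGA, where Kn is the upper bound on the number of OGA iterations.
2. After k regressors have been selected, compute the high-dimensional information criterion (HDIC).
3. Choose the k, 1 <= k <= Kn, that minimizes HDIC along the OGA path.
4. Use Trim to obtain the smallest set of relevant variables.
1. Kn computation 1
2. OGA implementation
3. HDIC formula 1
4. Trim 1
5. chooseMinK
6. getTrim
Train:
1.
Test:
pre:
cv:
'''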
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Synthetic sparse-regression problem used to exercise OGA_HDIC_Trim.
n_components, n_features = 512, 100
n_nonzero_coefs = 17
# NOTE(review): the orientation of the arrays returned by
# make_sparse_coded_signal depends on the installed scikit-learn version --
# confirm that X has one row per entry of y before stacking them below.
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)
# Append the response as the last column and label the columns a_0, a_1, ...
df = np.column_stack((X, y.T))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)
class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) with an HDIC model-size criterion.

    The last column of ``df`` is treated as the response; every other column
    is a candidate regressor.  Each call to :meth:`OGA` selects one more
    regressor.  The constructor performs the first selection.

    Attributes:
        df: the input DataFrame.
        row, col: number of rows / columns of ``df`` (``col`` counts the
            response column too).
        sigma: mean squared residual after the latest OGA step
            (``np.inf`` before the first step).
        newproject: DataFrame holding the selected regressors, each one
            orthogonalized against the earlier selections and stored under
            its original column name.
        coef: 1-D array with the coefficient of each orthogonalized
            regressor, in selection order.
        u: current residual of the response, shape ``(row, 1)``.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = df.shape
        self.sigma = np.inf
        # Share df's row index so selected columns stay aligned with it.
        self.newproject = pd.DataFrame(index=df.index)
        self.coef = np.array([])
        # The residual starts out as the response itself.
        self.u = df.iloc[:, -1].values.astype(float).reshape((-1, 1))
        self.OGA()

    def MaxIterKn(self, D=5):
        """Return the upper bound Kn on the number of OGA iterations.

        Kn = O((n / log p) ** 0.5); here Kn = int(D * (n / log p) ** 0.5)
        with n = ``self.row`` and p = ``self.col``.

        :param D: multiplicative constant.
        :return: Kn as an int.
        """
        # The bound divides by log(p); multiplying would let Kn grow with p.
        return int(D * (self.row / np.log(self.col)) ** 0.5)

    def HDIC(self, IC='HDBIC', c=2):
        """Return the high-dimensional information criterion of the model.

        HDIC = n * log(sigma) + #J * wn * log(p), where
        n is the number of samples (``self.row``), p the number of columns
        (``self.col``), sigma the mean squared residual (``self.sigma``) and
        #J the number of regressors selected so far.

        :param IC: 'HDBIC' (wn = log n) or 'HDHQ' (wn = c * log log n).
        :param c: constant used by 'HDHQ' only.
        :return: the criterion value.
        :raises ValueError: if ``IC`` is not one of the two supported names.
        """
        fit_term = self.row * np.log(self.sigma)
        size_term = self.newproject.shape[1] * np.log(self.col)
        if IC == 'HDBIC':
            return fit_term + size_term * np.log(self.row)
        if IC == 'HDHQ':
            return fit_term + size_term * c * np.log(np.log(self.row))
        raise ValueError(f"unknown information criterion: {IC!r}")

    def _x_project(self, x_new):
        """Remove from ``x_new`` its projection on the selected regressors.

        The columns of ``self.newproject`` are mutually orthogonal, so the
        projection is the sum of the one-dimensional projections
        ``(x_new . x_k) / (x_k . x_k) * x_k``.

        :param x_new: column vector of shape ``(row, 1)``.
        :return: the orthogonalized column, shape ``(row, 1)``.
        """
        basis = self.newproject.values
        if basis.size == 0:
            # Nothing selected yet: the column is already "orthogonal".
            return x_new
        dots = np.sum(x_new * basis, axis=0)
        norms = np.sum(basis ** 2, axis=0)
        weights = dots / norms
        projection = np.sum(weights * basis, axis=1).reshape((x_new.shape[0], -1))
        return x_new - projection

    def OGA(self):
        """Perform one OGA step: select one more regressor.

        1. Among the not-yet-selected regressors, pick the one whose
           no-intercept simple regression on the current residual ``self.u``
           leaves the smallest mean squared error.
        2. Orthogonalize it against the earlier selections, append it to
           ``self.newproject`` and update ``self.coef``, ``self.u`` and
           ``self.sigma``.

        :return: the orthogonalized selected column, shape ``(row, 1)``, or
            ``None`` when no usable candidate is left (state is unchanged).
        """
        response_name = self.df.columns[-1]
        residual = self.u
        best_col, best_err = None, np.inf
        for name in self.df.columns:
            # The response itself must never be offered as a regressor.
            if name == response_name or name in self.newproject.columns:
                continue
            x = self.df[name].values.astype(float).reshape((-1, 1))
            norm2 = float(np.sum(x ** 2))
            if norm2 == 0.0:
                continue  # an all-zero column cannot explain anything
            # Closed form of a one-regressor least-squares fit, no intercept.
            beta = float(np.sum(x * residual)) / norm2
            err = float(np.sum((residual - beta * x) ** 2)) / self.row
            if err < best_err:
                best_col, best_err = name, err
        if best_col is None:
            return None

        raw = self.df[best_col].values.astype(float).reshape((-1, 1))
        x_perp = self._x_project(raw)
        norm2 = float(np.sum(x_perp ** 2))
        # A column lying in the span of earlier selections adds nothing.
        coef = float(np.sum(x_perp * residual)) / norm2 if norm2 > 0.0 else 0.0

        self.newproject[best_col] = x_perp.ravel()
        self.coef = np.append(self.coef, coef)
        self.u = residual - coef * x_perp
        self.sigma = float(np.mean(self.u ** 2))
        return x_perp
# Build the selector on the synthetic data; the constructor calls OGA() once.
OGA = OGA_HDIC_Trim(df)
# The class defines no __repr__, so this prints the default object representation.
print(OGA)
# Iteration cap returned by MaxIterKn with D = 2.
print(OGA.MaxIterKn(D = 2))
# Current value of the `u` attribute.
print(OGA.u)
# --- Second revision of the OGA + HDIC + Trim script (state held in NumPy arrays) ---
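'''
Outline:
Algorithm implementation
OGA + HDIC + Trim
HDIC = HDBIC or HDHQ
1. Run Kn iterations of OGA, where Kn is the upper bound on the number of OGA iterations.
2. After k regressors have been selected, compute the high-dimensional information criterion (HDIC).
3. Choose the k, 1 <= k <= Kn, that minimizes HDIC along the OGA path.
4. Use Trim to obtain the smallest set of relevant variables.
1. Kn computation 1
2. OGA implementation
3. HDIC formula 1
4. Trim 1
5. chooseMinK
6. getTrim
Train:
1.
Test:
pre:
cv:
'''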
from sklearn.model_selection import KFold
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.datasets import make_sparse_coded_signal
# Synthetic sparse-regression problem used to exercise OGA_HDIC_Trim.
n_components, n_features = 512, 100
n_nonzero_coefs = 17
# NOTE(review): the orientation of the arrays returned by
# make_sparse_coded_signal depends on the installed scikit-learn version --
# confirm that X has one row per entry of y before stacking them below.
y, X, w = make_sparse_coded_signal(
    n_samples=1,
    n_components=n_components,
    n_features=n_features,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)
# Append the response as the last column and label the columns a_0, a_1, ...
df = np.column_stack((X, y.T))
col = [f'a_{i}' for i in range(df.shape[1])]
df = pd.DataFrame(df, columns=col)
print(df.shape)
class OGA_HDIC_Trim:
    """Orthogonal greedy algorithm (OGA) with an HDIC model-size criterion.

    The last column of ``df`` is treated as the response; the other columns
    are the candidate regressors.  Each call to :meth:`OGA` selects one more
    regressor.  The constructor performs the first selection.

    Attributes:
        df: the input DataFrame.
        row, col: number of rows / columns of ``df`` (``col`` counts the
            response column too).
        columns: column labels of ``df``.
        u: current residual of the response, float32, shape ``(row, 1)``.
        X_feat_np: float32 matrix of the candidate regressors,
            shape ``(row, col - 1)``.
        sigma: mean squared residual after the latest OGA step
            (``np.inf`` before the first step).
        columnsind: positions (into ``X_feat_np``) of the selected
            regressors, in selection order.
        newproject: ``(row, k)`` matrix of the k selected regressors, each
            orthogonalized against the earlier selections.
        coef: 1-D array with the coefficient of each orthogonalized
            regressor, in selection order.
    """

    def __init__(self, df):
        self.df = df
        self.row, self.col = self.df.shape
        self.columns = self.df.columns
        # The residual starts out as the response itself.
        self.u = self.df.iloc[:, -1].values.astype(np.float32).reshape((-1, 1))
        self.X_feat_np = self.df.iloc[:, :-1].values.astype(np.float32)
        self.sigma = np.inf
        self.columnsind = []
        # Start as an (row, 0) matrix so shape[1] and hstack work from step one.
        self.newproject = np.empty((self.row, 0), dtype=np.float32)
        self.coef = np.array([])
        self.OGA()

    def MaxIterKn(self, D=5):
        """Return the upper bound Kn on the number of OGA iterations.

        Kn = O((n / log p) ** 0.5); here Kn = int(D * (n / log p) ** 0.5)
        with n = ``self.row`` and p = ``self.col``.

        :param D: multiplicative constant.
        :return: Kn as an int.
        """
        # The bound divides by log(p); multiplying would let Kn grow with p.
        return int(D * (self.row / np.log(self.col)) ** 0.5)

    def HDIC(self, IC='HDBIC', c=2):
        """Return the high-dimensional information criterion of the model.

        HDIC = n * log(sigma) + #J * wn * log(p), where
        n is the number of samples (``self.row``), p the number of columns
        (``self.col``), sigma the mean squared residual (``self.sigma``) and
        #J the number of regressors selected so far.

        :param IC: 'HDBIC' (wn = log n) or 'HDHQ' (wn = c * log log n).
        :param c: constant used by 'HDHQ' only.
        :return: the criterion value.
        :raises ValueError: if ``IC`` is not one of the two supported names.
        """
        fit_term = self.row * np.log(self.sigma)
        size_term = self.newproject.shape[1] * np.log(self.col)
        if IC == 'HDBIC':
            return fit_term + size_term * np.log(self.row)
        if IC == 'HDHQ':
            return fit_term + size_term * c * np.log(np.log(self.row))
        raise ValueError(f"unknown information criterion: {IC!r}")

    def _x_project(self, x_new):
        """Remove from ``x_new`` its projection on the selected regressors.

        The columns of ``self.newproject`` are mutually orthogonal, so the
        projection is the sum of the one-dimensional projections
        ``(x_new . x_k) / (x_k . x_k) * x_k``.

        :param x_new: column vector of shape ``(row, 1)``.
        :return: the orthogonalized column, shape ``(row, 1)``.
        """
        basis = self.newproject
        if basis.size == 0:
            # Nothing selected yet: the column is already "orthogonal".
            return x_new
        dots = np.sum(x_new * basis, axis=0)
        norms = np.sum(basis ** 2, axis=0)
        weights = dots / norms
        projection = np.sum(weights * basis, axis=1).reshape((x_new.shape[0], -1))
        return x_new - projection

    def OGA(self):
        """Perform one OGA step: select one more regressor.

        1. Among the not-yet-selected regressors, pick the one whose
           no-intercept simple regression on the current residual ``self.u``
           leaves the smallest squared error.
        2. Orthogonalize it against the earlier selections, append it to
           ``self.newproject`` and update ``self.columnsind``, ``self.coef``,
           ``self.u`` and ``self.sigma``.

        :return: the orthogonalized selected column, shape ``(row, 1)``, or
            ``None`` when no usable candidate is left (state is unchanged).
        """
        residual = self.u
        features = self.X_feat_np
        norms = np.sum(features ** 2, axis=0)
        dots = np.sum(features * residual, axis=0)

        # Candidates: non-zero columns that have not been selected yet.
        usable = norms > 0
        if self.columnsind:
            usable[self.columnsind] = False
        if not usable.any():
            return None

        # SSE_j = |u|^2 - (x_j . u)^2 / |x_j|^2, so the smallest error is the
        # largest explained sum of squares; argmax keeps the first on ties.
        explained = np.full(norms.shape, -np.inf)
        explained[usable] = dots[usable] ** 2 / norms[usable]
        best = int(np.argmax(explained))

        x_perp = self._x_project(features[:, [best]])
        norm2 = float(np.sum(x_perp ** 2))
        # A column lying in the span of earlier selections adds nothing.
        coef = float(np.sum(x_perp * residual)) / norm2 if norm2 > 0.0 else 0.0

        self.columnsind.append(best)
        self.newproject = np.hstack([self.newproject, x_perp])
        self.coef = np.append(self.coef, coef)
        self.u = residual - coef * x_perp
        self.sigma = float(np.mean(self.u ** 2))
        return x_perp
# Build the selector on the synthetic data; the constructor calls OGA() once.
OGA = OGA_HDIC_Trim(df)
# The class defines no __repr__, so this prints the default object representation.
print(OGA)
# Iteration cap returned by MaxIterKn with D = 2.
print(OGA.MaxIterKn(D = 2))
# Current value of the `u` attribute.
print(OGA.u)
# --- Separate script: epsilon-greedy Q-learning trading agent on XAGUSD minute bars ---
import numpy as np
import pandas as pd
import ta
import random
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import Adam
def add_technical_indicators(data, window_size=10):
    """Add indicator columns to ``data`` in place and drop incomplete rows.

    Columns appended, in this order: ``MA_5``, ``MA_10``, ``RSI``,
    ``BB_HIGH``, ``BB_MID``, ``BB_LOW``, ``Past_High``, ``Past_Low``.

    :param data: DataFrame with at least ``Close``, ``High`` and ``Low``
        columns.  It is modified in place (columns added, NaN rows dropped).
    :param window_size: look-back length of the rolling ``Past_High`` /
        ``Past_Low`` columns.
    :return: the same DataFrame object.
    """
    close = data['Close']
    data['MA_5'] = ta.trend.sma_indicator(close, window=5)
    data['MA_10'] = ta.trend.sma_indicator(close, window=10)
    data['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()
    # Build the Bollinger indicator once and read its three bands from it.
    bands = ta.volatility.BollingerBands(close)
    data['BB_HIGH'] = bands.bollinger_hband()
    data['BB_MID'] = bands.bollinger_mavg()
    data['BB_LOW'] = bands.bollinger_lband()
    data['Past_High'] = data['High'].rolling(window=window_size).max()
    data['Past_Low'] = data['Low'].rolling(window=window_size).min()
    # The first rows have no full look-back window; the indicators are NaN there.
    data.dropna(inplace=True)
    return data
class LSTMTrader:
    """Epsilon-greedy Q-learning agent with an experience-replay buffer.

    Despite the name, the network built by :meth:`_build_model` is a plain
    feed-forward stack of Dense layers; no LSTM layer is used.

    Attributes:
        input_dim: length of the flat state vector fed to the network.
        action_space: number of discrete actions.
        memory: replay buffer holding at most 2000 transitions.
        gamma: discount factor for future rewards.
        epsilon, epsilon_min, epsilon_decay: exploration rate, its floor and
            the factor applied to it after every :meth:`replay` call.
        learning_rate: learning rate of the Adam optimizer.
        model: the compiled Keras model.
    """

    def __init__(self, input_dim, action_space):
        self.input_dim = input_dim
        self.action_space = action_space
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (one linear output per action)."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.input_dim, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=self.action_space, activation="linear"))
        # `lr` is the deprecated spelling of this argument; newer Keras
        # releases reject it.
        model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index: random with probability epsilon, else greedy.

        :param state: batch of one state, as accepted by ``model.predict``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Fit the network on a random minibatch, then decay epsilon.

        :param batch_size: number of transitions to sample; clamped to the
            number currently stored.  Nothing happens on an empty buffer.
        """
        batch_size = min(batch_size, len(self.memory))
        if batch_size == 0:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bellman target: reward plus discounted best next Q-value.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the Q-value of the action actually taken is moved.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)
def preprocess_data(data, window_size):
    """Return every run of ``window_size`` consecutive rows of ``data``.

    :param data: sliceable sequence (e.g. a NumPy array) of length n.
    :param window_size: number of rows per window; must be at least 1.
    :return: array stacking the ``n - window_size + 1`` overlapping windows,
        in order; an empty array when ``data`` is shorter than one window.
    :raises ValueError: if ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    n_windows = len(data) - window_size + 1
    return np.array([data[start:start + window_size] for start in range(n_windows)])
def prepare_data(data, window_size):
    """Turn a raw price DataFrame into an array of overlapping windows.

    Adds the technical indicators (which also modifies ``data`` in place),
    drops the ``Date`` and ``Time`` columns and cuts the remaining matrix
    into windows of ``window_size`` consecutive rows.

    :param data: DataFrame containing ``Date`` and ``Time`` columns plus the
        price columns required by ``add_technical_indicators``.
    :param window_size: number of rows per window.
    :return: array of windows as produced by ``preprocess_data``.
    """
    # NOTE(review): the indicator look-back keeps its own default here;
    # window_size only controls the sliding windows -- confirm this is intended.
    enriched = add_technical_indicators(data)
    features = enriched.drop(columns=['Date', 'Time']).to_numpy()
    return preprocess_data(features, window_size)
def split_data(data, train_ratio):
    """Split ``data`` chronologically into a leading and a trailing part.

    :param data: sliceable sequence.
    :param train_ratio: fraction of ``data`` that goes into the first part;
        the cut index is ``int(train_ratio * len(data))``.
    :return: ``(train_data, test_data)``.
    """
    split_index = int(train_ratio * len(data))
    return data[:split_index], data[split_index:]
def main(
    csv_path=r'/work/historykdata/histdata_com/xagusd_mt5/DAT_MT_XAGUSD_M1_2022.csv',
    window_size=10,
    train_ratio=0.8,
    batch_size=32,
    episodes=500,
):
    """Train the trading agent on minute bars read from a CSV file.

    Each state is one window of ``window_size`` bars flattened to a vector of
    shape ``(1, window_size * n_features)``.  Actions: 1 opens a long, 2 opens
    a short (only while flat); any other action does nothing.  An open
    position is closed when the latest close reaches the rolling high/low
    levels: reward +1 on take-profit, -1 on stop-loss.

    :param csv_path: header-less CSV with the columns Date, Time, Open,
        High, Low, Close, Volume.
    :param window_size: number of bars per state window.
    :param train_ratio: fraction of the windows used for training.
    :param batch_size: replay minibatch size.
    :param episodes: number of passes over the training windows.
    :raises ValueError: if the file yields no complete window.
    """
    import os  # local import: only needed to create the checkpoint folder

    data = pd.read_csv(csv_path, header=None)
    data.columns = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    data = prepare_data(data, window_size)
    if data.ndim != 3:
        raise ValueError("not enough rows to build a single window")
    train_data, _test_data = split_data(data, train_ratio)

    # Feature layout after prepare_data: Open, High, Low, Close, Volume,
    # then the indicators, with Past_High and Past_Low as the last two.
    close_idx, past_high_idx, past_low_idx = 3, -2, -1

    # The network consumes a whole flattened window, not a single bar.
    agent = LSTMTrader(input_dim=data.shape[1] * data.shape[2], action_space=4)

    # NOTE(review): recent Keras releases require weight files to end in
    # ".weights.h5" -- confirm against the installed version.
    os.makedirs("lstm_trader_tmp", exist_ok=True)

    for e in range(episodes):
        total_profit = 0
        position = None       # entry price of the open position, if any
        position_type = None  # "long" or "short" while a position is open

        for t in range(window_size, len(train_data) - 1):
            # Decide on the previous window, then observe the outcome on bar t.
            state = np.asarray(train_data[t - 1], dtype=np.float32).reshape(1, -1)
            next_state = np.asarray(train_data[t], dtype=np.float32).reshape(1, -1)
            action = agent.act(state)
            reward = 0
            done = False

            latest_bar = train_data[t][-1]
            close = latest_bar[close_idx]
            past_high = latest_bar[past_high_idx]
            past_low = latest_bar[past_low_idx]

            if position is not None:
                if position_type == "long":
                    pnl = close - position
                    hit_target = close >= past_high
                    hit_stop = close <= past_low
                else:
                    # A short profits when price falls, so its levels mirror
                    # those of a long.
                    pnl = position - close
                    hit_target = close <= past_low
                    hit_stop = close >= past_high
                if hit_target or hit_stop:
                    # pnl is already signed, so a stopped-out trade lowers the total.
                    total_profit += pnl
                    reward = 1 if hit_target else -1
                    done = True
                    position = None
                    position_type = None

            # Open a new position only while flat, so that an open trade's
            # entry price is never silently overwritten.
            if position is None:
                if action == 1:
                    position_type = "long"
                    position = close
                elif action == 2:
                    position_type = "short"
                    position = close

            agent.remember(state, action, reward, next_state, done)
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        if e % 10 == 0:
            agent.save(f"lstm_trader_tmp/lstm_trader-{e}.h5")
        print("Episode: {}/{}, Total Profit: {}".format(e, episodes, total_profit))
# Run the training loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
# (Stray blog-editor placeholder text removed; end of the script.)