class Tradaboost_R2:
def __init__(self, base_estimator=DecisionTreeRegressor(), epoch=10):
"""
base_estimator:基学习器
epoch:迭代轮数
"""
self.base_estimator = base_estimator # 基学习器可以自行选择XGB,LGMB等
self.epoch = epoch
self.regressors = []
def fit(self, x_source, x_target, y_source, y_target):
# 将源域和目标域转化为数组
x_train = np.concatenate((x_source, x_target), axis=0)
y_train = np.concatenate((y_source, y_target), axis=0)
x_train = np.asarray(x_train, order='C')
y_train = np.asarray(y_train, order='C')
y_source = np.asarray(y_source, order='C')
y_target = np.asarray(y_target, order='C')
row_source = x_source.shape[0]
row_target = x_target.shape[0]
# 初始化权重
weight_source = 1 * np.ones([row_source, 1]) / row_source # 这边源域和目标域的初始化权重可以自行优化
weight_target = 1 * np.ones([row_target, 1]) / row_target
weights = np.concatenate((weight_source, weight_target), axis=0)
# 初始化beta和result
beta = 1 / (1 + np.sqrt(2 * np.log(row_source / self.epoch)))
bata_T = np.zeros([1, self.epoch])
result = np.ones([row_source + row_target, self.epoch])
for i in range(self.epoch):
# 令sum(weights)=1
weights = self._calculate_weight(weights)
print(f'-----The weights of epoch {i} is-----:', weights, sep='\n')
# 拟合学习器
self.base_estimator.fit(x_train, y_train, sample_weight=weights[:, 0])
self.regressors.append(copy.deepcopy(self.base_estimator))
result[:, i] = self.base_estimator.predict(x_train)
# print(result[:, i])
# 计算误差率
error_rate = self._calculate_error_rate(y_target, result[row_source:, i], weights[row_source:, :])
# print(f'-----The accuracy of epoch {i} is-----:', xgb_mape(y_target,result[row_source:, i]), sep='\n')
if error_rate > 0.5:
error_rate = 0.5
if error_rate <= 1e-10:
self.epoch = i
print("Early stopping...")
break
bata_T[0, i] = error_rate / (1 - error_rate)
# 更新权重
D_t = np.abs(np.array(result[:, i]) - np.array(y_train)).max()
for j in range(row_target):
weights[row_source + j] = weights[row_source + j] * np.power(bata_T[0, i], -(np.abs(result[row_source + j, i] - y_target[j]) / D_t)) # 权重的更新方式可以优化,根据需求加快或者放慢权重的更新速度
for j in range(row_source):
weights[j] = weights[j] * np.power(beta, np.abs(result[j, i] - y_source[j]) / D_t)
def predict(self, test):
result = np.ones([test.shape[0], self.epoch + 1])
predict = np.ones([test.shape[0]])
i = 0
# 每一轮迭代都预测一次测试集
for regressor in self.regressors:
pred = regressor.predict(test)
result[:, i] = pred
i += 1
# 通过集成来获取最终预测结果
for j in range(test.shape[0]):
predict[j] = np.sum(
result[j, int(np.floor(self.epoch / 2)):self.epoch]) / (self.epoch - int(np.floor(self.epoch / 2))) # 结果部分也可以自行优化,可选择最后k次结果进行融合
return predict
def _calculate_weight(self, weights):
sum_weight = np.sum(weights)
return np.asarray(weights / sum_weight, order='C')
def _calculate_error_rate(self, y_target, y_predict, weight):
total = np.abs(y_target - y_predict).max()
return np.sum(weight[:, 0] * np.abs(y_target - y_predict) / total)
# Tradaboost_R2 model source code
# First published 2023-09-05 16:16:59