class Tradaboost_R2:
def __init__(self, base_estimator=DecisionTreeRegressor(), epoch=10):
"""
base_estimator:基学习器
epoch:迭代轮数
"""
self.base_estimator = base_estimator # 基学习器可以自行选择XGB,LGMB等
self.epoch = epoch
self.regressors = []
def fit(self, x_source, x_target, y_source, y_target):
# 将源域和目标域转化为数组
x_train = np.concatenate((x_source, x_target), axis=0)
y_train = np.concatenate((y_source, y_target), axis=0)
x_train = np.asarray(x_train, order='C')
y_train = np.asarray(y_train, order='C')
y_source = np.asarray(y_source, order='C')
y_target = np.asarray(y_target, order='C')
row_source = x_source.shape[0]
row_target = x_target.shape[0]
# 初始化权重
weight_source = 1 * np.ones([row_source, 1]) / row_source # 这边源域和目标域的初始化权重可以自行优化
weight_target = 1 * np.ones([row_target, 1]) / row_target
weights = np.concatenate((weight_source, weight_target), axis=0)
# 初始化beta和result
beta = 1 / (1 + np.sqrt(2 * np.log(row_source / self.epoch)))
bata_T = np.zeros([1, self.epoch])
result = np.ones([row_source + row_target, self.epoch])
for i in range(self.epoch):
# 令sum(weights)=1
weights = self._calculate_weight(weights)
print(f'-----The weights of epoch {i} is-----:', weights, sep='\n')
# 拟合学习器
self.base_estimator.fit(x_train, y_train, sample_weight=weights[:, 0])
self.regressors.append(copy.deepcopy(self.base_estimator))
result[:, i] = self.base_estimator.predict(x_train)
# print(result[:, i])
# 计算误差率
error_rate = self._calculate_error_rate(y_target, result[row_source:, i], weights[row_source:, :])
# print(f'-----The accuracy of epoch {i} is-----:', xgb_mape(y_target,result[row_source:, i]), sep='\n')
if error_rate > 0.5:
error_rate = 0.5
if error_rate <= 1e-10:
self.epoch = i
print("Early stopping...")
break
bata_T[0, i] = error_rate / (1 - error_rate)
# 更新权重
D_t = np.abs(np.array(result[:, i]) - np.array(y_train)).max()
for j in range(row_target):
weights[row_source + j] = weights[row_source + j] * np.power(bata_T[0, i], -(np.abs(result[row_source + j, i] - y_target[j]) / D_t)) # 权重的更新方式可以优化,根据需求加快或者放慢权重的更新速度
for j in range(row_source):
weights[j] = weights[j] * np.power(beta, np.abs(result[j, i] - y_source[j]) / D_t)
def predict(self, test):
result = np.ones([test.shape[0], self.epoch + 1])
predict = np.ones([test.shape[0]])
i = 0
# 每一轮迭代都预测一次测试集
for regressor in self.regressors:
pred = regressor.predict(test)
result[:, i] = pred
i += 1
# 通过集成来获取最终预测结果
for j in range(test.shape[0]):
predict[j] = np.sum(
result[j, int(np.floor(self.epoch / 2)):self.epoch]) / (self.epoch - int(np.floor(self.epoch / 2))) # 结果部分也可以自行优化,可选择最后k次结果进行融合
return predict
def _calculate_weight(self, weights):
sum_weight = np.sum(weights)
return np.asarray(weights / sum_weight, order='C')
def _calculate_error_rate(self, y_target, y_predict, weight):
total = np.abs(y_target - y_predict).max()
return np.sum(weight[:, 0] * np.abs(y_target - y_predict) / total)
# Tradaboost_R2 model source code
# First published 2023-09-05 16:16:59