python gdbt.apply_用Python实现GBDT回归

最新推荐文章于 2022-05-11 21:28:48 发布

weixin_39887386

最新推荐文章于 2022-05-11 21:28:48 发布

阅读量88

点赞数

文章标签： python gdbt.apply

该博客介绍了如何用Python实现梯度提升（GBDT）回归算法，具体包括GradientBoostingRegressor类的定义、其fit和predict方法的详细过程，以及训练模型并评估准确性的示例。示例使用波士顿房价数据集，采用100棵回归树，学习率为0.1，最大深度为2，并在每轮训练时按0.95的比例对样本进行无放回随机抽样。

摘要由CSDN通过智能技术生成

from regression_tree import RegressionTree

from copy import copy

from utils import load_boston_house_prices, train_test_split, get_r2, run_time

from random import sample

from math import log, exp

class GradientBoostingRegressor(object):
    """GBDT class for regression.

    Attributes:
        trees {list} -- 1d list with RegressionTree objects, in fitting order
        lr {float} -- Learning rate applied to every tree's prediction
        init_val {float} -- Constant initial prediction (mean of the training y)
    """

    def __init__(self):
        """Create an unfitted model; every attribute is set by fit."""
        self.trees = None
        self.lr = None
        self.init_val = None

    def fit(self, X, y, n_estimators, lr, max_depth, min_samples_split, subsample=None):
        """Build a gradient boost decision tree.

        Set MSE as loss function, and c is a constant:
            L = MSE(y, c) = Sum((yi - c) ^ 2) / m, yi in y, m = len(y)
        Setting dL / dc = Sum(-2 * (yi - c)) / m to zero gives c = Sum(yi) / m,
        so the model starts from the mean of y. Each following tree is fitted
        to the current residuals, and the residuals are then reduced by
        lr times that tree's prediction.

        Arguments:
            X {list} -- 2d list object with int or float
            y {list} -- 1d list object with int or float
            n_estimators {int} -- Number of trees to fit
            lr {float} -- Learning rate
            max_depth {int} -- Forwarded unchanged to RegressionTree.fit
            min_samples_split {int} -- Forwarded unchanged to RegressionTree.fit

        Keyword Arguments:
            subsample {float} -- Fraction of the rows drawn (without
                replacement) to train each tree; None uses every row
                (default: {None})

        Raises:
            ValueError -- If y is empty (the initial mean is undefined).
        """
        n = len(y)
        if n == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("y must contain at least one sample")

        self.trees = []
        self.lr = lr
        self.init_val = sum(y) / n
        residual = [yi - self.init_val for yi in y]

        for _ in range(n_estimators):
            # Sampling without replacement
            if subsample is None:
                idx = range(n)
            else:
                k = int(subsample * n)
                idx = sample(range(n), k)
            X_sub = [X[i] for i in idx]
            residual_sub = [residual[i] for i in idx]

            # Train Regression Tree by sub-sample of X, residual
            tree = RegressionTree()
            tree.fit(X_sub, residual_sub, max_depth, min_samples_split)

            # Update the residual on the FULL training set, not just the
            # sub-sample, so the next tree sees every row's remaining error.
            residual = [r - lr * r_hat
                        for r, r_hat in zip(residual, tree.predict(X))]
            self.trees.append(tree)

    def _predict(self, Xi):
        """Auxiliary function of predict.

        Arguments:
            Xi {list} -- 1D list with int or float

        Returns:
            int or float -- prediction of yi
        """
        # Initial value plus the learning-rate-scaled output of each tree
        return self.init_val + sum(self.lr * tree._predict(Xi) for tree in self.trees)

    def predict(self, X):
        """Get the prediction of y.

        Arguments:
            X {list} -- 2d list object with int or float

        Returns:
            list -- 1d list object with int or float
        """
        return [self._predict(Xi) for Xi in X]

@run_time
def main():
    """Fit a GradientBoostingRegressor on the house price data and evaluate it.

    Loads the data, splits it into train and test parts, trains the model on
    the train part, then passes the model and the test part to get_r2.
    """
    print("Tesing the accuracy of GBDT...")
    # Load data
    X, y = load_boston_house_prices()
    # Split data randomly with a fixed seed.
    # NOTE(review): the original comment says the train set rate is 70%; that
    # would be train_test_split's default -- confirm against utils.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=10)
    # Train model
    reg = GradientBoostingRegressor()
    reg.fit(X=X_train, y=y_train, n_estimators=100,
            lr=0.1, max_depth=2, min_samples_split=2, subsample=0.95)
    # Model accuracy (presumably get_r2 reports R^2 on the test split -- confirm)
    get_r2(reg, X_test, y_test)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

weixin_39887386

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python gdbt.apply_用Python实现GBDT回归

from regression_tree import RegressionTreefrom copy import copyfrom utils import load_boston_house_prices, train_test_split, get_r2, run_timefrom random import samplefrom math import log, expclass Gra...
复制链接

扫一扫