python gbdt.apply — 用Python实现GBDT回归

该博客介绍了如何用Python实现梯度提升回归算法,具体包括GradientBoostingRegressor类的定义,其内部fit和predict方法的详细过程,以及训练模型并评估准确性的示例。在示例中,数据集为波士顿房价数据,采用100个决策树,学习率为0.1,最大深度为2,并进行了随机抽样。
摘要由CSDN通过智能技术生成

from regression_tree import RegressionTree

from copy import copy

from utils import load_boston_house_prices, train_test_split, get_r2, run_time

from random import sample

from math import log, exp

class GradientBoostingRegressor(object):
    """Gradient boosting decision tree (GBDT) for regression.

    Attributes:
        trees {list} -- 1d list with RegressionTree objects
        lr {float} -- learning rate
        init_val {float} -- initial constant prediction (mean of training y)
    """

    def __init__(self):
        self.trees = None
        self.lr = None
        self.init_val = None

    def fit(self, X, y, n_estimators, lr, max_depth, min_samples_split,
            subsample=None):
        """Build a gradient boosting decision tree.

        With MSE as the loss function, L = Sum((yi - c) ^ 2) / m, the
        optimal constant initial prediction c is the mean of y; each
        subsequent tree is fit to the current residuals.

        Arguments:
            X {list} -- 2d list with int or float
            y {list} -- 1d list with int or float
            n_estimators {int} -- number of trees to fit
            lr {float} -- learning rate applied to each tree's output
            max_depth {int} -- maximum depth of each regression tree
            min_samples_split {int} -- minimum samples required to split

        Keyword Arguments:
            subsample {float} -- row-sampling rate in (0, 1]; None means
                use all rows for every tree (default: {None})
        """
        n = len(y)
        self.trees = []
        self.lr = lr
        # The mean of y minimizes MSE for a constant prediction.
        self.init_val = sum(y) / n
        residual = [yi - self.init_val for yi in y]
        for _ in range(n_estimators):
            # Sampling without replacement
            if subsample is None:
                idx = range(n)
            else:
                k = int(subsample * n)
                idx = sample(range(n), k)
            X_sub = [X[i] for i in idx]
            residual_sub = [residual[i] for i in idx]
            # Train Regression Tree on the sub-sample of X, residual
            tree = RegressionTree()
            tree.fit(X_sub, residual_sub, max_depth, min_samples_split)
            # Update residuals over the FULL training set (not just the
            # sub-sample), shrinking each tree's contribution by lr.
            residual = [r - lr * r_hat
                        for r, r_hat in zip(residual, tree.predict(X))]
            self.trees.append(tree)

    def _predict(self, Xi):
        """Auxiliary function of predict.

        Arguments:
            Xi {list} -- 1d list with int or float

        Returns:
            int or float -- prediction of yi
        """
        # Initial value plus the lr-scaled output of every tree.
        return self.init_val + sum(self.lr * tree._predict(Xi)
                                   for tree in self.trees)

    def predict(self, X):
        """Get the prediction of y.

        Arguments:
            X {list} -- 2d list object with int or float

        Returns:
            list -- 1d list object with int or float
        """
        return [self._predict(Xi) for Xi in X]

@run_time
def main():
    """Train a GBDT regressor on the Boston house-price data and report R^2."""
    print("Testing the accuracy of GBDT...")
    # Load data
    X, y = load_boston_house_prices()
    # Split data randomly, train set rate 70%
    X_train, X_test, split_train, split_test = train_test_split(
        X, y, random_state=10)
    # Train model
    reg = GradientBoostingRegressor()
    reg.fit(X=X_train, y=split_train, n_estimators=100,
            lr=0.1, max_depth=2, min_samples_split=2, subsample=0.95)
    # Model accuracy
    get_r2(reg, X_test, split_test)


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值