你怎么从全世界找到喜欢你的人?
select * from world where someone like '%you%';
no results!
项目工程和数据集我已上传到《集成学习:随机森林、GBDT、XGBoost实战代码合集》。
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import sklearn
from sklearn.linear_model import LinearRegression, LassoCV, Ridge, ElasticNetCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
# FIX: ConvergenceWarning must be imported from the public sklearn.exceptions
# module; the private path sklearn.linear_model.coordinate_descent was removed
# in scikit-learn 0.24, so the old import raises ImportError on modern versions.
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.ensemble import BaggingRegressor,AdaBoostRegressor,GradientBoostingRegressor
## Configure the matplotlib font so Chinese axis labels render correctly
mpl.rcParams['font.sans-serif']=[u'simHei']
mpl.rcParams['axes.unicode_minus']=False
## Suppress the expected convergence warnings from the iterative solvers
warnings.filterwarnings(action = 'ignore', category=ConvergenceWarning)
def notEmpty(s):
    """Return True when the split token *s* is a non-blank string.

    Splitting a data line on single spaces produces '' for every run of
    consecutive spaces; this predicate drops those empty tokens.
    """
    return bool(s)
## Load the Boston housing dataset
names = ['CRIM','ZN', 'INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
path = "datas/boston_housing.data"
## The raw file is whitespace-aligned with a variable number of spaces between
## columns, so read each line as a single string and tokenise it by hand.
fd = pd.read_csv(path,header=None)
data = np.empty((len(fd), 14))  # 13 feature columns + 1 target per row
for row, line in enumerate(fd.values):
    tokens = [tok for tok in line[0].split(' ') if tok != '']
    data[row] = [float(tok) for tok in tokens]
## Split the matrix into the 13 feature columns (x) and the target column (y)
x, y = np.split(data, (13,), axis=1)
print (x[0:5])
y = y.ravel()  # flatten (n, 1) -> (n,)
print (y[0:5])
ly=len(y)
print(y.shape)
print ("样本数据量:%d, 特征个数:%d" % x.shape)
print ("target样本数据量:%d" % y.shape[0])
[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
6.5200e+01 4.0900e+00 1.0000e+00 2.9600e+02 1.5300e+01 3.9690e+02
4.9800e+00]
[2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02
9.1400e+00]
[2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
6.1100e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9283e+02
4.0300e+00]
[3.2370e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.9980e+00
4.5800e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9463e+02
2.9400e+00]
[6.9050e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 7.1470e+00
5.4200e+01 6.0622e+00 3.0000e+00 2.2200e+02 1.8700e+01 3.9690e+02
5.3300e+00]]
[24. 21.6 34.7 33.4 36.2]
(506,)
样本数据量:506, 特征个数:13
target样本数据量:506
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=28)
## Baseline: a single Ridge regression (linear model with an L2 penalty)
lr = Ridge(alpha=0.1)
lr.fit(x_train, y_train)
ridge_train_r2 = lr.score(x_train, y_train)
ridge_test_r2 = lr.score(x_test, y_test)
print("训练集上R^2:%.5f" % ridge_train_r2)
print("测试集上R^2:%.5f" % ridge_test_r2)
训练集上R^2:0.77123
测试集上R^2:0.56367
# Bagging ensemble of Ridge regressors: each of the 50 base learners is fit on
# 70% of the rows and 80% of the features, and their predictions are averaged.
base_ridge = Ridge(alpha=0.1)
bg = BaggingRegressor(base_ridge, n_estimators=50, max_samples=0.7, max_features=0.8, random_state=28)
bg.fit(x_train, y_train)
bg_train_r2 = bg.score(x_train, y_train)
bg_test_r2 = bg.score(x_test, y_test)
print("训练集上R^2:%.5f" % bg_train_r2)
print("测试集上R^2:%.5f" % bg_test_r2)
训练集上R^2:0.76210
测试集上R^2:0.57320
# AdaBoost ensemble built on plain linear-regression base learners
base_linear = LinearRegression()
adr = AdaBoostRegressor(base_linear, n_estimators=100, learning_rate=0.001, random_state=14)
adr.fit(x_train, y_train)
adr_train_r2 = adr.score(x_train, y_train)
adr_test_r2 = adr.score(x_test, y_test)
print("训练集上R^2:%.5f" % adr_train_r2)
print("测试集上R^2:%.5f" % adr_test_r2)
训练集上R^2:0.77334
测试集上R^2:0.56307
# Gradient boosting (GBDT); its base learner is always a CART regression tree
gbdt = GradientBoostingRegressor(learning_rate=0.01, n_estimators=100, random_state=14)
gbdt.fit(x_train, y_train)
gbdt_train_r2 = gbdt.score(x_train, y_train)
gbdt_test_r2 = gbdt.score(x_test, y_test)
print("训练集上R^2:%.5f" % gbdt_train_r2)
print("测试集上R^2:%.5f" % gbdt_test_r2)
训练集上R^2:0.76489
测试集上R^2:0.64161