xgboost网格搜索算法调参+多标签分类预测项目实战
文章目录
一、前言
极限梯度提升算法(XGBoost,eXtreme Gradient Boosting)由华盛顿大学学者陈天奇于2014年提出,因其优越的性能而被广泛应用于工业界和人工智能竞赛。
二、模型建立流程
1.引入库
代码如下(示例):
##加载库
#------------------------------------------------------------------------------
import xlrd
import numpy as np
#import xgboost as xgb
import time
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from xgboost import plot_tree
from sklearn import metrics
from sklearn.model_selection import train_test_split #数据集划分
from sklearn.metrics import accuracy_score #预测准确率
from xgboost import plot_importance #特征重要性分析
from sklearn.metrics import classification_report #分类报告
from sklearn.metrics import f1_score, precision_score, recall_score
2.读入数据
代码如下(示例):
#------------------------------------------------------------------------------
def excel2matrix(path):
    """Load the first worksheet of an Excel workbook into a 2-D float array.

    Args:
        path: Location of the workbook; passed unchanged to
            ``xlrd.open_workbook``.

    Returns:
        A ``numpy.ndarray`` of shape ``(nrows, ncols)`` created with
        ``np.zeros`` and filled row by row with the sheet's cell values.

    Raises:
        ValueError: Raised by NumPy when a row contains a value that cannot
            be converted to float (for example a text cell).
    """
    data = xlrd.open_workbook(path)
    # Only the first sheet is read; any other sheets are ignored.
    table = data.sheet_by_index(0)
    nrows = table.nrows  # number of rows
    ncols = table.ncols  # number of columns
    datamatrix = np.zeros((nrows, ncols))
    for i in range(nrows):
        # row_values returns a plain list; NumPy converts it to float on assignment.
        datamatrix[i, :] = table.row_values(i)
    return datamatrix
3.划分训练集和测试集
def datapart(X,y):
x_train,x_test,y_train,y_test = train_test_split(X,y,
test_size = 0.2,
random_state