from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier,XGBRegressor
import matplotlib.pylab as plt
def _load_frame(path):
    """Read *path* as a DataFrame and drop its first column.

    Both CSVs carry a serialized row index in column 0; it must not be
    mistaken for a feature, so it is dropped immediately after reading.
    """
    frame = pd.read_csv(path)
    return frame.drop(frame.columns[0], axis=1)


# Randomize training-row order so fitting does not see any file ordering.
train = shuffle(_load_frame('train.csv'))

# Columns excluded from the feature matrix: the regression target plus
# identifier/date columns that must not leak into the model.
target = ['change', 'code', 'date']

test = _load_frame('test.csv')

x_train = train.drop(target, axis=1).values
y_train = train['change'].values
x_test = test.drop(target, axis=1).values
# Fit an XGBoost regressor predicting 'change' from the feature matrix.
#
# Fixes over the original configuration:
#  * objective 'reg:linear' was renamed 'reg:squarederror' (deprecated,
#    later removed from XGBoost).
#  * booster was 'gblinear', which silently ignores every tree parameter
#    set here (max_depth, subsample, colsample_*) and has no 'gain'
#    feature importance — so the feature_importances_ read further below
#    would fail. The tree settings and the saved file name
#    ('tree100.model') show gbtree was intended.
#  * silent=True / missing=None / seed=None / nthread=None are deprecated
#    in the sklearn wrapper; use verbosity, np.nan, and random_state.
xgre = XGBRegressor(
    max_depth=30,
    learning_rate=0.01,
    n_estimators=5,
    verbosity=0,
    objective='reg:squarederror',
    booster='gbtree',
    n_jobs=50,
    gamma=0,
    min_child_weight=1,
    max_delta_step=0,
    subsample=1,
    colsample_bytree=1,
    colsample_bylevel=1,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    base_score=0.5,
    random_state=0,
    missing=np.nan,
    importance_type='gain',
)

print("training...")
xgre.fit(x_train, y_train, verbose=True)
# Persist the fitted booster so it can be reloaded without retraining.
xgre.save_model('tree100.model')
print('training is ok')

# Predictions for the held-out test features.
fit_pred = xgre.predict(x_test)
# --- Inspect which features the trained model relied on -----------------
# Feature names in the same order as the columns of x_train, so they line
# up index-for-index with feature_importances_.
feature_names = list(train.drop(target, axis=1).columns)
importances = xgre.feature_importances_
print(importances)

# Rank features by importance (descending) and persist the ranking.
# NOTE: Series.to_excel needs an optional Excel engine (openpyxl) installed.
feat_imp = pd.Series(importances, index=feature_names).sort_values(ascending=False)
feat_imp.to_excel('feature_importance.xlsx')

feat_imp.plot(kind='bar', title='Feature Importances')
# Fix: without show() the bar chart is never rendered when run as a script.
plt.show()
# (Web-scrape residue from the original blog page, kept as a comment — the
# bare prose line was a SyntaxError in Python.)
# XGBRegressor
# Latest recommended article published 2024-08-13 14:53:52