xgboost

import xgboost

# First XGBoost model for the Pima Indians diabetes dataset
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
Y = dataset[:,8]
# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
# fit model on training data
model = XGBClassifier()
model.fit(X_train, y_train)
# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 77.95%
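If you need class probabilities rather than hard labels, the scikit-learn wrapper also exposes predict_proba. A minimal sketch, reusing model, X_test, and y_test from the listing above:

from sklearn.metrics import log_loss

# probability of the positive class for each test row
y_proba = model.predict_proba(X_test)[:, 1]
# log loss penalizes confident mistakes more heavily than accuracy does
print("Log loss: %.4f" % log_loss(y_test, y_proba))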

from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
Y = dataset[:,8]
# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
# fit model on training data, with early stopping on a held-out eval set
model = XGBClassifier()
eval_set = [(X_test, y_test)]
model.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="logloss", eval_set=eval_set, verbose=True)
# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

[0]   validation_0-logloss:0.660186

Will train until validation_0-logloss hasn't improved in 10 rounds.

[1]   validation_0-logloss:0.634854

[2]   validation_0-logloss:0.612239

[3]   validation_0-logloss:0.593118

[4]   validation_0-logloss:0.578303

[5]   validation_0-logloss:0.564942

[6]   validation_0-logloss:0.555113

[7]   validation_0-logloss:0.54499

[8]   validation_0-logloss:0.539151

[9]   validation_0-logloss:0.531819

[10]  validation_0-logloss:0.526065

[11]  validation_0-logloss:0.51977

[12]  validation_0-logloss:0.514979

[13]  validation_0-logloss:0.50927

[14]  validation_0-logloss:0.506086

[15]  validation_0-logloss:0.503565

[16]  validation_0-logloss:0.503591

[17]  validation_0-logloss:0.500805

[18]  validation_0-logloss:0.497605

[19]  validation_0-logloss:0.495328

[20]  validation_0-logloss:0.494777

[21]  validation_0-logloss:0.494274

[22]  validation_0-logloss:0.493333

[23]  validation_0-logloss:0.492211

[24]  validation_0-logloss:0.491936

[25]  validation_0-logloss:0.490578

[26]  validation_0-logloss:0.490895

[27]  validation_0-logloss:0.490646

[28]  validation_0-logloss:0.491911

[29]  validation_0-logloss:0.491407

[30]  validation_0-logloss:0.488828

[31]  validation_0-logloss:0.487867

[32]  validation_0-logloss:0.487297

[33]  validation_0-logloss:0.487562

[34]  validation_0-logloss:0.487788

[35]  validation_0-logloss:0.487962

[36]  validation_0-logloss:0.488218

[37]  validation_0-logloss:0.489582

[38]  validation_0-logloss:0.489334

[39]  validation_0-logloss:0.490969

[40]  validation_0-logloss:0.48978

[41]  validation_0-logloss:0.490704

[42]  validation_0-logloss:0.492369

Stopping. Best iteration:

[32]  validation_0-logloss:0.487297

 

Accuracy: 78.35%
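After early stopping, the fitted wrapper records where validation loss bottomed out. A minimal sketch of reading those attributes (present on the scikit-learn wrapper in the XGBoost versions this tutorial uses; the names have shifted in later releases):

# the boosting round with the lowest validation logloss (32 in the run above)
print("Best iteration: %d" % model.best_iteration)
# the logloss value at that round
print("Best logloss: %s" % model.best_score)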

from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import plot_importance
from matplotlib import pyplot
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
y = dataset[:,8]
# fit model on training data
model = XGBClassifier()
model.fit(X, y)
# plot feature importance
plot_importance(model)
pyplot.show()
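plot_importance labels the bars f0-f7 by default. To map the scores back to column names, pair the wrapper's feature_importances_ array with the Pima attribute names; a minimal sketch (the name list here is an assumption about the CSV's column order):

# assumed column order of pima-indians-diabetes.csv
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']
# one importance score per input column, highest first
for name, score in sorted(zip(names, model.feature_importances_),
                          key=lambda pair: pair[1], reverse=True):
    print("%s: %.4f" % (name, score))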

 

# Tune learning_rate
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
Y = dataset[:,8]
# grid search
model = XGBClassifier()
learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = dict(learning_rate=learning_rate)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
grid_result = grid_search.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
params = grid_result.cv_results_['params']
for mean, param in zip(means, params):
    print("%f  with: %r" % (mean, param))

Best: -0.483013 using {'learning_rate': 0.1}

-0.689650  with: {'learning_rate': 0.0001}

-0.661274  with: {'learning_rate': 0.001}

-0.530747  with: {'learning_rate': 0.01}

-0.483013  with: {'learning_rate': 0.1}

-0.515440  with: {'learning_rate': 0.2}

-0.557315  with: {'learning_rate': 0.3}
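A smaller learning rate usually needs more boosting rounds, so the two are often searched together. A minimal sketch extending the grid above (the candidate values are assumptions, not recommendations from the original post), reusing kfold, X, and Y from the listing:

param_grid = dict(learning_rate=[0.01, 0.1, 0.2], n_estimators=[100, 200, 500])
grid_search = GridSearchCV(XGBClassifier(), param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
grid_result = grid_search.fit(X, Y)
# report the best pair found by 10-fold cross-validation
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))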

 

 

A typical order for tuning XGBoost parameters:

1. learning_rate
2. Tree parameters:
   max_depth
   min_child_weight
   subsample, colsample_bytree
   gamma
3. Regularization parameters:
   lambda
   alpha

A common baseline configuration, fixing learning_rate=0.1 and a large n_estimators before tuning the tree parameters:

xgb1 = XGBClassifier(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=27)
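From a baseline like xgb1, each group can then be grid-searched in turn. A minimal sketch of step 2 (tree parameters), where the value ranges are assumptions for illustration and X, Y are the Pima arrays loaded earlier:

from sklearn.model_selection import GridSearchCV, StratifiedKFold

# search max_depth and min_child_weight first; GridSearchCV clones xgb1 per fit
param_grid = {'max_depth': [3, 5, 7, 9], 'min_child_weight': [1, 3, 5]}
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid = GridSearchCV(xgb1, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold)
grid_result = grid.fit(X, Y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))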

 
