# -*- coding: utf-8 -*-
#@Time : 2020/4/17 10:35
#@Author : LIU J
#@File : Xgboost.py
#@Software: PyCharm
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the Pima Indians diabetes dataset: 8 feature columns followed by
# one binary label column (presumably 0/1 diabetes outcome — confirm with the CSV).
dataset = pd.read_csv("pima-india-diabetes.csv")

# Split into features (X) and labels (Y).
# BUG FIX: a DataFrame cannot be subscripted as dataset[:, 0:8] — that raises
# a TypeError. Positional slicing requires .iloc.
X = dataset.iloc[:, 0:8]
Y = dataset.iloc[:, 8]

# Split data into train and test sets.
seed = 7          # fixed random seed so the split is reproducible
test_size = 0.33  # hold out one third of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed
)

# Fit the model with early stopping monitored on the held-out test set:
# - early_stopping_rounds=10: stop if the metric shows no improvement
#   for 10 consecutive boosting rounds
# - eval_metric="logloss": the metric used to judge improvement
# - eval_set: the data evaluated after each boosting round
# - verbose=True: print the evaluation metric every round
model = XGBClassifier()
eval_set = [(X_test, y_test)]
model.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="logloss",
          eval_set=eval_set, verbose=True)

# Make predictions on the test set; round predicted values to 0/1 class labels.
y_pred = model.predict(X_test)
prediction = [round(value) for value in y_pred]

# Evaluate predictions against the held-out labels.
accuracy = accuracy_score(y_test, prediction)
print("Accuracy:%f%%"%(accuracy*100))
# XGBoost algorithm
# (scraped page footer: "latest recommended article published 2023-03-22 20:39:16")