【机器学习 sklearn】XGBClassifier 超参数寻优

代码片段

# encoding: utf-8
# Re-import the sys module and switch the interpreter-wide default string
# encoding to UTF-8.
# NOTE(review): this is a Python 2 idiom -- reload() is not a builtin on
# Python 3, so this raises NameError there. Confirm the target interpreter
# before reusing this script.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import pandas as pd

# Load the training and test tables from the current working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')


# Uncomment to inspect column dtypes and non-null counts of the raw tables.
# train.info()
# test.info()

# Columns used as model inputs; 'Survived' (training table only) is the label.
selected_features = ['Pclass', 'Sex', 'Age', 'Embarked', 'SibSp', 'Parch', 'Fare']
X_train = train[selected_features]
X_test = test[selected_features]
y_train = train['Survived']

############# Missing-value handling #############
# Show how often each 'Embarked' category occurs before it is imputed.
# NOTE(review): 'S' below is presumably the most frequent category in this
# output -- verify against the printed counts.
print(X_train['Embarked'].value_counts())
print(X_test['Embarked'].value_counts())

# Fill the gaps with one DataFrame.fillna call per table and rebind the
# result. The feature frames are selections taken from `train` / `test`, so
# calling fillna(inplace=True) on one of their columns is a chained assignment:
# pandas warns about it and does not guarantee that the frame is modified.
# Each table is filled with its own column means, exactly as before.
X_train = X_train.fillna({
    'Embarked': 'S',
    'Age': X_train['Age'].mean(),
})
X_test = X_test.fillna({
    'Embarked': 'S',
    'Age': X_test['Age'].mean(),
    'Fare': X_test['Fare'].mean(),
})

### Check
# Uncomment to inspect the non-null counts after imputation.
# X_train.info()
# X_test.info()

##### Feature vectorization #####
from sklearn.feature_extraction import DictVectorizer
dict_vec = DictVectorizer(sparse=False)

# Turn every row into a {column: value} dict and let DictVectorizer build a
# dense feature matrix from those dicts. (No scaling or standardisation is
# performed here, only vectorization.)
# The vectorizer is fitted on the training rows and then reused, unchanged,
# on the test rows.
# 'records' is the full name of the orient; the shortened spelling 'record'
# is deprecated and rejected by newer pandas releases.
X_train = dict_vec.fit_transform(X_train.to_dict(orient='records'))

X_test = dict_vec.transform(X_test.to_dict(orient='records'))


from sklearn.ensemble import RandomForestClassifier

# Random forest with the library's default hyper-parameters.
rfc = RandomForestClassifier()


from xgboost import XGBClassifier

# XGBoost classifier, likewise with default hyper-parameters.
xgbc = XGBClassifier()


from sklearn.model_selection import cross_val_score

# Report the mean 5-fold cross-validation score of each default model.
# Both scores are printed: previously the XGBoost score was computed and then
# thrown away, so the two models could not be compared from the output.
print(cross_val_score(rfc, X_train, y_train, cv=5).mean())
print(cross_val_score(xgbc, X_train, y_train, cv=5).mean())

# Train both default-parameter models on the complete training matrix.
rfc.fit(X_train, y_train)
xgbc.fit(X_train, y_train)

# Predict a label for every test row with each fitted model.
rfc_y_predict = rfc.predict(X_test)
xgbc_y_predict = xgbc.predict(X_test)

# Pair each prediction with its passenger id: one submission table per model.
passenger_ids = test['PassengerId']
rfc_submission = pd.DataFrame(dict(PassengerId=passenger_ids, Survived=rfc_y_predict))
xgbc_submission = pd.DataFrame(dict(PassengerId=passenger_ids, Survived=xgbc_y_predict))

# Uncomment to write the submission tables to disk.
# rfc_submission.to_csv('rfc_submission.csv', index=False)
# xgbc_submission.to_csv('xgbc_submission.csv', index=False)



# GridSearchCV is imported from sklearn.model_selection, the same module this
# script already uses for cross_val_score. The older sklearn.grid_search
# module was deprecated and is no longer shipped with scikit-learn.
from sklearn.model_selection import GridSearchCV

# Candidate values per hyper-parameter. The search is exhaustive, so
# 5 * 5 * 5 = 125 combinations are evaluated, each with 5-fold CV.
params = {
    'max_depth': list(range(2, 7)),                # 2, 3, 4, 5, 6
    'n_estimators': list(range(100, 1100, 200)),   # 100, 300, 500, 700, 900
    'learning_rate': [0.05, 0.1, 0.25, 0.5, 1.0],
}

xgbc_best = XGBClassifier()

# n_jobs=-1 asks for all available cores; verbose=1 prints search progress.
gs = GridSearchCV(xgbc_best, params, n_jobs=-1, cv=5, verbose=1)

gs.fit(X_train, y_train)


# Best mean cross-validation score and the parameter combination behind it.
print(gs.best_score_)
print(gs.best_params_)

# Predict the test rows through the fitted search object.
xgbc_best_y_predict = gs.predict(X_test)


xgbc_best_submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': xgbc_best_y_predict})

# Uncomment to write the tuned-model submission to disk.
# xgbc_best_submission.to_csv('xgbc_best_submission.csv', index=False)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东华果汁哥

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值