# scikit-learn学习 2,超级详细解释,个人笔记
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset directly as a (features, labels) pair.
X_breast, y_breast = load_breast_cancer(return_X_y=True)

# %load solutions/01_2_solutions.py
# Hold out 30% of the samples as a test set. The split is stratified on the
# labels (the `stratify` parameter), and the fixed `random_state` makes the
# split reproducible from run to run.
X_breast_train, X_breast_test, y_breast_train, y_breast_test = train_test_split(
    X_breast,
    y_breast,
    stratify=y_breast,
    random_state=0,
    test_size=0.3,
)

# Gradient-boosted decision trees (GBDT): gradient boosting is a machine
# learning technique for regression and classification problems. It can be
# viewed as a combination of gradient descent and an additive model.
clf = GradientBoostingClassifier(n_estimators=100, random_state=0)

# Fit the model, i.e. solve for its parameters on the training split.
clf.fit(X_breast_train, y_breast_train)

# Use the fitted classifier to predict the class labels of the test set.
y_pred = clf.predict(X_breast_test)

# %load solutions/01_5_solutions.py
# The metric computed here is *balanced* accuracy, not plain accuracy, so the
# printed label names it explicitly. The variable keeps its original name
# (`accuracy`) so any later code that reads it continues to work.
accuracy = balanced_accuracy_score(y_breast_test, y_pred)
print('Balanced accuracy score of the {} is {:.2f}'.format(clf.__class__.__name__, accuracy))