1.简介
CatBoost是一个基于梯度提升决策树(GBDT)的开源机器学习库。
优点:
- 与其他GBDT库相比,在许多数据集上具有更好的预测效果;
- 同类最佳的预测(推理)速度;
- 同时支持数值型和类别型的特征;
- 支持快速的GPU及多GPU训练;
- 包含可视化的工具。
2.分类实例
from catboost import Pool, CatBoostClassifier
import numpy as np
import matplotlib.pyplot as plt
# Generate a synthetic 3-class dataset and split it 80/20 into train / eval.
#
# NOTE: `sklearn.datasets.samples_generator` was deprecated in scikit-learn
# 0.22 and removed in 0.24. `make_classification` is exported directly from
# `sklearn.datasets` in both old and new releases, so import it from there.
from sklearn.datasets import make_classification

X, labels = make_classification(
    n_samples=2000,
    n_features=20,
    n_redundant=0,
    n_informative=4,
    n_classes=3,
    random_state=1,
    n_clusters_per_class=2,
)

# Perturb every feature with seeded uniform noise in [0, 2) so the result is
# reproducible from run to run.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)

# Append the labels as the last column of a single (n_samples, n_features + 1)
# matrix. Concatenating with the float feature matrix upcasts the integer
# class labels to float.
labels = np.reshape(labels, (len(labels), -1))
X_y = np.concatenate([X, labels], axis=1)

# First 80% of the rows are used for training, the remaining 20% for eval.
train_num = int(len(X) * 0.8)

# Every column except the last one holds features.
train_data = X_y[:train_num, :-1]
eval_data = X_y[train_num:, :-1]

# All generated features are numeric, so there are no categorical columns.
cat_features = []

# The last column holds the class label.
train_label = X_y[:train_num, -1]
eval_label = X_y[train_num:, -1]

# Sanity-check the shapes (and dump the training features) before training.
print(train_data.shape)
print(train_data)
print(eval_data.shape)
print(train_label.shape)
print(eval_label.shape)
# Wrap the train and eval splits in CatBoost Pool containers.
train_dataset = Pool(train_data, label=train_label)
eval_dataset = Pool(eval_data, label=eval_label)

# Build a small multiclass classifier: 10 boosting iterations, trees of
# depth 2, learning rate 1.
model = CatBoostClassifier(
    iterations=10,
    learning_rate=1,
    depth=2,
    loss_function='MultiClass',
)

# Train on the training pool only; the eval pool is used just for the
# predictions below.
model.fit(train_dataset)

# Predicted class label for each eval row.
preds_class = model.predict(eval_dataset)
print(preds_class)

# Per-class probabilities for each eval row.
preds_proba = model.predict_proba(eval_dataset)

# Raw model scores ('RawFormulaVal'); computed here but not printed.
preds_raw = model.predict(eval_dataset, prediction_type='RawFormulaVal')

print(preds_proba)
参考: