文章目录
下载gcforest
介绍gcforest算法原理的文章在网上有很多,这里不再赘述。
简单代码实现
把下载好的 gcforest 文件夹放到自己的项目目录下,或者直接放到 Python 的 lib(例如 site-packages)目录里,使其可以被 import,然后编写如下代码:
import numpy as np
from gcforest.gcforest import GCForest
from sklearn.datasets import load_iris, load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Module-level placeholders. `iris` is rebound to the loaded dataset when the
# file is run as a script; `data` is not referenced by the code shown here.
iris, data = [], []
def get_toy_config(n_classes: int = 3) -> dict:
    """Build a small gcForest cascade configuration.

    Args:
        n_classes: Number of target classes in the dataset. Defaults to 3,
            the value used for the iris example. The accompanying article
            notes that a value that does not match the dataset leads to an
            AssertionError, so pass the real class count for other datasets.

    Returns:
        A dict of the form ``{"cascade": {...}}`` intended to be passed to
        ``GCForest``. A fresh dict is built on every call.
    """
    ca_config = {
        "random_state": 0,
        # Maximum number of cascade layers ("layer" is the paper's "level").
        "max_layers": 100,
        # Stop adding layers when accuracy has not improved within 3 layers.
        "early_stopping_rounds": 3,
        # Number of target classes.
        "n_classes": n_classes,
        # Base learners used in each cascade layer, each with 5-fold training.
        "estimators": [
            {
                "n_folds": 5,
                "type": "XGBClassifier",
                "n_estimators": 10,
                "max_depth": 5,
                "objective": "multi:softprob",
                "silent": True,
                "nthread": -1,
                "learning_rate": 0.1,
            },
            {
                "n_folds": 5,
                "type": "RandomForestClassifier",
                "n_estimators": 10,
                "max_depth": None,
                "n_jobs": -1,
            },
            {
                "n_folds": 5,
                "type": "ExtraTreesClassifier",
                "n_estimators": 10,
                "max_depth": None,
                "n_jobs": -1,
            },
            {"n_folds": 5, "type": "LogisticRegression"},
        ],
    }
    return {"cascade": ca_config}
# Script entry point: train a gcForest cascade on iris and report accuracy.
if __name__ == '__main__':
    iris = load_iris()
    X, y = iris.data, iris.target

    # Hold out 20% of the samples, stratified by label, with a fixed
    # random_state.
    X_train, X_test, y_train, y_truth = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=111,
        shuffle=True,
        stratify=y,
    )

    # Build the model from the toy cascade configuration.
    model = GCForest(get_toy_config())

    # Train the model; the return value of fit_transform is discarded.
    model.fit_transform(X_train, y_train)

    # Predict labels for the held-out samples.
    # To save the trained model, joblib.dump(model, 'irisModel.sav') could be
    # called here (joblib would need to be imported first).
    y_predict = model.predict(np.array(X_test))
    print(y_predict)

    score = accuracy_score(y_truth, y_predict)
    print("accuracy:", score)
注意!针对不同的数据集,必须把配置中的 ca_config["n_classes"](类别数,本例 iris 数据集为 3)
设置为该数据集实际的类别数,否则会报 AssertionError 错误。