模型融合

任务5 模型融合

  1. stacking
  2. 简单加权融合
  3. blending
  4. boosting
  5. bagging
  6. 将多个模型结果再放入模型中预测

1. 将特征放进模型中预测,并将预测结果作为新特征加入原有特征,再用模型重新预测(可以反复多轮,每一轮都把预测结果追加到特征中)

from sklearn.model_selection import KFold
    folds = KFold(n_splits=5, shuffle=True, random_state=2333)
   
    "===================================第一轮========================================================"
    y_pre_list = []
    r2_list = []
    train_feat = pd.Series()
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(feature.values, label)):
        print("fold {}".format(fold_))
        trn_data = lgb.Dataset(feature.iloc[trn_idx], label[trn_idx], categorical_feature=categorical_feats)
        val_data = lgb.Dataset(feature.iloc[val_idx], label[val_idx], categorical_feature=categorical_feats)

        num_round = 10000
        clf = lgb.train(params, trn_data, num_round,valid_sets=[trn_data, val_data], verbose_eval=500,
                    early_stopping_rounds=200)
        y_pre = clf.predict(feature.iloc[val_idx], num_iteration=clf.best_iteration)
        r2 = r2_score(y_pre,label[val_idx])
        r2_list.append(r2)
        train_feat = train_feat.append(pd.Series(y_pre,index=val_idx))
        y_pre_test = clf.predict(test,num_iteration=clf.best_iteration)
        y_pre_list.append(y_pre_test)
    print('r2 score{:}'.format(r2))
    print('r2:{:}'.format(np.mean(r2_list)))

    y_pred_final=  (y_pre_list[0]+y_pre_list[1]+y_pre_list[2]+y_pre_list[3]+y_pre_list[4])/5
    feature['pre'] = train_feat
    test['pre'] = y_pred_final
    "===================================第二轮========================================================"
    y_pre_list = []
    r2_list = []
    train_feat = pd.Series()
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(feature.values, label)):
        print("fold {}".format(fold_))
        trn_data = lgb.Dataset(feature.iloc[trn_idx], label[trn_idx], categorical_feature=categorical_feats)
        val_data = lgb.Dataset(feature.iloc[val_idx], label[val_idx], categorical_feature=categorical_feats)

        num_round = 10000
        clf = lgb.train(params, trn_data, num_round, feval=get_r2_metric,valid_sets=[trn_data, val_data], verbose_eval=500,
                    early_stopping_rounds=200)
        y_pre = clf.predict(feature.iloc[val_idx], num_iteration=clf.best_iteration)
        r2 = r2_score(y_pre,label[val_idx])
        r2_list.append(r2)
        train_feat = train_feat.append(pd.Series(y_pre,index=val_idx))
        y_pre_test = clf.predict(test,num_iteration=clf.best_iteration)
        y_pre_list.append(y_pre_test)
    print('r2 score{:}'.format(r2))
    print('r2:{:}'.format(np.mean(r2_list)))
    
    y_pred_final=  (y_pre_list[0]+y_pre_list[1]+y_pre_list[2]+y_pre_list[3]+y_pre_list[4])/5
    feature['pre_2'] = train_feat
    test['pre_2'] = y_pred_final
    "===================================第三轮========================================================"
    y_pre_list = []
    r2_list = []
    train_feat = pd.Series()
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(feature.values, label)):
        print("fold {}".format(fold_))
        trn_data = lgb.Dataset(feature.iloc[trn_idx], label[trn_idx], categorical_feature=categorical_feats)
        val_data = lgb.Dataset(feature.iloc[val_idx], label[val_idx], categorical_feature=categorical_feats)

        num_round = 10000
        clf = lgb.train(params, trn_data, num_round, feval=get_r2_metric,valid_sets=[trn_data, val_data], verbose_eval=500,
                    early_stopping_rounds=200)
        y_pre = clf.predict(feature.iloc[val_idx], num_iteration=clf.best_iteration)
        r2 = r2_score(y_pre,label[val_idx])
        r2_list.append(r2)
        train_feat = train_feat.append(pd.Series(y_pre,index=val_idx))
        y_pre_test = clf.predict(test,num_iteration=clf.best_iteration)
        y_pre_list.append(y_pre_test)
    print('r2 score{:}'.format(r2))
    print('r2:{:}'.format(np.mean(r2_list)))
    
    y_pred_final=  (y_pre_list[0]+y_pre_list[1]+y_pre_list[2]+y_pre_list[3]+y_pre_list[4])/5
    
    return y_pred_final

2. pre1-pren 分别是 n 组模型预测出来的结果,将其进行加权融合(下面以等权重的简单平均为例;若各模型权重不同,可改为按权重求和)

# Equal-weight average of the n prediction arrays. pre1..pren and n are
# placeholders and `...` stands for the omitted terms, so this line is
# illustrative rather than runnable as written.
pre = (pre1 + pre2 + pre3 +...+pren )/n

# Write the blended predictions to pre.csv. header=None / index=None are
# presumably meant to omit the header row and the index column — TODO confirm
# (the documented way to disable them is False).
pd.DataFrame(pre).to_csv("pre.csv",header=None,index=None)

3.blending

def blend(train,test,target):
    """Build blending meta-features from the base models in ``clfs``.

    ``train`` is split 50/50 into d1 and d2. Each model in the module-level
    ``clfs`` collection is fitted on d1 and then predicts d2 and ``test``.
    Three features are derived from every model's predictions and stored in
    consecutive columns ``3*j``, ``3*j + 1``, ``3*j + 2`` for model ``j``:

    * the squared prediction,
    * the min-max scaled prediction (each set scaled by its own min/max),
    * the natural log of the raw prediction.

    The resulting matrices are written to ``./input/train_blending.csv``
    (rows of d2) and ``./input/test_blending.csv`` (rows of ``test``).

    Args:
        train: Training features; must support ``fillna`` (e.g. a DataFrame).
        test: Test features with the same columns as ``train``.
        target: Training targets aligned with ``train``.

    Returns:
        None. The meta-features are only written to disk. The d2 targets are
        not saved alongside them.

    Notes:
        * Missing values are replaced with 0 before fitting/predicting.
        * ``np.log`` yields ``-inf``/``nan`` for non-positive predictions,
          and the min-max scaling divides by zero if a model predicts a
          constant; callers should ensure predictions are positive and
          non-constant.
    """
    # Local import so the block does not rely on a file-level ``pd`` alias.
    import pandas as pd

    n_transforms = 3  # squared, min-max scaled, log

    # Split the training data into two halves: d1 (fit) and d2 (held out).
    X_d1, X_d2, y_d1, y_d2 = train_test_split(train, target, test_size=0.5, random_state=914)

    # Imputation does not depend on the model, so do it once up front.
    X_d1_filled = X_d1.fillna(0)
    X_d2_filled = X_d2.fillna(0)
    test_filled = test.fillna(0)

    train_ = np.zeros((X_d2.shape[0], len(clfs) * n_transforms))
    test_ = np.zeros((test.shape[0], len(clfs) * n_transforms))

    for j, clf in enumerate(clfs):
        # Fit on d1; the predictions on d2 become d2's new features, and the
        # predictions on the test set become the test set's new features.
        clf.fit(X_d1_filled, y_d1)
        y_submission = clf.predict(X_d2_filled)
        y_test_submission = clf.predict(test_filled)

        # Each model owns its own group of three columns; indexing with
        # j + 1 / j + 2 would overwrite the columns of neighbouring models.
        base = j * n_transforms

        train_[:, base] = y_submission * y_submission
        test_[:, base] = y_test_submission * y_test_submission

        train_[:, base + 1] = (y_submission - y_submission.min()) / \
                              (y_submission.max() - y_submission.min())
        test_[:, base + 1] = (y_test_submission - y_test_submission.min()) / \
                             (y_test_submission.max() - y_test_submission.min())

        # Take the log of the RAW predictions for both sets so the train and
        # test features are computed the same way.
        train_[:, base + 2] = np.log(y_submission)
        test_[:, base + 2] = np.log(y_test_submission)

        print('已完成第',j)

    # NumPy arrays have no ``to_csv``; wrap them in DataFrames to save.
    pd.DataFrame(train_).to_csv('./input/train_blending.csv', index=False)
    pd.DataFrame(test_).to_csv('./input/test_blending.csv', index=False)
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值