H2O-Spark:在 Spark 集群上运行 H2O GBM

2 篇文章 0 订阅

# Train an H2O gradient boosting model on a CSV loaded through Spark
# (Sparkling Water), then collect the per-variable importance percentages.
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from pyspark.sql import SparkSession
from pysparkling import *

#from h2o.estimators.deeplearning import H2ODeepLearningEstimator


# Local Spark session that hosts the H2O cluster.
ss = (
    SparkSession.builder
    .appName("H2O-GBM")
    .master("local[*]")
    .config("spark.executor.memory", "2g")
    .config("spark.driver.memory", "4g")
    .config("spark.default.parallelism", "8")
    .getOrCreate()
)

# Start (or attach to) the H2O context running on top of the Spark session.
hc = H2OContext.getOrCreate(ss)

# Read through the session created above; the original referenced an
# undefined name `spark` here.
df = ss.read.csv(
    path='/home/jerry/geoplatform/gai_platform/data/feature_filter/user.csv',
    header=True,
    inferSchema=True,
)

# Hand the Spark DataFrame over to H2O.
h2o_df = hc.as_h2o_frame(df, framename='df_h20')

response = "target"

# A bernoulli GBM needs a categorical response; with inferSchema=True a 0/1
# label would arrive as a numeric column, so convert it explicitly.
# NOTE(review): assumes "target" is a two-class label - confirm against the data.
h2o_df[response] = h2o_df[response].asfactor()

model_gbm = H2OGradientBoostingEstimator(
    ntrees=50,
    max_depth=6,
    learn_rate=0.1,
    distribution="bernoulli",
)

# Every column except the response is a predictor.
predictors = [name for name in h2o_df.names if name != response]

# Two ratios produce three frames: 60% / 20% / remaining 20%.
ratios = [0.6, 0.2]
frs = h2o_df.split_frame(ratios, seed=12345)
# Same assignment as before: first split trains, second is the test set,
# third is used for validation.
train, test, valid = frs
train.frame_id = "Train"
valid.frame_id = "Validation"
test.frame_id = "Test"

model_gbm.train(
    x=predictors,
    y=response,
    training_frame=train,
    validation_frame=valid,
)

# Map variable name (index 0) to the value at index 3 of each varimp() row.
# NOTE(review): index 3 is presumably the percentage column - confirm against
# the h2o-py varimp() documentation.
importance_cols = {row[0]: row[3] for row in model_gbm.varimp()}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值