以使用 PySpark 的随机森林为例：
#! /usr/bin/python3
#-*-coding:utf-8-*-
from pyspark import SparkContext,SparkConf
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.tree import RandomForest
from pyspark.sql import SQLContext
# Spark setup for running this script through spark-submit.
# NOTE(review): a master set on SparkConf in code takes precedence over the
# --master flag passed to spark-submit -- confirm hard-coding it is intended.
conf = (
    SparkConf()
    .setAppName("Test Application")
    .setMaster("local[10]")  # local mode with 10 worker threads
)
sc = SparkContext(conf=conf)
# SQL entry point built on top of the SparkContext created above.
sqlCtx = SQLContext(sc)
def create_label_point(line):
line=line.strip().split(',')
return LabeledPoint(int(line[-1]), [float(x)