import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.util.MLUtils
// Trains a decision tree classifier on the sample LIBSVM data set and reports
// its error rate on that same training data.
//
// NOTE(review): `sc` is not defined in this snippet; presumably it is the
// SparkContext provided by spark-shell -- confirm before running elsewhere.

// Load the labeled points and cache them, since the RDD is used for training,
// for prediction, and for the final count.
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").cache()

// Training parameters passed to DecisionTree.trainClassifier below.
val numClasses = 2
// Empty map: no feature is declared as categorical.
val categoricalFeaturesInfo = Map[Int, Int]()
val impurity = "gini"
val maxDepth = 5
val maxBins = 100

val model = DecisionTree.trainClassifier(
  data,
  numClasses,
  categoricalFeaturesInfo,
  impurity,
  maxDepth,
  maxBins
)

// Pair each example's true label with the model's prediction.
// The two statements in the closure must be separated: written on one line,
// `model.predict(point.features) (point.label, prediction)` is parsed as a
// function application on the predicted Double and does not compile.
val labelAndPreds = data.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}

// Fraction of training examples whose prediction differs from the label.
val trainErr =
  labelAndPreds.filter { case (label, prediction) => label != prediction }.count().toDouble /
    data.count()

println("Training Error = " + trainErr)
println("Learned classification tree model:\n" + model)
// Reposted from: https://my.oschina.net/u/1426212/blog/374966