支持向量机(SVM)可用来解决一般线性回归和逻辑回归不易处理的问题,准确性通常较好。MLlib 对支持向量机提供了较好的支持,其使用方法与逻辑回归类似。
package classify
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint
/**
 * Trains a linear SVM with Spark MLlib's `SVMWithSGD` on a text file and
 * prints the learned weights, the per-record predictions for a held-out
 * split, and the precision reported by `MulticlassMetrics`.
 *
 * Command-line arguments:
 *   - `args(0)`: Spark master URL passed to `SparkConf.setMaster`
 *   - `args(1)`: path of the input text file
 *
 * Each input line is expected to hold a numeric label followed by at least
 * three numeric features, separated by whitespace.
 */
object svm {

  /** Number of feature columns read from each line (the columns right after the label). */
  private val NumFeatures = 3

  /** Iteration count handed to `SVMWithSGD.train`. */
  private val NumIterations = 50

  /**
   * Parses one whitespace-separated input line into a `LabeledPoint`.
   *
   * The first field is the label and the next three fields are the features;
   * any further fields are ignored.
   *
   * @param line a single line of the input file
   * @return the labeled point built from the first four fields
   * @throws IllegalArgumentException if the line has fewer than four fields
   * @throws NumberFormatException    if one of the first four fields is not a number
   */
  def parseLine(line: String): LabeledPoint = {
    // Trim and split on any run of whitespace so tabs, repeated spaces and
    // leading/trailing blanks do not produce empty or shifted fields.
    val parts = line.trim.split("\\s+")
    require(
      parts.length > NumFeatures,
      s"Expected a label followed by $NumFeatures features, but got: '$line'"
    )
    val vd: Vector = Vectors.dense(parts.slice(1, NumFeatures + 1).map(_.toDouble))
    LabeledPoint(parts(0).toDouble, vd)
  }

  /**
   * Program entry point: loads the data, trains the model on a 60% split,
   * and evaluates it on the remaining 40%.
   *
   * @param args `args(0)` is the Spark master URL, `args(1)` the input path
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "Usage: svm <master> <input-path>")

    val conf = new SparkConf().setMaster(args(0)).setAppName("svm")
    val sc = new SparkContext(conf)
    try {
      val data = sc.textFile(args(1)).map(parseLine(_))

      // Fixed seed keeps the 60/40 train/test split reproducible between runs.
      val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      // The training set is scanned once per SGD iteration, so keep it cached.
      val trainData = splits(0).cache()
      val testData = splits(1)

      val model = SVMWithSGD.train(trainData, NumIterations)
      println(model.weights.size)
      println(model.weights)
      // Number of non-zero weights in the trained model.
      println(model.weights.toArray.count(_ != 0))

      // Cached because it is consumed twice: printed below, then fed to the metrics.
      val predictionAndLabel =
        testData.map(p => (model.predict(p.features), p.label)).cache()
      // NOTE: RDD.foreach runs on the executors, so outside local mode these
      // lines appear in the executor logs rather than on the driver console.
      predictionAndLabel.foreach(println)

      val metrics = new MulticlassMetrics(predictionAndLabel)
      // NOTE(review): `precision` is deprecated in newer Spark releases in favour
      // of `accuracy`; kept here because the Spark version in use is not visible.
      val precision = metrics.precision
      println(s"Precision = $precision")
    } finally {
      // Always release the Spark resources, even if training or evaluation fails.
      sc.stop()
    }
  }
}
调用 SVMWithSGD.train 方法训练并得到模型,其中 50 为迭代次数(numIterations)。