SparkML实战之五:SVM

59 篇文章 0 订阅
27 篇文章 0 订阅
package MLlib

import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.util.MLUtils

/**
 * Created by root on 16-1-12.
 */
object SVM {
  /**
   * Trains and evaluates linear SVM classifiers on the bundled LIBSVM sample
   * data set: first with SGD defaults (L2 regularization, regParam = 1.0,
   * 100 iterations), then with an explicitly configured L1-regularized run
   * (regParam = 0.1, 200 iterations). Prints the area under the ROC curve
   * for each model.
   */
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("SVM").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // Load training data in LIBSVM format
    // (each line: <label> <index1>:<value1> <index2>:<value2> ...).
    val data = MLUtils.loadLibSVMFile(sc, "/usr/local/spark/spark-1.6.0-bin-hadoop2.4" +
      "/data/mllib/sample_libsvm_data.txt")

    // Split data into training (60%) and test (40%).
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
    val training = splits(0).cache()
    val test = splits(1)

    // Run training algorithm to build the model.
    val numIterations = 100
    val model = SVMWithSGD.train(training, numIterations)

    // Clear the default decision threshold so predict() returns raw margin
    // scores instead of 0/1 labels — BinaryClassificationMetrics needs raw
    // scores to compute a meaningful ROC curve.
    model.clearThreshold()

    // Compute raw scores on the test set.
    val scoreAndLabels = test.map { point =>
      val score = model.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    val auROC = metrics.areaUnderROC()

    println("Area under ROC = " + auROC)

    // Save and load model.
    // Saving creates data/ and metadata/ directories under myModelPath.
//    model.save(sc, "myModelPath")
//    val sameModel = SVMModel.load(sc, "myModelPath")
    //--------------------------------------------------------------------------
    // SVMWithSGD.train() defaults to L2 regularization with regParam = 1.0.
    // To configure the algorithm, instantiate it and call the setter methods
    // on its optimizer; all spark.mllib algorithms support this pattern.
    // Below: L1 regularization with regParam = 0.1 and 200 iterations.
    import org.apache.spark.mllib.optimization.L1Updater
    System.out.println("使用正则参数为0.1的L1正则来训练算法递归200次----------------" +
      "-------------------------------------------------")

    val svmAlg = new SVMWithSGD()
    svmAlg.optimizer.
      setNumIterations(200).
      setRegParam(0.1).
      setUpdater(new L1Updater)
    val modelL1 = svmAlg.run(training)

    // Fix: clear the L1 model's threshold too, for the same reason as above —
    // the original code skipped this, so the L1 ROC would have been computed
    // over thresholded 0/1 predictions.
    modelL1.clearThreshold()

    // Compute raw scores on the test set.
    // Fix: the original evaluated `model` (the L2 model) here, so the printed
    // "L1" AUC was actually the L2 model's AUC again. Use modelL1.
    val L1scoreAndLabels = test.map { point =>
      val score = modelL1.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics for the L1-regularized model.
    val L1metrics = new BinaryClassificationMetrics(L1scoreAndLabels)
    val L1auROC = L1metrics.areaUnderROC()

    println("L1正则的Area under ROC = " + L1auROC)

    // Release Spark resources before exiting.
    sc.stop()
  }

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值