用scala程序实现saveAsLibSVMFile的文件输出

Project: mlbench   Author: mlbench   File: PrepareData.scala 5 votes vote downvote up
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
import utils.Utils

import scala.util.Try
import scalax.file.Path



class PrepArgParser(arguments: Seq[String]) extends org.rogach.scallop.ScallopConf(arguments) {
  val dataset = opt[String](required = true, short = 'd',
    descr = "absolute address of the libsvm dataset. This must be provided.")
  val partitions = opt[Int](required = false, default = Some(4), short = 'p', validate = (0 <),
    descr = "Number of spark partitions to be used. Optional.")
  val dir = opt[String](required = true, default = Some("../results/"), short = 'w', descr = "working directory where results " +
    "are stored. Default is \"../results\". ")
  val method = opt[String](required = true, short = 'm',
    descr = "Method can be either \"Regression\" or \"Classification\". This must be provided")
  verify()
}

object PrepareData {
  def main(args: Array[String]) {
    //Spark conf
    val conf = new SparkConf().setAppName("Distributed Machine Learning").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    //Turn off logs
    val rootLogger = Logger.getRootLogger()
    rootLogger.setLevel(Level.ERROR)
    //Parse arguments
    val parser = new PrepArgParser(args)
    val dataset = parser.dataset()
    var workingDir = parser.dir()
    val numPartitions = parser.partitions()
    val method = parser.method()

    //Load data
    val (train, test) = method match {
      case "Classification" => Utils.loadAbsolutLibSVMBinaryClassification(dataset, numPartitions, sc)
      case "Regression" => Utils.loadAbsolutLibSVMRegression(dataset, numPartitions, sc)
      case _ => throw new IllegalArgumentException("The method " + method + " is not supported.")
    }

    // append "/" to workingDir if necessary
    workingDir = workingDir + ( if (workingDir.takeRight(1) != "/") "/" else "" )
    val trainPath: Path = Path.fromString(workingDir + "train")
    Try(trainPath.deleteRecursively(continueOnFailure = false))
    val testPath: Path = Path.fromString(workingDir + "test")
    Try(testPath.deleteRecursively(continueOnFailure = false))
    MLUtils.saveAsLibSVMFile(train, workingDir + "train")
    MLUtils.saveAsLibSVMFile(test, workingDir + "test")
  }
} 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值