之前的一篇转载介绍了用IDEA开发Spark应用的配置方法。在完成配置之后,我写了下面的demo code。
object LogReg {
def main (args: Array[String]) {
val conf = new SparkConf().setAppName("spark demo").setMaster("spark://master.local:7077")
val sc = new SparkContext(conf)
// parse the dimension
if (args.length != 1) {
System.err.println("Need one argument")
System.exit(-1)
}
val dimension = Integer.parseInt(args(0))
System.out.println("Dimension = " + dimension)
// load the training data
val trainData = MLUtils.loadLibSVMFile(sc,
"hdfs://master.local:9000/user/xxxx/input/out5_training.log",
dimension)
// train
val numIterations = 100
val model = LogisticRegressionWithSGD.train(trainData, numIterations)
val valuesAndPreds = trainData.map { point =>
val prediction = model.predict(point.features)
(prediction, point.label)
}
val trainErrors = valuesAndPreds.filter{case (v,p) => v != p}
val trainErr = trainErrors.count.toDouble / trainData.count
System.out.println("Training error = " + trainErr)
// test
val testData = MLUtils.loadLibSVMFile(sc,
"hdfs://master.local:9000/user/xxxx/input/out5_testing.log",
dimension)
val testAndPreds = testData.map { point =>
val