1. Problem Description
When training a machine-learning model with Spark MLlib on Windows and saving the model to the local filesystem, a NullPointerException is thrown. The code is as follows:
import org.apache.spark.mllib.classification.{LogisticRegressionWithLBFGS, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.mllib.util.MLUtils

object SVM {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local").setAppName("SVM")
    val sc = new SparkContext(conf)

    // Load training data in LIBSVM format.
    val data = MLUtils.loadLibSVMFile(sc, "D://spark/sample_libsvm_data.txt")

    // Split the data into training (60%) and test (40%) sets.
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
    val training = splits(0).cache()
    val test = splits(1)
    //training.foreach(println)

    // Train an SVM model using stochastic gradient descent.
    val numIterations = 100
    val model = SVMWithSGD.train(training, numIterations)
    //model.clearThreshold()
    println("######## Threshold is : " + model.getThreshold)

    // Compute raw scores on the test set.
    val scoreAndLabels = test.map { point =>
      val score = model.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    val auROC = metrics.areaUnderROC()
    println("Area under ROC = " + auROC)

    // Save the trained model to the local filesystem -- this is the line that fails.
    model.save(sc, "file:///D://spark/SVMTrainingModel")
    //val model = new LogisticRegressionWithLBFGS().setNumClasses(10).run(training)
  }
}
The call to model.save fails with the following error. First, this exception appears:
16/08/26 18:34:58 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:362)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$33.apply(SparkContext.scala:1015)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$33.apply(SparkContext.scala:1015)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:176)
at org.apache.spark.rdd.HadoopRDD$$anonfu
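This first exception points at the root cause: Hadoop's Shell class resolves winutils.exe under the directory given by the hadoop.home.dir system property (falling back to the HADOOP_HOME environment variable), and with neither one set the lookup path degenerates to null\bin\winutils.exe. Below is a minimal sketch of the commonly cited workaround: download a winutils.exe matching your Hadoop version, place it under a bin subdirectory, and point hadoop.home.dir at that directory before the SparkContext is created. The location D://hadoop is an assumption for illustration; substitute your own path.

import org.apache.spark.{SparkContext, SparkConf}

object SVMWithWinutils {
  def main(args: Array[String]) {
    // Hadoop's Shell class reads this system property (then the HADOOP_HOME
    // environment variable) to build the path <home>\bin\winutils.exe.
    // D://hadoop is a hypothetical directory that must contain bin\winutils.exe.
    System.setProperty("hadoop.home.dir", "D://hadoop")

    // The property must be set before the SparkContext is constructed,
    // because the Hadoop Shell class is initialized on first use.
    val conf = new SparkConf().setMaster("local").setAppName("SVM")
    val sc = new SparkContext(conf)

    // ... load data, train, evaluate, and model.save(...) as in the code above ...

    sc.stop()
  }
}

Equivalently, setting the HADOOP_HOME environment variable (and adding %HADOOP_HOME%\bin to PATH) before launching the application achieves the same effect without code changes.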