import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Minimal Spark MLlib Naive Bayes demo.
 *
 * Reads a text file where each line is "label,f1 f2 f3 f4" (label comma-separated
 * from space-separated numeric features), trains a NaiveBayes model on a 70/30
 * train/test split, prints the test accuracy, and predicts one hand-written sample.
 *
 * Usage: an optional first command-line argument overrides the input file path;
 * with no argument the original hard-coded path is used.
 */
object spark_mllib {
  def main(args: Array[String]): Unit = {
    // Quiet down Spark / Jetty logging so the demo output is readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("spark_mllib").setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      // Backward-compatible generalization: allow the data path as args(0),
      // falling back to the original hard-coded location.
      val dataPath = args.headOption.getOrElse("C:/Users/Lenovo/Desktop/data.txt")
      val data = sc.textFile(dataPath)

      // Parse each line into a LabeledPoint(label, dense feature vector).
      // (The original comment wrongly described this as a Rating[Int,Int,Double].)
      val parsedData = data.map { line =>
        val parts = line.split(',')
        LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
      }

      // Split into 70% training / 30% test. Fixed seed keeps the split reproducible.
      // (The original comment claimed 60/40, contradicting Array(0.7, 0.3).)
      val splits = parsedData.randomSplit(Array(0.7, 0.3), seed = 11L)
      val training = splits(0)
      val test = splits(1)

      // Train the model; lambda = 1.0 is additive (Laplace) smoothing.
      val model = NaiveBayes.train(training, lambda = 1.0)

      // Pair each test point's prediction with its true label.
      val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))

      // Accuracy = fraction of correct predictions. Guard against an empty test
      // split (possible with a tiny input file) to avoid dividing by zero.
      val testCount = test.count()
      val accuracy =
        if (testCount == 0L) 0.0
        else 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / testCount
      println("NaiveBayes精度----->" + accuracy)

      // Predict one hand-crafted sample: sunny(0), cool(2), humidity high(0), wind high(1).
      println("test:0, 2, 0, 1--->" + model.predict(Vectors.dense(0.0, 2.0, 0.0, 1.0)))
    } finally {
      // Always release the SparkContext, even if reading/training fails.
      sc.stop()
    }
  }
}
// Sample input data (label,features):
//0,0 0 0 0
//0,0 0 0 1
//1,1 0 0 0
//1,2 1 0 0
//1,2 2 1 0
//0,2 2 1 1
//1,1 2 1 1
//0,0 1 0 0
//1,0 2 1 0
//1,2 1 1 0
//0,2 1 1 1
//1,1 1 1 0
//1,1 0 1 1
//0,2 1 0 1
Spark MLlib NaiveBayes 笔记
最新推荐文章于 2022-11-29 19:07:59 发布