朴素贝叶斯的详细理论这里不再赘述,可以自行上网查阅。下面给出一个基于 Spark MLlib 的 demo:
package sparksql
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.{SparkConf, SparkContext}
//贝叶斯分类算法
/**
 * Demo: trains and evaluates a Naive Bayes classifier with Spark MLlib.
 *
 * Each non-blank input line must have the form `label,f1 f2 ... fn`:
 * a numeric label, a comma, then whitespace-separated numeric feature values.
 *
 * Usage: `sparkBYSFL <input-path>`
 */
object sparkBYSFL {

  /**
   * Entry point.
   *
   * @param args `args(0)` is the path of the input text file; when it is missing
   *             or blank a usage message is printed and the JVM exits with status 1.
   */
  def main(args: Array[String]): Unit =
    args.headOption.map(_.trim).filter(_.nonEmpty) match {
      case Some(inputPath) => run(inputPath)
      case None =>
        System.err.println("Usage: sparkBYSFL <input-path>")
        sys.exit(1)
    }

  /**
   * Loads the data set, trains the model on a 60% split, prints up to 20
   * `prediction<TAB>label` pairs from the remaining 40% and the overall accuracy.
   *
   * @param inputPath location of the input file, passed to `SparkContext.textFile`
   */
  private def run(inputPath: String): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val examples = sc.textFile(inputPath)
        .map(_.trim)
        .filter(_.nonEmpty) // skip blank lines instead of failing on them
        .map(parseExample)

      // Fixed seed keeps the train/test split reproducible between runs.
      val Array(train, testing) = examples.randomSplit(Array(0.6, 0.4), seed = 11L)

      val model = NaiveBayes.train(train, lambda = 1.0)

      // Cached because several actions below (take, count, filter+count) reuse it.
      val predictionAndLabel =
        testing.map(p => (model.predict(p.features), p.label)).cache()

      predictionAndLabel.take(20).foreach { case (predicted, actual) =>
        println(s"$predicted\t$actual")
      }

      // Accuracy = correctly classified test examples / all test examples.
      val total = predictionAndLabel.count()
      val correct = predictionAndLabel.filter { case (predicted, actual) =>
        predicted == actual
      }.count()
      if (total == 0L) println("accuracy: n/a (empty test split)")
      else println(s"accuracy: ${correct.toDouble / total}")
    } finally {
      // Always release the Spark context, even when a job fails.
      sc.stop()
    }
  }

  /**
   * Parses one `label,f1 f2 ... fn` line into a [[LabeledPoint]].
   *
   * @param line a non-blank input line
   * @return the labeled dense feature vector
   * @throws IllegalArgumentException if the line has no comma-separated feature part
   * @throws NumberFormatException    if the label or a feature is not a number
   */
  private def parseExample(line: String): LabeledPoint = {
    val parts = line.split(",")
    require(parts.length >= 2, s"Malformed input line (expected 'label,f1 f2 ...'): $line")
    val features = parts(1).trim.split("\\s+").map(_.toDouble)
    LabeledPoint(parts(0).trim.toDouble, Vectors.dense(features))
  }
}