本题为课本上的经典例题,当时觉得用 Scala 来实现可能会比较方便,于是便这样做了。
import org.apache.spark.rdd.RDD
import org.apache.spark.util.LongAccumulator
import org.apache.spark.{SparkConf, SparkContext}
import scala.util.Random
/**
 * Interactive naive-Bayes style "will this customer buy?" estimator on top of a Spark RDD.
 *
 * Every data line is expected to hold five space-separated fields: four attribute
 * values followed by the purchase label at index 4, where "是" marks a purchase
 * (sample row: `青 高 否 中 否`). Lines with too few fields are skipped.
 */
object bayes {

  /** Index of the purchase-label field within a split line. */
  private final val LabelColumn = 4

  /** Label value that marks a row as "purchased". */
  private final val PositiveLabel = "是"

  /** Data file that is read when no path is given on the command line. */
  private final val DefaultDataPath = "C:\\Users\\86158\\Desktop\\作业\\spark\\work3\\bayes.txt"

  /**
   * Builds a random string of `len` characters, each drawn from "MF".
   *
   * @param len number of characters to generate
   * @return the generated string
   */
  def randomString(len: Int): String = {
    val rand = new scala.util.Random(System.nanoTime)
    val ab = "MF"
    val sb = new StringBuilder(len)
    for (_ <- 0 until len) {
      sb.append(ab(rand.nextInt(ab.length)))
    }
    sb.toString
  }

  /**
   * Splits every line on single spaces and keeps only rows that have both the
   * requested column and the label column, so later indexing cannot go out of bounds
   * (a trailing blank line in the file would otherwise fail the whole Spark job).
   */
  private def wellFormedRows(data: RDD[String], column: Int): RDD[Array[String]] = {
    val lastIndex = math.max(column, LabelColumn)
    data.map(_.split(" ")).filter(_.length > lastIndex)
  }

  /** Counts, in a single pass, all elements of `flags` and those that are `true`. */
  private def totalAndHits(flags: RDD[Boolean]): (Long, Long) =
    flags
      .map(hit => (1L, if (hit) 1L else 0L))
      .fold((0L, 0L))((a, b) => (a._1 + b._1, a._2 + b._2))

  /** `numerator / denominator` as a Double; 0.0 (instead of NaN) when the denominator is 0. */
  private def ratio(numerator: Long, denominator: Long): Double =
    if (denominator == 0L) 0.0 else numerator.toDouble / denominator.toDouble

  /**
   * Fraction of the rows whose field `cutPositon` equals `cuts` that are labelled
   * as purchased, i.e. P(buy | attribute = cuts).
   *
   * @param data       raw text lines
   * @param cuts       attribute value to condition on
   * @param cutPositon zero-based index of the attribute field
   * @return the fraction, or 0.0 when no row has that attribute value
   */
  def part_prability(data: RDD[String], cuts: String, cutPositon: Int): Double = {
    val boughtFlags = wellFormedRows(data, cutPositon)
      .filter(row => row(cutPositon) == cuts)
      .map(row => row(LabelColumn) == PositiveLabel)
    val (total, bought) = totalAndHits(boughtFlags)
    ratio(bought, total)
  }

  /**
   * Fraction of the purchased rows whose field `cutPositon` equals `cuts`,
   * i.e. P(attribute = cuts | buy).
   *
   * @param data       raw text lines
   * @param cuts       attribute value to look for
   * @param cutPositon zero-based index of the attribute field
   * @return the fraction, or 0.0 when there are no purchased rows
   */
  def part_prability2(data: RDD[String], cuts: String, cutPositon: Int): Double = {
    val matchFlags = wellFormedRows(data, cutPositon)
      .filter(row => row(LabelColumn) == PositiveLabel)
      .map(row => row(cutPositon) == cuts)
    val (total, matching) = totalAndHits(matchFlags)
    ratio(matching, total)
  }

  /**
   * Fraction of all rows whose field `cutPositon` equals `cuts`, i.e. P(attribute = cuts).
   *
   * @param data       raw text lines
   * @param cuts       attribute value to look for
   * @param cutPositon zero-based index of the attribute field
   * @return the fraction, or 0.0 when there are no usable rows
   */
  def apart(data: RDD[String], cuts: String, cutPositon: Int): Double = {
    val matchFlags = wellFormedRows(data, cutPositon)
      .map(row => row(cutPositon) == cuts)
    val (total, matching) = totalAndHits(matchFlags)
    ratio(matching, total)
  }

  /** Reads the continue/stop signal; unparsable input or end of input counts as 0 (stop). */
  private def readSignal(): Int =
    scala.util.Try(scala.io.StdIn.readInt()).getOrElse(0)

  /** Prints `promptText` and returns the trimmed line typed by the user ("" at end of input). */
  private def readValue(promptText: String): String = {
    println(promptText)
    Option(scala.io.StdIn.readLine()).map(_.trim).getOrElse("")
  }

  /**
   * Entry point: repeatedly asks for the four attribute values and prints the
   * estimated purchase probability until the user enters anything other than 1.
   *
   * @param args optional; `args(0)` overrides the default data-file path
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("WordCount")
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val dataPath = args.headOption.getOrElse(DefaultDataPath)
      // Cached because every query below runs several jobs over the same lines.
      val datat = sc.textFile(dataPath).cache()
      // Overall purchase rate P(buy), derived from the data rather than hard-coded.
      val purchase: Double = apart(datat, PositiveLabel, LabelColumn)

      println("plez input 0 or 1: 0 for end and 1 for begin")
      var signal = readSignal()
      while (signal == 1) {
        val age = readValue("请输入年龄变量:")
        val salary = readValue("请输入收入变量:")
        val hobby = readValue("请输入爱好变量:")
        val credit = readValue("请输入信用变量:")

        // Product of P(attribute | buy) over the four attributes.
        val likelihood =
          part_prability2(datat, age, 0) *
            part_prability2(datat, salary, 1) *
            part_prability2(datat, hobby, 2) *
            part_prability2(datat, credit, 3)
        // Product of the attribute marginals P(attribute).
        // NOTE(review): this approximates P(x) by assuming the attributes are
        // unconditionally independent, so the result is not a normalised posterior
        // and can exceed 1 — confirm this is the intended textbook formula.
        val evidence =
          apart(datat, age, 0) *
            apart(datat, salary, 1) *
            apart(datat, hobby, 2) *
            apart(datat, credit, 3)

        if (evidence == 0.0) {
          // At least one value never occurs in the data; dividing would give NaN/Infinity.
          println("输入的取值在数据中不存在,无法计算购买概率")
        } else {
          val result = likelihood * purchase / evidence
          println(result)
          if (result > 0.5) {
            println(">0.5 很可能会购买")
          } else {
            println("<0.5 购买的概率不大")
          }
        }
        println("plez input your signal:")
        signal = readSignal()
      }
    } finally {
      // Release the Spark context even when a job or the input handling fails.
      sc.stop()
    }
  }
}
结果
PS(后续):老师布置的是用 Python / Java 来完成这个作业,我将代码分享给了同学,结果全班几乎有一半人交的都是这份代码……我真是醉了。