// 1. Compute statistical values, V1.0
// Spark driver setup: local mode with 3 worker threads.
val conf = new SparkConf().setAppName("hh")
conf.setMaster("local[3]")
val sc = new SparkContext(conf)
// Load tab-separated numeric rows; key each record by its first column
// (prefixed with "k" to make a String key) and keep the second as value.
// data: RDD[(String, Double)]
val data = sc.textFile("/home/hadoop4/Desktop/i.txt")
  .map(line => line.split("\t").map(_.toDouble))
  .map(fields => ("k" + fields(0), fields(1)))
// Variance prep: wrap each value in a single-element buffer so that
// buffers can later be merged per key.
// data: RDD[(String, Double)]  ->  dataArr: RDD[(String, ArrayBuffer[Double])]
val dataArr = data.map { case (key, value) => (key, ArrayBuffer(value)) }
// Inspect the keyed single-element buffers on the driver.
dataArr.collect().foreach(println)
//output
(k1.0,ArrayBuffer(2.0))
(k1.0,ArrayBuffer(4.0))
(k4.0,ArrayBuffer(5.0))
(k4.0,ArrayBuffer(7.0))
(k7.0,ArrayBuffer(8.0))
(k10.0,ArrayBuffer(11.0))
(k10.0,ArrayBuffer(13.0))
(k10.0,ArrayBuffer(1.0))
(k1.0,ArrayBuffer(100.0))
(k10.0,ArrayBuffer(11.0))
(k10.0,ArrayBuffer(11.0))
(k1.0,ArrayBuffer(2.0))
(k4.0,ArrayBuffer(7.0))
// Merge all per-key buffers into one buffer per key.
// NOTE(review): the reduce function mutates its left argument (++=) to avoid
// allocating a new buffer per merge — this assumes Spark does not reuse the
// buffers elsewhere; commonly done, but worth confirming for this version.
// dataArrRed: RDD[(String, ArrayBuffer[Double])]
val dataArrRed = dataArr.reduceByKey(_ ++= _)
// Inspect the merged buffers on the driver.
dataArrRed.collect().foreach(println)
//output
(k1.0,ArrayBuffer(2.0, 4.0, 100.0, 2.0))
(k7.0,ArrayBuffer(8.0))
(k10.0,ArrayBuffer(11.0, 13.0, 1.0, 11.0, 11.0))
(k4.0,ArrayBuffer(5.0, 7.0, 7.0))
val