1. Computing Statistics V1.0
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer

val conf = new SparkConf().setAppName("hh")
conf.setMaster("local[3]")
val sc = new SparkContext(conf)
// Read tab-separated lines, parse every field as a Double,
// and key each value by its first column.
val data = sc.textFile("/home/hadoop4/Desktop/i.txt")
  .map(_.split("\t")).map(f => f.map(_.toDouble))
  .map(f => ("k" + f(0), f(1)))
// variance
// data: RDD[(String, Double)]
// Wrap each value in a singleton ArrayBuffer so the buffers can be
// concatenated per key in the reduceByKey step below.
val dataArr = data.map(f => (f._1, ArrayBuffer(f._2)))
// dataArr: RDD[(String, ArrayBuffer[Double])]
dataArr.collect().foreach(println(_))
// output
(k1.0,ArrayBuffer(2.0))
(k1.0,ArrayBuffer(4.0))
(k4.0,ArrayBuffer(5.0))
(k4.0,ArrayBuffer(7.0))
(k7.0,ArrayBuffer(8.0))
(k10.0,ArrayBuffer(11.0))
(k10.0,ArrayBuffer(13.0))
(k10.0,ArrayBuffer(1.0))
(k1.0,ArrayBuffer(100.0))
(k10.0,ArrayBuffer(11.0))
(k10.0,ArrayBuffer(11.0))
(k1.0,ArrayBuffer(2.0))
(k4.0,ArrayBuffer(7.0))
// Concatenate the buffers per key; ++= appends y's elements to x in place.
val dataArrRed = dataArr.reduceByKey((x, y) => x ++= y)
// dataArrRed: RDD[(String, ArrayBuffer[Double])]
dataArrRed.collect().foreach(println(_))
// output
(k1.0,ArrayBuffer(2.0, 4.0, 100.0, 2.0))
(k7.0,ArrayBuffer(8.0))
(k10.0,ArrayBuffer(11.0, 13.0, 1.0, 11.0, 11.0))
(k4.0,ArrayBuffer(5.0, 7.0, 7.0))
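With all of a key's values gathered in one buffer, the per-key variance can be computed directly. The following is a minimal sketch of that step, assuming the population formula sum((x - mean)^2) / n; the name dataVariance is ours, added for illustration.

// Sketch (assumed continuation): mean, then average squared deviation, per key.
val dataVariance = dataArrRed.map { case (k, buf) =>
  val n = buf.length
  val mean = buf.sum / n
  val variance = buf.map(x => (x - mean) * (x - mean)).sum / n
  (k, variance)
}
dataVariance.collect().foreach(println(_))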
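As a design note, materializing every value in an ArrayBuffer per key can be memory-heavy when a key has many values. A possible alternative (our suggestion, not part of the original) is to fold each key's values into Spark's built-in StatCounter, which tracks count, mean, and variance incrementally without collecting the values:

import org.apache.spark.util.StatCounter

// Aggregate each key's values into a StatCounter instead of a buffer.
val stats = data.aggregateByKey(new StatCounter())(
  (acc, v) => acc.merge(v),          // fold one value into the counter
  (acc1, acc2) => acc1.merge(acc2))  // combine partial counters
stats.collect().foreach { case (k, s) => println((k, s.variance)) }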