6.
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object _06TestAggregateByKey_exercise {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)
    /**
     * Use aggregateByKey to compute, for each key, the number of occurrences
     * and the sum of its values, so that the average can then be derived.
     */
    val rdd1: RDD[(String, Int)] = sc.makeRDD(List(("a", 1), ("a", 2), ("b", 3), ("a", 2), ("b", 4), ("b", 5)), 2)
    /**
     * From the requirement, each key should map to its count together with the
     * sum of its values, and a tuple is a good fit for holding such a pair.
     */
    val result: RDD[(String, (Int, Int))] = rdd1.aggregateByKey((0, 0))(
      // seqOp: within a partition, increment the count and add the value
      (x, y) => (x._1 + 1, x._2 + y),
      // combOp: across partitions, add the counts and the sums element-wise
      (x, y) => (x._1 + y._1, x._2 + y._2)
    )
    // Then compute the average: sum / count
    val result1: RDD[(String, Double)] = result.map(x => {
      val t = x._2
      val avg = t._2 / t._1.toDouble
      (x._1, avg)
    })
    result1.collect().foreach(println)
    // (b,4.0)
    // (a,1.6666666666666667)
  }
}
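For comparison, the same (count, sum) accumulator can also be built with reduceByKey by first mapping every value to a (1, value) pair. The following is a minimal sketch of that approach, not part of the original exercise (the object name AggregateAvgAlternative is made up for illustration); it also uses mapValues, which example 7 introduces, for the final division:

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object AggregateAvgAlternative {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val rdd1: RDD[(String, Int)] = sc.makeRDD(List(("a", 1), ("a", 2), ("b", 3), ("a", 2), ("b", 4), ("b", 5)), 2)
    // Pre-shape each value as (count, sum) so reduceByKey can add pairs element-wise
    val counted: RDD[(String, (Int, Int))] = rdd1
      .mapValues(v => (1, v))
      .reduceByKey((x, y) => (x._1 + y._1, x._2 + y._2))
    // mapValues keeps the key and divides the sum by the count
    val avg: RDD[(String, Double)] = counted.mapValues { case (cnt, sum) => sum / cnt.toDouble }
    avg.collect().foreach(println) // (b,4.0), (a,1.6666666666666667)
    sc.stop()
  }
}

Unlike aggregateByKey, this version needs no explicit zero value, at the cost of an extra mapValues pass to shape the data before the shuffle.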
7.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

// Purpose: map the values of a key-value RDD to another type, leaving the keys unchanged
object _07MapValueDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
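    // The original snippet breaks off above. The lines below are a minimal sketch of
    // how such a mapValues demo typically continues, not the author's original code;
    // the sample data and the string mapping are assumptions made for illustration.
    val sc = new SparkContext(conf)
    val rdd: RDD[(String, Int)] = sc.makeRDD(List(("a", 1), ("b", 2), ("c", 3)))
    // mapValues transforms only the value of each pair; the keys (and the
    // partitioning of the RDD) are preserved
    val mapped: RDD[(String, String)] = rdd.mapValues(v => "value-" + v)
    mapped.collect().foreach(println)
    // (a,value-1)
    // (b,value-2)
    // (c,value-3)
    sc.stop()
  }
}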