案例:给定一组键值对("spark",2),("hadoop",6),("hadoop",4),("spark",6),键值对的key
表示图书名称,value表示某天图书销量,请计算每个键对应的平均值,也就是计算每种图书的每天平均销量。
Spark代码:
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Computes the average daily sales of each book from a fixed set of
 * (bookTitle, salesOnOneDay) pairs and prints the result.
 */
object tushu {

  /**
   * Entry point: creates a local SparkContext, averages the values per key,
   * prints the collected list of (bookTitle, average) pairs, and stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("paixu").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val book = Array(("spark", 2), ("hadoop", 6), ("hadoop", 4), ("spark", 6))
      val sales = sc.parallelize(book)

      // Pair every sale with a count of 1 so one reduce yields both the
      // per-key total and the per-key number of records.
      val totals = sales
        .mapValues(amount => (amount, 1))
        .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))

      // Convert to Double before dividing: Int / Int would truncate the
      // average (e.g. 7 / 2 == 3 instead of 3.5).
      val result = totals
        .mapValues { case (sum, count) => sum.toDouble / count }
        .collect()
        .toList

      println(result)
    } finally {
      // Always release the Spark resources, even if the job fails.
      sc.stop()
    }
  }
}
// Explanation:
//1.var