groupBy
groupBy(function)
function 为每个元素返回一个 key,传入的 RDD 中的各个元素根据这个 key 进行分组,结果是 (key, 该 key 对应的元素集合) 形式的键值对。
/**
 * Demonstrates `groupBy` on an RDD: the integers 1 to 10 are grouped under
 * the key "even" or "odd", and each resulting (key, values) pair is printed.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  // Default number of partitions: 12
  val conf = new SparkConf()
    .setMaster("local")
    .setAppName("test")
    .set("spark.default.parallelism", "12")
  val sc = new SparkContext(conf)

  try {
    // Source RDD is explicitly created with 2 partitions.
    val rdd1 = sc.makeRDD(1 to 10, 2)

    // The function passed to groupBy returns the grouping key for each element.
    val grouped = rdd1.groupBy(x => if (x % 2 == 0) "even" else "odd")

    grouped.collect.foreach(println(_))
  } finally {
    // Always release the context, even if the job fails.
    sc.stop()
  }
}
16/12/20 16:39:07 INFO DAGScheduler: Job 0 finished: collect at ShellTest.scala:25, took 2.2