// partitionBy: repartition a pair RDD with a custom partitioner

/**
 * Custom partitioner that extends Spark's `Partitioner` base class and routes
 * records by the parity of their key.
 *
 *  - Even `Int` keys go to partition 0.
 *  - Odd `Int` keys go to partition 1, provided that partition exists
 *    (i.e. `number > 1`); otherwise they fall back to partition 0.
 *  - Keys that are not `Int` (including `null`) go to partition 0.
 *
 * @param number the number of partitions reported through `numPartitions`
 */
class MyPartition(number: Int) extends Partitioner {

  /** Number of partitions this partitioner distributes records across. */
  override def numPartitions: Int = number

  /**
   * Maps a key to a partition index.
   *
   * @param key the record key; only `Int` keys are inspected
   * @return 0 for even or non-`Int` keys, 1 for odd `Int` keys (clamped to 0
   *         when only a single partition is available)
   */
  override def getPartition(key: Any): Int = key match {
    case keyInt: Int =>
      // `% 2 == 0` is also correct for negative keys (-3 % 2 == -1, i.e. odd).
      val parity = if (keyInt % 2 == 0) 0 else 1
      // Never return an index outside [0, numPartitions): with a single
      // partition there is no partition 1 to send odd keys to.
      if (parity < numPartitions) parity else 0
    case _ => 0
  }

  // Two MyPartition instances with the same partition count place every key
  // identically; equals/hashCode let them compare as the same partitioning.
  override def equals(other: Any): Boolean = other match {
    case that: MyPartition => that.numPartitions == numPartitions
    case _                 => false
  }

  override def hashCode: Int = numPartitions
}

// 3.1 Create the first RDD (three key/value pairs spread over 3 partitions)
val rdd: RDD[(Int, String)] = sc.makeRDD(Array((1,"aaa"),(2,"bbb"),(3,"ccc")),3)
// Using the default partitioner (HashPartitioner), tagging each record with its partition index
// val rdd2: RDD[(Int, (Int, String))] = rdd.partitionBy(new HashPartitioner(2)).mapPartitionsWithIndex((index,items)=>items.map((index,_)))
// Using the custom partitioner
val rdd2: RDD[(Int, String)] = rdd.partitionBy(new MyPartition(2))
// Collect the repartitioned pairs and print each one
rdd2.collect().foreach(println)

// reduceByKey(): aggregate the values (V) by key (K)
val rdd = sc.makeRDD(List(("a",1),("b",5),("a",5),("b",2)))
val rdd1: RDD[(String, Int)] = rdd.reduceByKey