partitionBy
/**
 * Custom partitioner that routes `Int` keys by parity.
 *
 * Even `Int` keys go to partition 0 and odd `Int` keys go to partition 1.
 * Any key that is not an `Int` falls back to partition 0.
 *
 * The returned index is always kept below `numPartitions`: when only one
 * partition is configured, odd keys are also sent to partition 0 instead of
 * the non-existent partition 1.
 *
 * @param number the partition count reported through `numPartitions`
 */
class MyPartition(number: Int) extends Partitioner {

  /** Number of partitions this partitioner reports. */
  override def numPartitions: Int = number

  /**
   * Picks the target partition for `key`.
   *
   * @param key the record key; only `Int` keys are inspected
   * @return 1 for an odd `Int` key when at least two partitions exist, otherwise 0
   */
  override def getPartition(key: Any): Int = key match {
    // Partition 1 is only a valid index when there are at least two partitions.
    case keyInt: Int if keyInt % 2 != 0 && numPartitions > 1 => 1
    case _ => 0
  }

  /** Two `MyPartition` instances partition identically when their partition counts match. */
  override def equals(other: Any): Boolean = other match {
    case that: MyPartition => that.numPartitions == numPartitions
    case _ => false
  }

  /** Kept consistent with `equals`. */
  override def hashCode(): Int = numPartitions
}
// Source data: three (Int, String) pairs spread over three slices.
val rdd: RDD[(Int, String)] =
  sc.makeRDD(Seq(1 -> "aaa", 2 -> "bbb", 3 -> "ccc"), numSlices = 3)

// Redistribute the pairs with the custom partitioner, configured for two partitions.
val rdd2: RDD[(Int, String)] = {
  val byParity = new MyPartition(2)
  rdd.partitionBy(byParity)
}

// Bring the repartitioned pairs back to the driver and print each one.
rdd2.collect().foreach(pair => println(pair))
reduceByKey():按照 K 聚合 V
// Sample (String, Int) pairs with repeated keys "a" and "b".
val rdd: RDD[(String, Int)] =
  sc.makeRDD(List("a" -> 1, "b" -> 5, "a" -> 5, "b" -> 2))
val rdd1: RDD[(String, Int)]