自定义分区器:
- 继承Partitioner
- 重写方法(numPartitions、getPartition)
// Configure a local Spark application named "spark" and create its context.
val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("spark")
val sc = new SparkContext(sparkConf)
try {
  // Sample key/value pairs, initially distributed over 2 partitions.
  val rdd: RDD[(String, String)] = sc.makeRDD(List(
    ("nba", "a"),
    ("cba", "aa"),
    ("nba", "aaa"),
    ("wba", "aaaa")
  ), 2)
  // Redistribute the records by key with the custom partitioner, then
  // write the repartitioned data to the "output" path.
  val resultRDD: RDD[(String, String)] = rdd.partitionBy(new MyPartitioner)
  resultRDD.saveAsTextFile("output")
} finally {
  // Always release the SparkContext, even when the job above fails.
  sc.stop()
}
}
class MyPartitioner extends Partitioner{
// Total number of partitions; the indices 0, 1 and 2 returned by getPartition all fall within it.
override def numPartitions: Int = 3
/**
 * Maps a record key to the index of the partition it is sent to.
 *
 * @param key the record key, compared against the fixed string keys below
 * @return 0 for "nba", 1 for "cba", and 2 for every other key
 */
override def getPartition(key: Any): Int =
  if (key == "nba") 0
  else if (key == "cba") 1
  else 2