// Custom partitioner: save each record to a specific partition based on its key.
import org.apache.spark.{Partitioner, SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/** Driver: builds a small pair RDD and redistributes it with a custom partitioner. */
object SparkPartitionBy_Opter1 {
  def main(args: Array[String]): Unit = {
    // Local Spark context using all available cores.
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")
    val sc: SparkContext = new SparkContext(conf)

    // Sample key/value data to partition.
    val pairs: RDD[(String, Int)] = sc.makeRDD(List(("a", 1), ("b", 2), ("c", 3)))

    // Redistribute records across partitions according to MyPartitionner's key rule.
    val repartitioned: RDD[(String, Int)] = pairs.partitionBy(new MyPartitionner(5))

    // Write one part-file per partition under ./output.
    repartitioned.saveAsTextFile("output")
  }
}
/**
 * Custom partitioner: routes key "a" to partition 1 and every other key to
 * partition 2. Partition 0 (and partitions 3 and above, if any) stay empty.
 *
 * @param partitions total number of partitions in the resulting RDD; must be
 *                   at least 3 because getPartition can return index 2.
 */
class MyPartitionner(partitions: Int) extends Partitioner {

  // Fail fast instead of producing an out-of-range partition id at shuffle time.
  require(partitions >= 3, s"MyPartitionner needs at least 3 partitions, got $partitions")

  /** Total number of partitions. */
  override def numPartitions: Int = {
    partitions
  }

  // Partition by key: "a" -> 1, anything else -> 2.
  override def getPartition(key: Any): Int = key match {
    case "a" => 1
    case _ => 2
  }

  // Spark compares partitioners to decide whether a shuffle is required when
  // joining/co-grouping RDDs; without equals/hashCode two equivalent instances
  // would never compare equal and Spark would reshuffle unnecessarily.
  override def equals(other: Any): Boolean = other match {
    case that: MyPartitionner => that.numPartitions == numPartitions
    case _ => false
  }

  override def hashCode(): Int = numPartitions
}