/**
 * Custom partitioner that gives every subject its own partition.
 *
 * Each entry of `subjects` is mapped to its index in the array, and that index is
 * used as the partition number for any key whose string form equals the subject.
 *
 * @param subjects the subject names; the array length is the partition count
 */
class SubjectPartitoner(subjects: Array[String]) extends Partitioner {
  // Subject name -> partition number. If a subject occurs more than once in
  // `subjects`, the index of its last occurrence wins.
  val subjectAndNum = new mutable.HashMap[String, Int]()
  subjectAndNum ++= subjects.zipWithIndex

  /** Number of partitions: one per entry in `subjects`. */
  override def numPartitions: Int = subjects.length

  /**
   * Returns the partition number for `key`.
   *
   * The key is looked up by its `toString` form. A `null` key, or a key that is
   * not a known subject, falls back to partition 0.
   *
   * @param key the record key to place
   * @return the partition number registered for the key, or 0 if there is none
   */
  override def getPartition(key: Any): Int = key match {
    case null  => 0 // calling toString on null would throw a NullPointerException
    case other => subjectAndNum.getOrElse(other.toString, 0)
  }
}
// Usage example
// Build the custom partitioner from `subjects` (SubjectPartitoner takes an Array[String]).
val partitioner = new SubjectPartitoner(subjects)
// Repartition `cached` with the custom partitioner; per the declared type, the result
// is an RDD of (String, (String, Int)) pairs.
// NOTE(review): `cached` is defined outside this snippet — presumably a pair RDD keyed by subject; confirm.
val partitioned: RDD[(String, (String, Int))] = cached.partitionBy(partitioner)