import java.io.File
import org.apache.commons.io.FileUtils
import org.apache.hadoop.fs.FileUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.{HashPartitioner, Partitioner, SparkConf, SparkContext}
object FavPerson3Partitioner {

  /**
   * Entry point: builds a small sample of (key, value) pairs whose keys differ
   * only in length, repartitions them with [[MyPartitioner]] into 3 partitions
   * (partition index = key length % 3), and writes the result as text files.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("FavPerson3Partitioner").setMaster("local")
    val sc   = new SparkContext(conf)

    // Keys of length 3, 2 and 1 — each length lands in a different partition.
    val samplePairs: Array[(String, Int)] = Array(
      ("aaa", 2), ("aaa", 3), ("aaa", 1), ("aaa", 0), ("aaa", 4),
      ("aa", 2), ("aa", 3), ("aa", 1), ("aa", 0), ("aa", 4),
      ("a", 2), ("a", 3), ("a", 1), ("a", 0), ("a", 4))

    val partitioned: RDD[(String, Int)] =
      sc.parallelize(samplePairs).partitionBy(new MyPartitioner(3))

    // saveAsTextFile refuses to write into an existing directory,
    // so delete any previous output first.
    val outputDir = new File("C:\\Users\\S\\Desktop\\内民大实训\\log.log")
    if (outputDir.exists()) {
      FileUtils.deleteDirectory(outputDir)
    }
    partitioned.saveAsTextFile("C:\\Users\\S\\Desktop\\内民大实训\\log.log")

    sc.stop()
  }
}
/**
 * Partitions pairs by the length of the key's string form:
 * partition index = key.toString.length % num.
 *
 * Fixes over the original:
 *  - fails fast if `num` is not positive (otherwise `% num` would throw an
 *    ArithmeticException or return a negative index inside a Spark task);
 *  - maps a `null` key to partition 0 instead of throwing a
 *    NullPointerException, matching Spark's own HashPartitioner behavior;
 *  - defines `equals`/`hashCode` so Spark can recognize two instances with the
 *    same partition count as equivalent and skip unnecessary shuffles.
 *
 * @param num number of partitions; must be > 0
 */
class MyPartitioner(num: Int) extends Partitioner {
  require(num > 0, s"Number of partitions must be positive, got $num")

  override def numPartitions: Int = num

  override def getPartition(key: Any): Int = key match {
    // Spark pair RDDs may contain null keys; send them to partition 0.
    case null => 0
    case k    => k.toString.length % num
  }

  override def equals(other: Any): Boolean = other match {
    case p: MyPartitioner => p.numPartitions == num
    case _                => false
  }

  override def hashCode(): Int = num
}
// Source: blog post "Spark Custom Partitioning (Part 1)", last updated 2021-11-24 16:09:03.