import org.apache.spark._
import SparkContext._
import org.apache.spark.SparkConf
import java.util.Date
import java.text.SimpleDateFormat
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.spark.Partitioner
object partitioner {
  /** Entry point: runs a tab-delimited word count over an HDFS input file and
    * writes the result to a timestamped HDFS output directory, using a custom
    * partitioner to control which reducer each key is routed to.
    */
  def main(args: Array[String]): Unit = {
    // Timestamp keeps the app name and the output path unique per run.
    val time = new SimpleDateFormat("MMddHHmm").format(new Date())

    val sparkConf = new SparkConf().setAppName("wordcount_" + time)
    sparkConf.set("mapreduce.framework.name", "yarn")
    val sc = new SparkContext(sparkConf)

    // Read the input as a single partition and cache it for reuse.
    val textFile = sc.textFile(
      "hdfs://namenode:9000/data/mapreduce/chuping/test_in_1/new5", 1).cache()

    // Split each line on tabs, pair every token with a count of 1, then
    // reduce by key with the custom partitioner deciding reducer placement.
    val result = textFile
      .flatMap(_.split("\t"))
      .map((_, 1))
      .reduceByKey(new testPartitioner, _ + _)

    result.saveAsTextFile("hdfs://namenode:9000/data/zk/test/partitioner" + time)
    sc.stop()
  }
}
/** Partitioner that sends every key to partition 1 (out of 3 partitions).
  *
  * Demo only: a real partitioner would derive the partition from the key;
  * the constant return value here simply pins all records onto a single
  * reducer so the effect of partitioning is visible in the output layout.
  */
class testPartitioner extends Partitioner {
  // Total number of output partitions Spark will create.
  val numPartitions = 3

  // Assign every key to reducer #1; the returned index must lie in
  // [0, numPartitions). (Original note: "指定到第几个reduce" — "specify
  // which reduce task"; it was a bare, uncommented trailing token and
  // would not compile, so it is moved into this comment.)
  def getPartition(key: Any): Int = 1
}
// Note: this program is only a test that performs a simple word count, so it
// cannot show the real value of a custom Partitioner; in actual production
// workloads, however, custom partitioners are used everywhere.