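// sortByKey sorts the RDD by key; ascending or descending order can be specified explicitly.
// The result is still a JavaPairRDD whose elements are exactly the same as those of the original RDD;
// only the order of the elements within the RDD is different.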
/**
 * Demonstrates {@code sortByKey} on a small in-memory pair RDD.
 *
 * <p>Builds four (score, name) pairs, sorts them by their integer key with the
 * no-argument {@code sortByKey()} call, and prints every pair to standard
 * output in the form {@code "score: name"}.
 *
 * <p>The Spark context is closed in a {@code finally} block so that it is
 * released even when building, sorting or printing the RDD throws.
 */
public static void mySortByKey() {
    SparkConf conf = new SparkConf()
            .setAppName("mySortByKey")
            .setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        // Keys are the scores, values are the student names.
        List<Tuple2<Integer, String>> scoreList = Arrays.asList(
                new Tuple2<Integer, String>(65, "leo"),
                new Tuple2<Integer, String>(50, "tom"),
                new Tuple2<Integer, String>(100, "marry"),
                new Tuple2<Integer, String>(80, "jack"));
        JavaPairRDD<Integer, String> scores = sc.parallelizePairs(scoreList);

        // Same elements as `scores`, only reordered by key.
        JavaPairRDD<Integer, String> sortedScores = scores.sortByKey();

        sortedScores.foreach(new VoidFunction<Tuple2<Integer, String>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<Integer, String> t) throws Exception {
                System.out.println(t._1 + ": " + t._2);
            }
        });
    } finally {
        // Always release the context, including on failure paths.
        sc.close();
    }
}
// Output:
// 50: tom
// 65: leo
// 80: jack
// 100: marry