Java:
package cn.spark.sparktest; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; public class SecondarySort { public static void main(String[] args){ SparkConf conf = new SparkConf().setAppName("SecondarySort").setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); final JavaRDD<String> lines = sc.textFile("C://Users//Desktop//sort.txt"); JavaPairRDD<SecondarySortKey,String> pairs = lines.mapToPair( new PairFunction<String, SecondarySortKey, String>() { @Override public Tuple2<SecondarySortKey, String> call(String s) throws Exception { String[] pariSplit = s.split(" "); SecondarySortKey key = new SecondarySortKey(Integer.valueOf(pariSplit[0]), Integer.valueOf(pariSplit[1])); return new Tuple2<SecondarySortKey, String>(key,s); } }); JavaPairRDD<SecondarySortKey, String> sortedPairs = pairs.sortByKey(); JavaRDD<String> sortedLines = sortedPairs.map( new Function<Tuple2<SecondarySortKey,String>, String>() { private static final long serialVersionUID = 1L; @Override public String call(Tuple2<SecondarySortKey, String> v1) throws Exception { return v1._2; } }); sortedLines.foreach(new VoidFunction<String>() { private static final long serialVersionUID = 1L; @Override public void call(String t) throws Exception { System.out.println(t); } }); sc.close(); } }
package cn.spark.sparktest; import java.io.Serializable; import scala.math.Ordered; /** * 自定义的二次排序key * @author Administrator * */ public class SecondarySortKey implements Ordered<SecondarySortKey>, Serializable { private static final long serialVersionUID = -2366006422945129991L; // 首先在自定义key里面,定义需要进行排序的列 private int first; private int second; public SecondarySortKey(int first, int second) { this.first = first; this.second = second; } @Override public boolean $greater(SecondarySortKey other) { if(this.first > other.getFirst()) { return true; } else if(this.first == other.getFirst() && this.second > other.getSecond()) { return true; } return false; } @Override public boolean $greater$eq(SecondarySortKey other) { if(this.$greater(other)) { return true; } else if(this.first == other.getFirst() && this.second == other.getSecond()) { return true; } return false; } @Override public boolean $less(SecondarySortKey other) { if(this.first < other.getFirst()) { return true; } else if(this.first == other.getFirst() && this.second < other.getSecond()) { return true; } return false; } @Override public boolean $less$eq(SecondarySortKey other) { if(this.$less(other)) { return true; } else if(this.first == other.getFirst() && this.second == other.getSecond()) { return true; } return false; } @Override public int compare(SecondarySortKey other) { if(this.first - other.getFirst() != 0) { return this.first - other.getFirst(); } else { return this.second - other.getSecond(); } } @Override public int compareTo(SecondarySortKey other) { if(this.first - other.getFirst() != 0) { return this.first - other.getFirst(); } else { return this.second - other.getSecond(); } } // 为要进行排序的多个列,提供getter和setter方法,以及hashcode和equals方法 public int getFirst() { return first; } public void setFirst(int first) { this.first = first; } public int getSecond() { return second; } public void setSecond(int second) { this.second = second; } @Override public int hashCode() { final int prime = 31; int result = 1; 
result = prime * result + first; result = prime * result + second; return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; SecondarySortKey other = (SecondarySortKey) obj; if (first != other.first) return false; if (second != other.second) return false; return true; } }
测试:
Scala:
package cn.spark.study.core

/**
 * Composite key for secondary sorting: orders by `first`, and by `second`
 * when the `first` values are equal.
 *
 * @param first  primary sort column
 * @param second secondary sort column, used when `first` ties
 */
class SecondSort(val first: Int, val second: Int) extends Ordered[SecondSort] with Serializable {

  /**
   * Compares by `first`, then by `second`.
   *
   * Uses `Integer.compare` rather than subtraction: a difference such as
   * `Int.MinValue - 1` overflows and would report the wrong sign.
   *
   * @param that the key to compare against
   * @return a negative value, zero, or a positive value when this key is less
   *         than, equal to, or greater than `that`
   */
  def compare(that: SecondSort): Int = {
    val byFirst = java.lang.Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst
    else java.lang.Integer.compare(this.second, that.second)
  }
}
package cn.spark.study.core

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Secondary-sort driver: reads a text file, orders its lines by the first two
 * space-separated integer fields (first field, then second field) and prints
 * the lines in that order.
 */
object SecondSortTest {

  /**
   * Runs the job with a local master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SecondSort")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Stop the context even when a stage fails; previously it was never stopped.
    try {
      val lines = sc.textFile("C://Users//Desktop//sort.txt", 1)

      // Pair every line with a composite key built from its first two fields.
      // The line is split once instead of once per field.
      val pairs = lines.map { line =>
        val fields = line.split(" ")
        (new SecondSort(fields(0).toInt, fields(1).toInt), line)
      }

      // The ordering comes from SecondSort.compare.
      val sortedPairs = pairs.sortByKey()
      val sortedLines = sortedPairs.map(sortedPair => sortedPair._2)

      sortedLines.foreach { sortedLine => println(sortedLine) }
    } finally {
      sc.stop()
    }
  }
}
测试: