踏踏实实积累,不能浮躁!!!
3 5
2 8
2 4
1 7
1 5
需求:先按上述数据的第一列降序排序;第一列相等时,再按第二列降序排序。最终效果如下:
3 5
2 8
2 4
1 7
1 5
原理:通过自定义的 key 来实现二次排序,其实并不难。下面是 Spark 代码的实现逻辑,供大家参考。
1:自定义key
package cn.spark.java.sparkcore;
import scala.math.Ordered;
import java.io.Serializable;
/**
 * Composite key used to implement a secondary sort.
 *
 * <p>Keys are ordered by {@code first}; keys whose {@code first} values are equal are ordered
 * by {@code second}. The ordering is consistent with {@link #equals(Object)}: two keys compare
 * as equal exactly when both fields are equal.
 */
public class Sort_2_key implements Ordered<Sort_2_key>, Serializable {
    private int first;
    private int second;

    /** Creates a key with both fields left at their default value of 0. */
    public Sort_2_key() {
    }

    /** @return the primary sort field */
    public int getFirst() {
        return first;
    }

    /** @return the secondary sort field */
    public int getSecond() {
        return second;
    }

    /** @param first the primary sort field */
    public void setFirst(int first) {
        this.first = first;
    }

    /** @param second the secondary sort field */
    public void setSecond(int second) {
        this.second = second;
    }

    /** Two keys are equal when they are of the same class and both fields match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        Sort_2_key other = (Sort_2_key) o;
        return first == other.first && second == other.second;
    }

    @Override
    public int hashCode() {
        int result = first;
        result = 31 * result + second;
        return result;
    }

    /**
     * Compares this key with {@code that}, by {@code first} and then by {@code second}.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction: {@code a - b} overflows
     * when the operands are far apart (for example {@code Integer.MIN_VALUE - 1}) and would
     * then report the wrong sign.
     *
     * @param that the key to compare against
     * @return a negative value, zero, or a positive value when this key is less than, equal to,
     *         or greater than {@code that}
     */
    @Override
    public int compare(Sort_2_key that) {
        int byFirst = Integer.compare(this.first, that.first);
        if (byFirst != 0) {
            return byFirst;
        }
        return Integer.compare(this.second, that.second);
    }

    /** @return {@code true} when this key sorts strictly before {@code that} */
    @Override
    public boolean $less(Sort_2_key that) {
        return compare(that) < 0;
    }

    /** @return {@code true} when this key sorts strictly after {@code that} */
    @Override
    public boolean $greater(Sort_2_key that) {
        return compare(that) > 0;
    }

    /** @return {@code true} when this key sorts before {@code that} or is equal to it */
    @Override
    public boolean $less$eq(Sort_2_key that) {
        return compare(that) <= 0;
    }

    /** @return {@code true} when this key sorts after {@code that} or is equal to it */
    @Override
    public boolean $greater$eq(Sort_2_key that) {
        return compare(that) >= 0;
    }

    /**
     * Same ordering as {@link #compare(Sort_2_key)}; delegating keeps the two methods from
     * drifting apart.
     */
    @Override
    public int compareTo(Sort_2_key that) {
        return compare(that);
    }
}
2:排序的逻辑实现
package cn.spark.java.sparkcore;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.util.Arrays;
/**
 * Secondary-sort example.
 *
 * <p>Each input line is turned into a ({@code Sort_2_key}, line) pair, the pairs are sorted by
 * key in descending order, and the original lines are printed. Expected output for the sample
 * data:
 * <pre>
 * 3 5
 * 2 8
 * 2 4
 * 1 7
 * 1 5
 * </pre>
 */
public class Sort_2class {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("Sort_2")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile("f://scala/sort.txt");

            // Build the composite key from the first two whitespace-separated columns and keep
            // the whole line as the value so it can be printed after sorting.
            JavaPairRDD<Sort_2_key, String> keyed = lines.mapToPair(
                    new PairFunction<String, Sort_2_key, String>() {
                        @Override
                        public Tuple2<Sort_2_key, String> call(String line) throws Exception {
                            // Split once; tolerate leading/trailing and repeated whitespace.
                            String[] fields = line.trim().split("\\s+");
                            if (fields.length < 2) {
                                throw new IllegalArgumentException(
                                        "Expected two integer columns but got: \"" + line + "\"");
                            }
                            Sort_2_key key = new Sort_2_key();
                            key.setFirst(Integer.parseInt(fields[0]));
                            key.setSecond(Integer.parseInt(fields[1]));
                            return new Tuple2<Sort_2_key, String>(key, line);
                        }
                    });

            // false = descending order of the composite key.
            JavaPairRDD<Sort_2_key, String> sorted = keyed.sortByKey(false);

            // NOTE(review): foreach runs per partition; the printed order presumably matches the
            // sorted order only because the master is "local" — confirm before running this on
            // a cluster (collect to the driver first if ordered output is required there).
            sorted.foreach(new VoidFunction<Tuple2<Sort_2_key, String>>() {
                @Override
                public void call(Tuple2<Sort_2_key, String> pair) throws Exception {
                    System.out.println(pair._2);
                }
            });
        } finally {
            // Release the context even when the job fails.
            sc.close();
        }
    }
}
后续会补充 Hadoop 版本的实现代码。