package com.sparktest;
import java.io.Serializable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import scala.math.Ordered;
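/**
 * Secondary sort: ascending by the first value, descending by the second value.
 * Sample of the raw input data:
 * <pre>
 * 5 6
 * 4 1
 * 6 7
 * 6 4
 * 7 2
 * 4 1
 * </pre>
 */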
public class SecondSort
{
    /** Input file that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "d://spark_data//secondSort.txt";

    /**
     * Reads a text file of whitespace-separated integer pairs, sorts the lines by
     * {@link MySortKey} (first value ascending, second value descending) and prints
     * the sorted lines to standard output.
     *
     * @param args optional; when {@code args[0]} is present it is used as the input
     *             path, otherwise {@link #DEFAULT_INPUT_PATH} is read
     */
    public static void main(String[] args)
    {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        SparkConf conf = new SparkConf()
                .setAppName("SecondSort")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try
        {
            JavaRDD<String> linesRDD = sc.textFile(inputPath);

            // Blank lines carry no key; drop them instead of failing in parseInt.
            JavaRDD<String> dataLinesRDD = linesRDD.filter(new Function<String, Boolean>()
            {
                @Override
                public Boolean call(String line) throws Exception
                {
                    return !line.trim().isEmpty();
                }
            });

            // Turn every line into a tuple: (sort key built from num1 and num2, original line).
            JavaPairRDD<MySortKey, String> tupleRDD = dataLinesRDD.mapToPair(new PairFunction<String, MySortKey, String>()
            {
                @Override
                public Tuple2<MySortKey, String> call(String line) throws Exception
                {
                    // Split on any run of whitespace so tabs or repeated spaces are accepted.
                    String[] fields = line.trim().split("\\s+");
                    if (fields.length < 2)
                    {
                        throw new IllegalArgumentException(
                                "Expected two whitespace-separated integers but got: \"" + line + "\"");
                    }
                    int num1 = Integer.parseInt(fields[0]);
                    int num2 = Integer.parseInt(fields[1]);
                    return new Tuple2<MySortKey, String>(new MySortKey(num1, num2), line);
                }
            });

            // The ordering comes from MySortKey itself.
            JavaPairRDD<MySortKey, String> sortByKeyRDD = tupleRDD.sortByKey();

            // The key was only needed for sorting; keep just the original line text.
            JavaRDD<String> sortedRDD = sortByKeyRDD.map(new Function<Tuple2<MySortKey, String>, String>()
            {
                @Override
                public String call(Tuple2<MySortKey, String> tuple) throws Exception
                {
                    return tuple._2;
                }
            });

            sortedRDD.foreach(new VoidFunction<String>()
            {
                @Override
                public void call(String line) throws Exception
                {
                    System.out.println(line);
                }
            });
        }
        finally
        {
            // Always release the Spark context, even when a job above fails.
            sc.close();
        }
    }
}
/**
 * Custom sort key: ascending by the first value, descending by the second value.
 */
class MySortKey implements Ordered<MySortKey>,Serializable{
    /** Serialization version identifier for this key class. */
    private static final long serialVersionUID = -8363444941537305530L;

    /** Primary sort field; compared in ascending order. */
    private int first;
    /** Secondary sort field; compared in descending order when {@code first} ties. */
    private int second;

    /**
     * Creates a key from the two values of one input record.
     *
     * @param first  primary value (ascending)
     * @param second secondary value (descending)
     */
    public MySortKey(int first,int second)
    {
        this.first = first;
        this.second = second;
    }

    /** @return the primary sort value */
    public int getFirst()
    {
        return first;
    }

    /** @param first the new primary sort value */
    public void setFirst(int first)
    {
        this.first = first;
    }

    /** @return the secondary sort value */
    public int getSecond()
    {
        return second;
    }

    /** @param second the new secondary sort value */
    public void setSecond(int second)
    {
        this.second = second;
    }

    /**
     * @param other key to compare against
     * @return {@code true} when this key sorts strictly after {@code other}
     */
    @Override
    public boolean $greater(MySortKey other)
    {
        return compare(other) > 0;
    }

    /**
     * @param other key to compare against
     * @return {@code true} when this key sorts after {@code other} or ties with it
     */
    @Override
    public boolean $greater$eq(MySortKey other)
    {
        return compare(other) >= 0;
    }

    /**
     * @param other key to compare against
     * @return {@code true} when this key sorts strictly before {@code other}
     */
    @Override
    public boolean $less(MySortKey other)
    {
        return compare(other) < 0;
    }

    /**
     * @param other key to compare against
     * @return {@code true} when this key sorts before {@code other} or ties with it
     */
    @Override
    public boolean $less$eq(MySortKey other)
    {
        return compare(other) <= 0;
    }

    /**
     * Orders keys by {@code first} ascending and, on a tie, by {@code second} descending.
     * This is the single source of truth for the ordering; every other comparison
     * method in this class delegates here.
     *
     * @param other key to compare against
     * @return a negative value, zero, or a positive value when this key sorts before,
     *         equal to, or after {@code other}
     */
    @Override
    public int compare(MySortKey other)
    {
        // Integer.compare instead of subtraction: a difference of two ints can overflow
        // and flip the sign of the result.
        int byFirst = Integer.compare(this.first, other.first);
        if (byFirst != 0)
        {
            return byFirst;
        }
        // Operands are swapped to obtain the descending order on the second value.
        return Integer.compare(other.second, this.second);
    }

    /**
     * Same ordering as {@link #compare(MySortKey)}.
     *
     * @param other key to compare against
     * @return the result of {@code compare(other)}
     */
    @Override
    public int compareTo(MySortKey other)
    {
        return compare(other);
    }

    /**
     * Two keys are equal when both values match, which keeps equality consistent
     * with {@link #compare(MySortKey)} returning zero.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj)
        {
            return true;
        }
        if (!(obj instanceof MySortKey))
        {
            return false;
        }
        MySortKey that = (MySortKey) obj;
        return this.first == that.first && this.second == that.second;
    }

    /**
     * Hash code derived from both values, consistent with {@link #equals(Object)}.
     * The fields are mutable through the setters, so the hash changes if they are modified.
     */
    @Override
    public int hashCode()
    {
        return 31 * first + second;
    }

    /** @return a readable form of the key, e.g. {@code MySortKey(4, 1)} */
    @Override
    public String toString()
    {
        return "MySortKey(" + first + ", " + second + ")";
    }
}
// Spark secondary sort (Java)
// Latest recommended article published on 2022-07-22 15:37:35