1、按照文件中的第一列排序。
2、如果第一列相同,则按照第二列排序
二次排序
1、实现自定义的key,要实现Ordered接口和Serializable接口,在key中实现自己多个列的排序算法
2、将包含文本的RDD,映射成key为自定义key、value为文本的JavaPairRDD
3、使用sortByKey算子按照自定义的key进行排序
4、再次映射,剔除自定义的key,只保留文本行
SecondarySort.java
package com.starmcu.git.spark.java;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
*
* 二次排序
* 1、实现自定义的key,要实现Ordered接口和Serializable接口,在key中实现自己多个列的排序算法
* 2、将包含文本的RDD,映射成key为自定义key为key,value文文本的JavaPairRDD
* 3、使用sortByKey算子按照自定义的key进行排序
* 4、再次映射,提出自定义的key,只保留文本行
*
*/
public class SecondarySort {
public static void main(String[] args) {
SparkConf conf =new SparkConf().setMaster("local").setAppName("SecondarySort");
JavaSparkContext sc =new JavaSparkContext(conf);
JavaRDD lines =sc.textFile("C://2.txt");
JavaPairRDD pairs =lines.mapToPair(new PairFunction() {
@Override
public Tuple2 call(String s) throws Exception {
String[] lineSplited = s.split(" ");
SecondarySortKey key = new SecondarySortKey(Integer.valueOf(lineSplited[0]),Integer.valueOf(lineSplited[1]));
return new Tuple2(key,s);
}
});
JavaPairRDD sortedPairs = pairs.sortByKey(false);
JavaRDD sortedLines =sortedPairs.map(new Function, String>() {
@Override
public String call(Tuple2 v1) throws Exception {
return v1._2;
}
});
sortedLines.foreach(new VoidFunction() {
@Override
public void call(String s) throws Exception {
System.out.println(s);
}
});
}
}
SecondarySort.scala
package com.starmcu.git.spark.scala
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Secondary sort driver: orders the lines of a text file by their first
 * column and, for equal first columns, by their second column.
 */
object SecondarySort {

  /**
   * Reads "c://2.txt", sorts its lines in descending order of
   * (first column, second column) and prints the sorted lines.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("LocalFile").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile("c://2.txt")

      // Split each line once and build the composite key from its first two
      // space-separated integer columns; keep the original line as the value.
      val pairs = lines.map { line =>
        val columns = line.split(" ")
        (new SecondarySortKey(columns(0).toInt, columns(1).toInt), line)
      }

      // ascending = false, i.e. the largest key comes first.
      val sortedPairs = pairs.sortByKey(ascending = false)

      // Drop the key; only the original text line is printed.
      val sortLines = sortedPairs.map(sortPair => sortPair._2)
      sortLines.foreach(sortLine => println(sortLine))
    } finally {
      // Release the Spark context even if the job fails.
      sc.stop()
    }
  }
}
使用到的自定义排序的类:
SecondarySortKey.java
package com.starmcu.git.spark.java;
import java.io.Serializable;
import scala.math.Ordered;
public class SecondarySortKey implements Ordered,Serializable {
//实现在自定义key里面,定义需要排序的列
private int first;
private int second;
public SecondarySortKey(Integer integer, Integer integer1) {
this.first=integer;
this.second=integer1;
}
//为要进行排序的多个列,提供getter和setter方法,以及hashcode和equals方法
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SecondarySortKey that = (SecondarySortKey) o;
if (first != that.first) return false;
return second == that.second;
}
@Override
public int hashCode() {
int result = first;
result = 31 * result + second;
return result;
}
public int getFirst() {
return first;
}
public void setFirst(int first) {
this.first = first;
}
public int getSecond() {
return second;
}
public void setSecond(int second) {
this.second = second;
}
public static void main(String[] args) {
}
@Override
public int compare(SecondarySortKey that) {
if(this.first -that.getFirst() !=0){
return this.first-that.getFirst();
}else{
return this.second -that.getSecond();
}
}
@Override
public boolean $less(SecondarySortKey that) {
if(this.first
return true;
}else if(this.first ==that.getFirst() && this.second
return true;
}
return false;
}
@Override
public boolean $greater(SecondarySortKey that) {
if(this.first> that.getFirst()){
return true ;
}else if(this.first == that.getFirst() && this.second>that.getSecond()){
return true;
}
return false;
}
@Override
public boolean $less$eq(SecondarySortKey that) {
if(this.first
return true;
}else if(this.first ==that.getFirst() && this.second<=that.getSecond()){
return true ;
}
return false;
}
@Override
public boolean $greater$eq(SecondarySortKey that) {
if(this.$greater(that)){
return true;
} else if(this.first ==that.getFirst() &&
this.second >= that.getSecond()){
return true;
}
return false;
}
@Override
public int compareTo(SecondarySortKey that) {
if(this.first -that.getFirst() !=0){
return this.first-that.getFirst();
}else{
return this.second -that.getSecond();
}
}
}
SecondarySortKey.scala
package com.starmcu.git.spark.scala
/**
 * Composite key for a two-column secondary sort.
 *
 * Keys are ordered by `first`; ties are broken by `second`.
 *
 * @param first  value of the primary sort column
 * @param second value of the secondary sort column
 */
class SecondarySortKey(val first: Int, val second: Int) extends Ordered[SecondarySortKey] with Serializable {

  /**
   * Compares by `first`, then by `second`.
   *
   * Uses `Integer.compare` rather than subtraction: a difference such as
   * `Int.MinValue - 1` overflows and would report the wrong sign.
   *
   * @param that key to compare against
   * @return a negative, zero or positive value when this key is less than,
   *         equal to or greater than `that`
   */
  override def compare(that: SecondarySortKey): Int = {
    val byFirst = Integer.compare(this.first, that.first)
    if (byFirst != 0) byFirst
    else Integer.compare(this.second, that.second)
  }

  /** Value equality on both columns, kept consistent with `compare`. */
  override def equals(other: Any): Boolean = other match {
    case that: SecondarySortKey =>
      that.getClass == this.getClass && this.first == that.first && this.second == that.second
    case _ => false
  }

  /** Hash derived from both columns, consistent with `equals`. */
  override def hashCode(): Int = 31 * first + second
}
排序的附件: