对文本中的数字，取最大的前3个。
分别使用 Java 和 Scala 实现。
使用的测试附件。
Top3.java
package com.starmcu.git.spark.java;
import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.List;
/**
*
* 取最大的前3个数字
*/
public class Top3 {
public static void main(String[] args) {
SparkConf conf =new SparkConf().setAppName("Accumulator").setMaster("local");
JavaSparkContext sc =new JavaSparkContext(conf);
JavaRDD lines =sc.textFile("C://3.txt");
JavaPairRDD pairs =lines.mapToPair(new PairFunction() {
@Override
public Tuple2 call(String s) throws Exception {
return new Tuple2(Integer.valueOf(s),s);
}
});
JavaPairRDD sortedPairs =pairs.sortByKey(false);
JavaRDD sortedNumbers =sortedPairs.map(new Function, Integer>() {
@Override
public Integer call(Tuple2 v1) throws Exception {
return v1._1;
}
});
List top3Numbers=sortedNumbers.take(3);
for(Integer i :top3Numbers){
System.out.println(i);
}
}
}
Top3.scala
package com.starmcu.git.spark.scala
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Prints the three largest integers found in a text file, one per line.
 *
 * Each non-blank line of the input file is parsed as an `Int`. The numbers are
 * sorted in descending order with `sortByKey` and the first three are collected
 * to the driver and printed.
 */
object Top3 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the application name "SortWordCount" looks like a copy-paste
    // leftover from another example; kept unchanged to preserve behavior.
    val conf = new SparkConf().setMaster("local").setAppName("SortWordCount")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile("C://3.txt")

      // Drop blank lines so that toInt does not fail on them, then key each
      // line by its numeric value so the RDD can be sorted by key.
      val pairs = lines
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => (line.toInt, line))

      // ascending = false => largest numbers first.
      val sortedPairs = pairs.sortByKey(ascending = false)
      val sortedNumbers = sortedPairs.map(sortedPair => sortedPair._1)

      // take(3) returns fewer than three elements when the file is shorter.
      val top3numbers = sortedNumbers.take(3)
      for (num <- top3numbers) {
        println(num)
      }
    } finally {
      // Release the Spark context even if the job fails.
      sc.stop()
    }
  }
}