1、对文本文件中的数字,获取最大的前三个。
代码实例:
package com.netcloud.spark.sparkcore.projectpractice;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.List;
/**
 * Java version.
 * Reads a text file that holds one integer per line and prints the three largest values.
 *
 * @author yangshaojun
 * #date 2019/3/15 16:38
 * @version 1.0
 */
public class Demo_002_TopThree {
    /**
     * Runs the job locally against data/sparkcore/top3.txt and prints the top three numbers,
     * largest first, one per line.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        // The application name now matches this class (it was a copy-paste leftover before).
        conf.setMaster("local").setAppName("Demo_002_TopThree");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lineRDD = sc.textFile("data/sparkcore/top3.txt");
            // Skip blank lines (e.g. a trailing newline) so Integer.valueOf does not fail on them.
            JavaRDD<String> numberLineRDD = lineRDD.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) throws Exception {
                    return !s.trim().isEmpty();
                }
            });
            // Map each line to a (numeric value, text) pair so it can be sorted numerically.
            JavaPairRDD<Integer, String> integerRDD = numberLineRDD.mapToPair(new PairFunction<String, Integer, String>() {
                @Override
                public Tuple2<Integer, String> call(String s) throws Exception {
                    // Trim first: Integer.valueOf rejects surrounding whitespace.
                    String number = s.trim();
                    return new Tuple2<Integer, String>(Integer.valueOf(number), number);
                }
            });
            // Sort by the numeric key in descending order.
            JavaPairRDD<Integer, String> sortNumberRDD = integerRDD.sortByKey(false);
            // Drop the key again and keep only the text of each number.
            JavaRDD<String> retRDD = sortNumberRDD.map(new Function<Tuple2<Integer, String>, String>() {
                @Override
                public String call(Tuple2<Integer, String> v1) throws Exception {
                    return v1._2;
                }
            });
            // Fetch the first three elements of the sorted data set.
            List<String> beforeThree = retRDD.take(3);
            // Print the result.
            for (String ret : beforeThree) {
                System.out.println(ret);
            }
        } finally {
            // Always release the Spark context, even when the job fails.
            sc.stop();
        }
    }
}
package com.netcloud.bigdata.spark_core.basiclearning.projectpractice
import org.apache.spark.{SparkConf, SparkContext}
/** Scala version.
  * Reads a text file that holds one integer per line and prints the three largest values.
  *
  * @author yangshaojun
  * #date 2019/3/15 16:40
  * @version 1.0
  */
object Demo_002_TopThree {
  /** Runs the job locally against data/sparkcore/top3.txt and prints the top three numbers,
    * largest first, one per line.
    *
    * @param args command line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("Demo_002_TopThree")
    val sc = new SparkContext(conf)
    try {
      val topThree = sc.textFile("data/sparkcore/top3.txt")
        .map(_.trim)
        // Skip blank lines (e.g. a trailing newline) so toInt does not throw on them.
        .filter(_.nonEmpty)
        // Pair each value with its numeric form so the sort is numeric, not lexicographic.
        .map(num => (num.toInt, num))
        .sortByKey(ascending = false)
        .map(_._2)
        .take(3)
      topThree.foreach(println)
    } finally {
      // Release the Spark context even when the job fails.
      sc.stop()
    }
  }
}
2、对每个班级内的学生成绩,取出前三名。(分组TopN)
代码实例:
package com.netcloud.spark.sparkcore.projectpractice;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
import java.util.Arrays;
import java.util.Iterator;
/**
 * For every class, prints the three highest student scores (grouped top-N).
 * Each input line is expected to look like "className,score".
 *
 * @author yangshaojun
 * #date 2019/3/15 17:05
 * @version 1.0
 */
public class Demo_003_GroupTopThree {
    /** Number of highest scores kept per class. */
    private static final int TOP_N = 3;

    /**
     * Runs the job locally against data/sparkcore/score.txt and prints "className:score"
     * for the top scores of every class, highest first within a class.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local").setAppName("Demo_003_GroupTopThree");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lineRDD = sc.textFile("data/sparkcore/score.txt");
            // Parse every line into a (className, score) pair.
            JavaPairRDD<String, Integer> kvRDD = lineRDD.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) throws Exception {
                    // Split once and reuse the fields.
                    String[] fields = s.split(",");
                    return new Tuple2<String, Integer>(fields[0], Integer.valueOf(fields[1].trim()));
                }
            });
            JavaPairRDD<String, Iterable<Integer>> groupPairRDD = kvRDD.groupByKey();
            // Reduce the scores of each class to its highest TOP_N values.
            JavaPairRDD<String, Iterable<Integer>> top3RDD = groupPairRDD.mapToPair(new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
                @Override
                public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> t) throws Exception {
                    return new Tuple2<String, Iterable<Integer>>(t._1, topScores(t._2, TOP_N));
                }
            });
            top3RDD.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
                @Override
                public void call(Tuple2<String, Iterable<Integer>> t) throws Exception {
                    String className = t._1;
                    for (Integer score : t._2) {
                        System.out.println(className + ":" + score);
                    }
                }
            });
        } finally {
            // Always release the Spark context, even when the job fails.
            sc.stop();
        }
    }

    /**
     * Selects the {@code n} largest scores in descending order.
     *
     * Keeps a sorted buffer of at most {@code n} entries. A new score is inserted at its
     * sorted position and all smaller entries are shifted down by one, so no retained value
     * is overwritten. The result only contains as many entries as scores were seen, so a
     * group with fewer than {@code n} scores yields no null entries.
     *
     * @param scores the scores of one group
     * @param n      maximum number of scores to keep
     * @return the highest scores, largest first, with at most {@code n} elements
     */
    private static Iterable<Integer> topScores(Iterable<Integer> scores, int n) {
        Integer[] top = new Integer[n];
        int filled = 0;
        for (Integer score : scores) {
            // Walk back from the end of the filled part to find the insertion position.
            int pos = filled;
            while (pos > 0 && score > top[pos - 1]) {
                pos--;
            }
            if (pos >= n) {
                // Buffer is full and the score is not larger than any kept value.
                continue;
            }
            // Shift smaller entries down by one; the smallest falls off when the buffer is full.
            int last = Math.min(filled, n - 1);
            for (int j = last; j > pos; j--) {
                top[j] = top[j - 1];
            }
            top[pos] = score;
            if (filled < n) {
                filled++;
            }
        }
        return Arrays.asList(Arrays.copyOf(top, filled));
    }
}